{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ExtendedDefaultRules #-}
{-# LANGUAGE BlockArguments #-}
module Exercise04 where
import Data.List ((\\))
import Data.Char (ord, chr, isSpace, isPrint)
import qualified Data.Text as Text
-- Trivial implementation
--data State = Normal -- == ExpectOpeningTagOrEOL
-- | OpeningTagIdentifier
-- | ClosingTagIdentifier
-- | TagEnd
--
--showState Normal = "N "
--showState OpeningTagIdentifier = "< "
--showState ClosingTagIdentifier = ""
--showState TagEnd = "> "
--
--trivialXmlLightImplementation :: String -> Bool
----trivialXmlLightImplementation s | trace ("process: " ++ show s) False = undefined
--trivialXmlLightImplementation s = txli Normal s []
--
--txli :: State -> String -> [String] -> Bool
----txli state cs stack | trace (showState state ++ " | " ++ show (map reverse stack) ++ " | " ++ cs) False = undefined
--txli Normal "" [] = True
--txli _ "" _ = False
--txli Normal ('<':'/':cs) (t:ts) = txli ClosingTagIdentifier cs (reverse t : ts)
--txli Normal ('<':cs) ts = txli OpeningTagIdentifier cs ("":ts)
--txli Normal ('>':cs) ts = False
--txli Normal (c:cs) ts = txli Normal cs ts
--txli OpeningTagIdentifier ss@(c:cs) stack@(t:ts)
-- | c `elem` "> \t\n\v\f\r" = txli TagEnd ss stack
-- | otherwise = txli OpeningTagIdentifier cs ((c:t):ts)
--txli ClosingTagIdentifier ss@(c:cs) stack@(t:ts)
-- | c `elem` "> \t\n\v\f\r" = txli TagEnd ss ts
-- | null t = False
-- | otherwise = let (i:u) = t in
-- if i == c then txli ClosingTagIdentifier cs (u:ts)
-- else False
--txli TagEnd (c:cs) ts
-- | c `elem` " \t\n\v\f\r" = txli TagEnd cs ts
-- | c == '>' = txli Normal cs ts
-- | otherwise = False
----txli ClosingTagIdentifier _ [] = False
----txli ClosingTagIdentifier _ ("":ts) = False sometimes when there comes another char
--txli _ _ _ = False
--xmlLight :: String -> Bool
--xmlLight = trivialXmlLightImplementation
--checkT (x,y) | trace (show y ++ if xmlLight y == x then " ." else " FAIL!") False = undefined
checkT (x,y) = xmlLight y == x
t = all checkT testData
{-WETT-}
{- MCCOMMENT
-- slower, i.e. to slow for artemis tests, but only 77 tokens
--genlist :: [(Text.Text,Text.Text)]
--genlist =
-- [(Text.pack $ chr i : "<", Text.pack $ "<") | i <- [0..255] \\ [ord '<', ord '>']]++
-- [(Text.pack $ chr i : ">", Text.pack $ ">") | i <- [k | k <- [0..255], isSpace (chr k)]]
--showlist = show genlist
list :: [(Text.Text,Text.Text)]
eliminateValidMatchingPairs :: [Text.Text] -> Text.Text -> [Text.Text]
xrep :: Text.Text -> (Text.Text, Text.Text) -> Text.Text
replaceall :: Text.Text -> Text.Text
xmlLight :: String -> Bool
list = read "[(\"\\NUL<\",\"<\"),(\"\\SOH<\",\"<\"),(\"\\STX<\",\"<\"),(\"\\ETX<\",\"<\"),(\"\\EOT<\",\"<\"),(\"\\ENQ<\",\"<\"),(\"\\ACK<\",\"<\"),(\"\\a<\",\"<\"),(\"\\b<\",\"<\"),(\"\\t<\",\"<\"),(\"\\n<\",\"<\"),(\"\\v<\",\"<\"),(\"\\f<\",\"<\"),(\"\\r<\",\"<\"),(\"\\SO<\",\"<\"),(\"\\SI<\",\"<\"),(\"\\DLE<\",\"<\"),(\"\\DC1<\",\"<\"),(\"\\DC2<\",\"<\"),(\"\\DC3<\",\"<\"),(\"\\DC4<\",\"<\"),(\"\\NAK<\",\"<\"),(\"\\SYN<\",\"<\"),(\"\\ETB<\",\"<\"),(\"\\CAN<\",\"<\"),(\"\\EM<\",\"<\"),(\"\\SUB<\",\"<\"),(\"\\ESC<\",\"<\"),(\"\\FS<\",\"<\"),(\"\\GS<\",\"<\"),(\"\\RS<\",\"<\"),(\"\\US<\",\"<\"),(\" <\",\"<\"),(\"!<\",\"<\"),(\"\\\"<\",\"<\"),(\"#<\",\"<\"),(\"$<\",\"<\"),(\"%<\",\"<\"),(\"&<\",\"<\"),(\"'<\",\"<\"),(\"(<\",\"<\"),(\")<\",\"<\"),(\"*<\",\"<\"),(\"+<\",\"<\"),(\",<\",\"<\"),(\"-<\",\"<\"),(\".<\",\"<\"),(\"/<\",\"<\"),(\"0<\",\"<\"),(\"1<\",\"<\"),(\"2<\",\"<\"),(\"3<\",\"<\"),(\"4<\",\"<\"),(\"5<\",\"<\"),(\"6<\",\"<\"),(\"7<\",\"<\"),(\"8<\",\"<\"),(\"9<\",\"<\"),(\":<\",\"<\"),(\";<\",\"<\"),(\"=<\",\"<\"),(\"?<\",\"<\"),(\"@<\",\"<\"),(\"A<\",\"<\"),(\"B<\",\"<\"),(\"C<\",\"<\"),(\"D<\",\"<\"),(\"E<\",\"<\"),(\"F<\",\"<\"),(\"G<\",\"<\"),(\"H<\",\"<\"),(\"I<\",\"<\"),(\"J<\",\"<\"),(\"K<\",\"<\"),(\"L<\",\"<\"),(\"M<\",\"<\"),(\"N<\",\"<\"),(\"O<\",\"<\"),(\"P<\",\"<\"),(\"Q<\",\"<\"),(\"R<\",\"<\"),(\"S<\",\"<\"),(\"T<\",\"<\"),(\"U<\",\"<\"),(\"V<\",\"<\"),(\"W<\",\"<\"),(\"X<\",\"<\"),(\"Y<\",\"<\"),(\"Z<\",\"<\"),(\"[<\",\"<\"),(\"\\\\<\",\"<\"),(\"]<\",\"<\"),(\"^<\",\"<\"),(\"_<\",\"<\"),(\"`<\",\"<\"),(\"a<\",\"<\"),(\"b<\",\"<\"),(\"c<\",\"<\"),(\"d<\",\"<\"),(\"e<\",\"<\"),(\"f<\",\"<\"),(\"g<\",\"<\"),(\"h<\",\"<\"),(\"i<\",\"<\"),(\"j<\",\"<\"),(\"k<\",\"<\"),(\"l<\",\"<\"),(\"m<\",\"<\"),(\"n<\",\"<\"),(\"o<\",\"<\"),(\"p<\",\"<\"),(\"q<\",\"<\"),(\"r<\",\"<\"),(\"s<\",\"<\"),(\"t<\",\"<\"),(\"u<\",\"<\"),(\"v<\",\"<\"),(\"w<\",\"<\"),(\"x<\",\"<\"),(\"y<\",\"<\"),(\"z<\",\"<\"),(\"{<\",\"<\"),(\"|<\",\"<\"),(\"}<\",\"<\"),(\"~<\",\"<\"),(\"\\DEL<\",\"<\"),(\"\\128<\",\"<\"),(\"\\129<\",\"<\"),(\"\\130<\",\"<\"),(\"\\131<\",\"<\"),(\"\\132<\",\"<\"),(\"\\133<\",\"<\"),(\"\\134<\",\"<\"),(\"\\135<\",\"<\"),(\"\\136<\",\"<\"),(\"\\137<\",\"<\"),(\"\\138<\",\"<\"),(\"\\139<\",\"<\"),(\"\\140<\",\"<\"),(\"\\141<\",\"<\"),(\"\\142<\",\"<\"),(\"\\143<\",\"<\"),(\"\\144<\",\"<\"),(\"\\145<\",\"<\"),(\"\\146<\",\"<\"),(\"\\147<\",\"<\"),(\"\\148<\",\"<\"),(\"\\149<\",\"<\"),(\"\\150<\",\"<\"),(\"\\151<\",\"<\"),(\"\\152<\",\"<\"),(\"\\153<\",\"<\"),(\"\\154<\",\"<\"),(\"\\155<\",\"<\"),(\"\\156<\",\"<\"),(\"\\157<\",\"<\"),(\"\\158<\",\"<\"),(\"\\159<\",\"<\"),(\"\\160<\",\"<\"),(\"\\161<\",\"<\"),(\"\\162<\",\"<\"),(\"\\163<\",\"<\"),(\"\\164<\",\"<\"),(\"\\165<\",\"<\"),(\"\\166<\",\"<\"),(\"\\167<\",\"<\"),(\"\\168<\",\"<\"),(\"\\169<\",\"<\"),(\"\\170<\",\"<\"),(\"\\171<\",\"<\"),(\"\\172<\",\"<\"),(\"\\173<\",\"<\"),(\"\\174<\",\"<\"),(\"\\175<\",\"<\"),(\"\\176<\",\"<\"),(\"\\177<\",\"<\"),(\"\\178<\",\"<\"),(\"\\179<\",\"<\"),(\"\\180<\",\"<\"),(\"\\181<\",\"<\"),(\"\\182<\",\"<\"),(\"\\183<\",\"<\"),(\"\\184<\",\"<\"),(\"\\185<\",\"<\"),(\"\\186<\",\"<\"),(\"\\187<\",\"<\"),(\"\\188<\",\"<\"),(\"\\189<\",\"<\"),(\"\\190<\",\"<\"),(\"\\191<\",\"<\"),(\"\\192<\",\"<\"),(\"\\193<\",\"<\"),(\"\\194<\",\"<\"),(\"\\195<\",\"<\"),(\"\\196<\",\"<\"),(\"\\197<\",\"<\"),(\"\\198<\",\"<\"),(\"\\199<\",\"<\"),(\"\\200<\",\"<\"),(\"\\201<\",\"<\"),(\"\\202<\",\"<\"),(\"\\203<\",\"<\"),(\"\\204<\",\"<\"),(\"\\205<\",\"<\"),(\"\\206<\",\"<\"),(\"\\207<\",\"<\"),(\"\\208<\",\"<\"),(\"\\209<\",\"<\"),(\"\\210<\",\"<\"),(\"\\211<\",\"<\"),(\"\\212<\",\"<\"),(\"\\213<\",\"<\"),(\"\\214<\",\"<\"),(\"\\215<\",\"<\"),(\"\\216<\",\"<\"),(\"\\217<\",\"<\"),(\"\\218<\",\"<\"),(\"\\219<\",\"<\"),(\"\\220<\",\"<\"),(\"\\221<\",\"<\"),(\"\\222<\",\"<\"),(\"\\223<\",\"<\"),(\"\\224<\",\"<\"),(\"\\225<\",\"<\"),(\"\\226<\",\"<\"),(\"\\227<\",\"<\"),(\"\\228<\",\"<\"),(\"\\229<\",\"<\"),(\"\\230<\",\"<\"),(\"\\231<\",\"<\"),(\"\\232<\",\"<\"),(\"\\233<\",\"<\"),(\"\\234<\",\"<\"),(\"\\235<\",\"<\"),(\"\\236<\",\"<\"),(\"\\237<\",\"<\"),(\"\\238<\",\"<\"),(\"\\239<\",\"<\"),(\"\\240<\",\"<\"),(\"\\241<\",\"<\"),(\"\\242<\",\"<\"),(\"\\243<\",\"<\"),(\"\\244<\",\"<\"),(\"\\245<\",\"<\"),(\"\\246<\",\"<\"),(\"\\247<\",\"<\"),(\"\\248<\",\"<\"),(\"\\249<\",\"<\"),(\"\\250<\",\"<\"),(\"\\251<\",\"<\"),(\"\\252<\",\"<\"),(\"\\253<\",\"<\"),(\"\\254<\",\"<\"),(\"\\255<\",\"<\"),(\"\\t>\",\">\"),(\"\\n>\",\">\"),(\"\\v>\",\">\"),(\"\\f>\",\">\"),(\"\\r>\",\">\"),(\" >\",\">\"),(\"\\160>\",\">\")]"
eliminateValidMatchingPairs xss x = if null xss || "/" <> head xss /= x then x:xss else tail xss
xrep a (b,c) = Text.replace b c a
replaceall s = foldl xrep s list
-- For explanation, refer to 85 token solution from below!
xmlLight x =
Text.any isSpace s
< null (
foldl eliminateValidMatchingPairs []
$ Text.splitOn "<" s)
where
s = iterate replaceall y !! Text.length y
y = Text.pack x <> ""
-}
removeWithContext :: Char -> (Char -> Bool) -> String -> String
eliminateValidMatchingPairs :: [Text.Text] -> Text.Text -> [Text.Text]
xmlLight :: String -> Bool
--removeWithContext matchRight f = foldr g "\t"
-- where g x rss = if head rss == matchRight && f x then rss else x:rss
removeWithContext matchRight f = flip foldr "\t" \x rss -> if head rss == matchRight && f x then rss else x:rss
-- Eliminate tag stack while folding
eliminateValidMatchingPairs xss x = if null xss || "/" <> head xss /= x then x:xss else tail xss
xmlLight x =
Text.any isSpace s -- 6a. If text still contains whitespace, it is invalid.
< null ( -- 6b.3. If the result is empty, the xml was valid.
-- HINT: Some Magic: not a && b == a < b
foldl eliminateValidMatchingPairs [] -- 6b.2. Eliminate matching pairs by folding
$ Text.splitOn "<" s) -- 6b.1. Split on '<'
where
s = Text.stripEnd -- 5. Remove whitespace from the end of the string, which
-- is introduced by the dirty remove* methods from below
-- (they both need a non empty string as a primer)
$ Text.pack -- 4. String -> Text.Text
$ removeWithContext '<' -- 3. Remove text between tags >...<
(`notElem` "><") -- (== regex replace ">[^<>]<" with "><")
$ removeWithContext '>' isSpace -- 2. Remove valid whitespace inside of tags
-- (== regex replace "[:space:]*>" with ">")
$ x <> "" -- 1. Put string in empty tag like "<>{input}>". Then we will never
-- have to deal with any trailing characters. The empty tag may be
-- considered valid (see Zulip), but will according to the exercise
-- never be given to us as an input. Therefore, "<>" is unique and
-- no invalid can become valid afterwards (this would be the case with
-- e.g. "{str}" when we get the invalid input "").
-- HINT: We can fully skip the opening "<>" due to a coincidence:
-- A) We would skip the first '<' character here, because we split on '<'
-- and otherwise would have to remove this first (empty) element.
-- B) We would also skip the closing bracket '>', which is possible because
-- we parse it as part of the identifier, and therefore ok if we skip
-- it on both opening and closing tag.
-- -> Togerther this removes the need for a prefix concatenation, which
-- would cost us 2 additional tokens.
{-TTEW-}