{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ExtendedDefaultRules #-} {-# LANGUAGE BlockArguments #-} module Exercise04 where import Data.List ((\\)) import Data.Char (ord, chr, isSpace, isPrint) import qualified Data.Text as Text -- Trivial implementation --data State = Normal -- == ExpectOpeningTagOrEOL -- | OpeningTagIdentifier -- | ClosingTagIdentifier -- | TagEnd -- --showState Normal = "N " --showState OpeningTagIdentifier = "< " --showState ClosingTagIdentifier = " " -- --trivialXmlLightImplementation :: String -> Bool ----trivialXmlLightImplementation s | trace ("process: " ++ show s) False = undefined --trivialXmlLightImplementation s = txli Normal s [] -- --txli :: State -> String -> [String] -> Bool ----txli state cs stack | trace (showState state ++ " | " ++ show (map reverse stack) ++ " | " ++ cs) False = undefined --txli Normal "" [] = True --txli _ "" _ = False --txli Normal ('<':'/':cs) (t:ts) = txli ClosingTagIdentifier cs (reverse t : ts) --txli Normal ('<':cs) ts = txli OpeningTagIdentifier cs ("":ts) --txli Normal ('>':cs) ts = False --txli Normal (c:cs) ts = txli Normal cs ts --txli OpeningTagIdentifier ss@(c:cs) stack@(t:ts) -- | c `elem` "> \t\n\v\f\r" = txli TagEnd ss stack -- | otherwise = txli OpeningTagIdentifier cs ((c:t):ts) --txli ClosingTagIdentifier ss@(c:cs) stack@(t:ts) -- | c `elem` "> \t\n\v\f\r" = txli TagEnd ss ts -- | null t = False -- | otherwise = let (i:u) = t in -- if i == c then txli ClosingTagIdentifier cs (u:ts) -- else False --txli TagEnd (c:cs) ts -- | c `elem` " \t\n\v\f\r" = txli TagEnd cs ts -- | c == '>' = txli Normal cs ts -- | otherwise = False ----txli ClosingTagIdentifier _ [] = False ----txli ClosingTagIdentifier _ ("":ts) = False sometimes when there comes another char --txli _ _ _ = False --xmlLight :: String -> Bool --xmlLight = trivialXmlLightImplementation --checkT (x,y) | trace (show y ++ if xmlLight y == x then " ." else " FAIL!") False = undefined checkT (x,y) = xmlLight y == x t = all checkT testData {-WETT-} {- MCCOMMENT -- slower, i.e. to slow for artemis tests, but only 77 tokens --genlist :: [(Text.Text,Text.Text)] --genlist = -- [(Text.pack $ chr i : "<", Text.pack $ "<") | i <- [0..255] \\ [ord '<', ord '>']]++ -- [(Text.pack $ chr i : ">", Text.pack $ ">") | i <- [k | k <- [0..255], isSpace (chr k)]] --showlist = show genlist list :: [(Text.Text,Text.Text)] eliminateValidMatchingPairs :: [Text.Text] -> Text.Text -> [Text.Text] xrep :: Text.Text -> (Text.Text, Text.Text) -> Text.Text replaceall :: Text.Text -> Text.Text xmlLight :: String -> Bool list = read "[(\"\\NUL<\",\"<\"),(\"\\SOH<\",\"<\"),(\"\\STX<\",\"<\"),(\"\\ETX<\",\"<\"),(\"\\EOT<\",\"<\"),(\"\\ENQ<\",\"<\"),(\"\\ACK<\",\"<\"),(\"\\a<\",\"<\"),(\"\\b<\",\"<\"),(\"\\t<\",\"<\"),(\"\\n<\",\"<\"),(\"\\v<\",\"<\"),(\"\\f<\",\"<\"),(\"\\r<\",\"<\"),(\"\\SO<\",\"<\"),(\"\\SI<\",\"<\"),(\"\\DLE<\",\"<\"),(\"\\DC1<\",\"<\"),(\"\\DC2<\",\"<\"),(\"\\DC3<\",\"<\"),(\"\\DC4<\",\"<\"),(\"\\NAK<\",\"<\"),(\"\\SYN<\",\"<\"),(\"\\ETB<\",\"<\"),(\"\\CAN<\",\"<\"),(\"\\EM<\",\"<\"),(\"\\SUB<\",\"<\"),(\"\\ESC<\",\"<\"),(\"\\FS<\",\"<\"),(\"\\GS<\",\"<\"),(\"\\RS<\",\"<\"),(\"\\US<\",\"<\"),(\" <\",\"<\"),(\"!<\",\"<\"),(\"\\\"<\",\"<\"),(\"#<\",\"<\"),(\"$<\",\"<\"),(\"%<\",\"<\"),(\"&<\",\"<\"),(\"'<\",\"<\"),(\"(<\",\"<\"),(\")<\",\"<\"),(\"*<\",\"<\"),(\"+<\",\"<\"),(\",<\",\"<\"),(\"-<\",\"<\"),(\".<\",\"<\"),(\"/<\",\"<\"),(\"0<\",\"<\"),(\"1<\",\"<\"),(\"2<\",\"<\"),(\"3<\",\"<\"),(\"4<\",\"<\"),(\"5<\",\"<\"),(\"6<\",\"<\"),(\"7<\",\"<\"),(\"8<\",\"<\"),(\"9<\",\"<\"),(\":<\",\"<\"),(\";<\",\"<\"),(\"=<\",\"<\"),(\"?<\",\"<\"),(\"@<\",\"<\"),(\"A<\",\"<\"),(\"B<\",\"<\"),(\"C<\",\"<\"),(\"D<\",\"<\"),(\"E<\",\"<\"),(\"F<\",\"<\"),(\"G<\",\"<\"),(\"H<\",\"<\"),(\"I<\",\"<\"),(\"J<\",\"<\"),(\"K<\",\"<\"),(\"L<\",\"<\"),(\"M<\",\"<\"),(\"N<\",\"<\"),(\"O<\",\"<\"),(\"P<\",\"<\"),(\"Q<\",\"<\"),(\"R<\",\"<\"),(\"S<\",\"<\"),(\"T<\",\"<\"),(\"U<\",\"<\"),(\"V<\",\"<\"),(\"W<\",\"<\"),(\"X<\",\"<\"),(\"Y<\",\"<\"),(\"Z<\",\"<\"),(\"[<\",\"<\"),(\"\\\\<\",\"<\"),(\"]<\",\"<\"),(\"^<\",\"<\"),(\"_<\",\"<\"),(\"`<\",\"<\"),(\"a<\",\"<\"),(\"b<\",\"<\"),(\"c<\",\"<\"),(\"d<\",\"<\"),(\"e<\",\"<\"),(\"f<\",\"<\"),(\"g<\",\"<\"),(\"h<\",\"<\"),(\"i<\",\"<\"),(\"j<\",\"<\"),(\"k<\",\"<\"),(\"l<\",\"<\"),(\"m<\",\"<\"),(\"n<\",\"<\"),(\"o<\",\"<\"),(\"p<\",\"<\"),(\"q<\",\"<\"),(\"r<\",\"<\"),(\"s<\",\"<\"),(\"t<\",\"<\"),(\"u<\",\"<\"),(\"v<\",\"<\"),(\"w<\",\"<\"),(\"x<\",\"<\"),(\"y<\",\"<\"),(\"z<\",\"<\"),(\"{<\",\"<\"),(\"|<\",\"<\"),(\"}<\",\"<\"),(\"~<\",\"<\"),(\"\\DEL<\",\"<\"),(\"\\128<\",\"<\"),(\"\\129<\",\"<\"),(\"\\130<\",\"<\"),(\"\\131<\",\"<\"),(\"\\132<\",\"<\"),(\"\\133<\",\"<\"),(\"\\134<\",\"<\"),(\"\\135<\",\"<\"),(\"\\136<\",\"<\"),(\"\\137<\",\"<\"),(\"\\138<\",\"<\"),(\"\\139<\",\"<\"),(\"\\140<\",\"<\"),(\"\\141<\",\"<\"),(\"\\142<\",\"<\"),(\"\\143<\",\"<\"),(\"\\144<\",\"<\"),(\"\\145<\",\"<\"),(\"\\146<\",\"<\"),(\"\\147<\",\"<\"),(\"\\148<\",\"<\"),(\"\\149<\",\"<\"),(\"\\150<\",\"<\"),(\"\\151<\",\"<\"),(\"\\152<\",\"<\"),(\"\\153<\",\"<\"),(\"\\154<\",\"<\"),(\"\\155<\",\"<\"),(\"\\156<\",\"<\"),(\"\\157<\",\"<\"),(\"\\158<\",\"<\"),(\"\\159<\",\"<\"),(\"\\160<\",\"<\"),(\"\\161<\",\"<\"),(\"\\162<\",\"<\"),(\"\\163<\",\"<\"),(\"\\164<\",\"<\"),(\"\\165<\",\"<\"),(\"\\166<\",\"<\"),(\"\\167<\",\"<\"),(\"\\168<\",\"<\"),(\"\\169<\",\"<\"),(\"\\170<\",\"<\"),(\"\\171<\",\"<\"),(\"\\172<\",\"<\"),(\"\\173<\",\"<\"),(\"\\174<\",\"<\"),(\"\\175<\",\"<\"),(\"\\176<\",\"<\"),(\"\\177<\",\"<\"),(\"\\178<\",\"<\"),(\"\\179<\",\"<\"),(\"\\180<\",\"<\"),(\"\\181<\",\"<\"),(\"\\182<\",\"<\"),(\"\\183<\",\"<\"),(\"\\184<\",\"<\"),(\"\\185<\",\"<\"),(\"\\186<\",\"<\"),(\"\\187<\",\"<\"),(\"\\188<\",\"<\"),(\"\\189<\",\"<\"),(\"\\190<\",\"<\"),(\"\\191<\",\"<\"),(\"\\192<\",\"<\"),(\"\\193<\",\"<\"),(\"\\194<\",\"<\"),(\"\\195<\",\"<\"),(\"\\196<\",\"<\"),(\"\\197<\",\"<\"),(\"\\198<\",\"<\"),(\"\\199<\",\"<\"),(\"\\200<\",\"<\"),(\"\\201<\",\"<\"),(\"\\202<\",\"<\"),(\"\\203<\",\"<\"),(\"\\204<\",\"<\"),(\"\\205<\",\"<\"),(\"\\206<\",\"<\"),(\"\\207<\",\"<\"),(\"\\208<\",\"<\"),(\"\\209<\",\"<\"),(\"\\210<\",\"<\"),(\"\\211<\",\"<\"),(\"\\212<\",\"<\"),(\"\\213<\",\"<\"),(\"\\214<\",\"<\"),(\"\\215<\",\"<\"),(\"\\216<\",\"<\"),(\"\\217<\",\"<\"),(\"\\218<\",\"<\"),(\"\\219<\",\"<\"),(\"\\220<\",\"<\"),(\"\\221<\",\"<\"),(\"\\222<\",\"<\"),(\"\\223<\",\"<\"),(\"\\224<\",\"<\"),(\"\\225<\",\"<\"),(\"\\226<\",\"<\"),(\"\\227<\",\"<\"),(\"\\228<\",\"<\"),(\"\\229<\",\"<\"),(\"\\230<\",\"<\"),(\"\\231<\",\"<\"),(\"\\232<\",\"<\"),(\"\\233<\",\"<\"),(\"\\234<\",\"<\"),(\"\\235<\",\"<\"),(\"\\236<\",\"<\"),(\"\\237<\",\"<\"),(\"\\238<\",\"<\"),(\"\\239<\",\"<\"),(\"\\240<\",\"<\"),(\"\\241<\",\"<\"),(\"\\242<\",\"<\"),(\"\\243<\",\"<\"),(\"\\244<\",\"<\"),(\"\\245<\",\"<\"),(\"\\246<\",\"<\"),(\"\\247<\",\"<\"),(\"\\248<\",\"<\"),(\"\\249<\",\"<\"),(\"\\250<\",\"<\"),(\"\\251<\",\"<\"),(\"\\252<\",\"<\"),(\"\\253<\",\"<\"),(\"\\254<\",\"<\"),(\"\\255<\",\"<\"),(\"\\t>\",\">\"),(\"\\n>\",\">\"),(\"\\v>\",\">\"),(\"\\f>\",\">\"),(\"\\r>\",\">\"),(\" >\",\">\"),(\"\\160>\",\">\")]" eliminateValidMatchingPairs xss x = if null xss || "/" <> head xss /= x then x:xss else tail xss xrep a (b,c) = Text.replace b c a replaceall s = foldl xrep s list -- For explanation, refer to 85 token solution from below! xmlLight x = Text.any isSpace s < null ( foldl eliminateValidMatchingPairs [] $ Text.splitOn "<" s) where s = iterate replaceall y !! Text.length y y = Text.pack x <> " (Char -> Bool) -> String -> String eliminateValidMatchingPairs :: [Text.Text] -> Text.Text -> [Text.Text] xmlLight :: String -> Bool --removeWithContext matchRight f = foldr g "\t" -- where g x rss = if head rss == matchRight && f x then rss else x:rss removeWithContext matchRight f = flip foldr "\t" \x rss -> if head rss == matchRight && f x then rss else x:rss -- Eliminate tag stack while folding eliminateValidMatchingPairs xss x = if null xss || "/" <> head xss /= x then x:xss else tail xss xmlLight x = Text.any isSpace s -- 6a. If text still contains whitespace, it is invalid. < null ( -- 6b.3. If the result is empty, the xml was valid. -- HINT: Some Magic: not a && b == a < b foldl eliminateValidMatchingPairs [] -- 6b.2. Eliminate matching pairs by folding $ Text.splitOn "<" s) -- 6b.1. Split on '<' where s = Text.stripEnd -- 5. Remove whitespace from the end of the string, which -- is introduced by the dirty remove* methods from below -- (they both need a non empty string as a primer) $ Text.pack -- 4. String -> Text.Text $ removeWithContext '<' -- 3. Remove text between tags >...< (`notElem` "><") -- (== regex replace ">[^<>]<" with "><") $ removeWithContext '>' isSpace -- 2. Remove valid whitespace inside of tags -- (== regex replace "[:space:]*>" with ">") $ x <> "{input}". Then we will never -- have to deal with any trailing characters. The empty tag may be -- considered valid (see Zulip), but will according to the exercise -- never be given to us as an input. Therefore, "<>" is unique and -- no invalid can become valid afterwards (this would be the case with -- e.g. "{str}" when we get the invalid input ""). -- HINT: We can fully skip the opening "<>" due to a coincidence: -- A) We would skip the first '<' character here, because we split on '<' -- and otherwise would have to remove this first (empty) element. -- B) We would also skip the closing bracket '>', which is possible because -- we parse it as part of the identifier, and therefore ok if we skip -- it on both opening and closing tag. -- -> Togerther this removes the need for a prefix concatenation, which -- would cost us 2 additional tokens. {-TTEW-}