{- Title: Tokenize.hs Author: Master of Competition, TU Muenchen Copyright 2012 Token counter for Haskell programs. -} module Main where import Language.Haskell.Exts import Language.Haskell.Exts.Lexer charFromToken (VarId s) = head s charFromToken (QVarId (s, _)) = head s charFromToken (ConId s) = head s charFromToken (QConId (s, _)) = head s charFromToken (VarSym s) = head s charFromToken (ConSym s) = head s charFromToken (QVarSym (s, _)) = head s charFromToken (QConSym (s, _)) = head s charFromToken (IntTok (_, s)) = head s charFromToken (FloatTok _) = '.' charFromToken (Character _) = '\'' charFromToken (StringTok _) = '"' charFromToken LeftParen = '(' charFromToken RightParen = ')' charFromToken LeftCurly = '{' charFromToken RightCurly = '}' charFromToken LeftSquare = '[' charFromToken RightSquare = ']' charFromToken Comma = ',' charFromToken Underscore = '_' charFromToken BackQuote = '`' charFromToken Dot = '.' charFromToken DotDot = '.' charFromToken Colon = ':' charFromToken DoubleColon = ':' charFromToken Equals = '=' charFromToken Backslash = '\\' charFromToken Bar = '|' charFromToken LeftArrow = '<' charFromToken RightArrow = '>' charFromToken At = '@' charFromToken Tilde = '~' charFromToken DoubleArrow = '>' charFromToken Minus = '-' charFromToken Exclamation = '!' charFromToken Star = '*' charFromToken KW_As = 'A' charFromToken KW_By = 'B' charFromToken KW_Case = 'C' charFromToken KW_Class = 'C' charFromToken KW_Data = 'D' charFromToken KW_Default = 'D' charFromToken KW_Deriving = 'D' charFromToken KW_Do = 'D' charFromToken KW_MDo = 'M' charFromToken KW_Else = 'E' charFromToken KW_Family = 'F' charFromToken KW_Forall = 'F' charFromToken KW_Group = 'G' charFromToken KW_Hiding = 'H' charFromToken KW_If = 'I' charFromToken KW_Import = 'I' charFromToken KW_In = 'I' charFromToken KW_Infix = 'I' charFromToken KW_InfixL = 'I' charFromToken KW_InfixR = 'I' charFromToken KW_Instance = 'I' charFromToken KW_Let = 'L' charFromToken KW_Module = 'M' charFromToken KW_NewType = 'N' charFromToken KW_Of = 'O' charFromToken KW_Proc = 'P' charFromToken KW_Rec = 'R' charFromToken KW_Then = 'T' charFromToken KW_Type = 'T' charFromToken KW_Using = 'U' charFromToken KW_Where = 'W' charFromToken _ = '?' simplifyBackQuotes :: [Token] -> [Token] simplifyBackQuotes [] = [] simplifyBackQuotes (BackQuote : t : BackQuote : ts) = t : simplifyBackQuotes ts simplifyBackQuotes (t : ts) = t : simplifyBackQuotes ts tokenize :: [Char] -> (Int, [Char]) tokenize s = case fmap (map unLoc) (lexTokenStream s) of ParseOk toks -> (length toks', map charFromToken toks') where toks' = simplifyBackQuotes toks _ -> (0, []) main = do text <- getContents let (numToks, shortToks) = tokenize text in putStrLn (show numToks ++ " " ++ shortToks)