------------------------------------------------------------------------- -- -- Huffman coding in Haskell. -- -- Based on the files by Simon Thompson -- ------------------------------------------------------------------------- module Huffman (Bit, Tree(..), encode, decode, mkTree, mkCTable, mkFTable) where import Data.List (sortBy) import Data.Maybe (fromJust) -- Interface encode :: CTable -> String -> [Bit] decode :: Tree -> [Bit] -> String mkTree :: FTable -> Tree mkCTable :: Tree -> CTable mkFTable :: String -> FTable data Tree = Leaf Char Int | Node Tree Tree Int data Bit = L | R deriving (Eq,Show) type CTable = [ (Char,[Bit]) ] type FTable = [ (Char, Int) ] -- Implementation -- Encoding a string encode tbl = concat . map (\c -> fromJust (lookup c tbl)) -- Decoding a string decode tr = dec tr where dec (Node t1 t2 _) (L:rest) = dec t1 rest dec (Node t1 t2 _) (R:rest) = dec t2 rest dec (Leaf c _) rest = c : decode tr rest dec _ [] = [] -- Huffman's algorithm: Building the code tree -- Convert the trees to a list, then combine into a single tree. mkTree = combine . toTrees -- Huffman codes are created bottom up: look for the least -- two frequent letters, make these a new Node -- and repeat until one tree is formed. -- The function toTreeList makes the initial data structure. toTrees :: FTable -> [Tree] toTrees = map (uncurry Leaf) . sortBy (\(_,m) (_,n) -> compare m n) -- The value of a tree. value :: Tree -> Int value (Leaf _ n) = n value (Node _ _ n) = n -- Pair two trees. pair :: Tree -> Tree -> Tree pair t1 t2 = Node t1 t2 (v1+v2) where v1 = value t1 v2 = value t2 -- Insert a tree in a list of trees sorted by ascending value. insTree :: Tree -> [Tree] -> [Tree] insTree t [] = [t] insTree t (t1:ts) | (value t <= value t1) = t : t1 : ts | otherwise = t1 : insTree t ts -- -- Combine the front two elements of the list of trees. combine2 :: [Tree] -> [Tree] combine2 (t1 : t2 : ts) = insTree (pair t1 t2) ts -- Combine the whole list. combine :: [Tree] -> Tree combine [t] = t combine ts = combine (combine2 ts) -- Making a code table from a Huffman tree. mkCTable = mkCTab [] -- Auxiliary function used in conversion to a code table. The first argument -- is the Bit list which codes the path in the tree to the current Node, -- and so mkCTable initialises it with the empty list. mkCTab :: [Bit] -> Tree -> CTable mkCTab cd (Leaf c _) = [(c,cd)] mkCTab cd (Node t1 t2 _) = (mkCTab (cd++[L]) t1) ++ (mkCTab (cd++[R]) t2) -- Show a tree, using indentation to show structure. instance Show Tree where show t = showIndent 0 t -- The auxiliary function showIndent has a second, current -- level of indentation, as a parameter. where showIndent :: Int -> Tree -> String showIndent m (Leaf c n) = spaces m ++ show c ++ " " ++ show n ++ "\n" showIndent m (Node t1 t2 n) = showIndent (m+4) t1 ++ spaces m ++ "[" ++ show n ++ "]" ++ "\n" ++ showIndent (m+4) t2 spaces n = replicate n ' ' -- Build a frequency table table from a string mkFTable cs = freq cs [] freq :: String -> FTable -> FTable freq [] tbl = tbl freq (c:cs) tbl = freq cs (incr c tbl) incr :: Char -> FTable -> FTable incr c [] = [(c,1)] incr c ((d,n):ps) = if c==d then (d,n+1):ps else (d,n) : incr c ps