module Exercise_4 where import Data.Array import Data.List (partition) import Data.Maybe (fromMaybe) import Control.Monad (liftM2) import Control.Monad.Fix (fix) {-H4.1.10-} editDistance :: Eq a => [a] -> [a] -> Int editDistance a b = d (length a) (length b) where d :: Int -> Int -> Int d i j = minimum $ filter (/= -1) [f1, f2, f3, f4, f5, f6] where f1 = if i == 0 && j == 0 then 0 else -1 f2 = if i > 0 then d (i-1) j + 1 else -1 f3 = if j > 0 then d i (j-1) + 1 else -1 f4 = if i > 0 && j > 0 && a!!(i-1) == b!!(j-1) then d (i-1) (j-1) else -1 f5 = if i > 0 && j > 0 && a!!(i-1) /= b!!(j-1) then d (i-1) (j-1) + 1 else -1 f6 = if i > 1 && j > 1 && a!!(i-1) == b!!(j-2) && a!!(i-2) == b!!(j-1) then d (i-2) (j-2) + 1 else -1 {-H4.1.11-} {-WETT-} spellCorrect :: [String] -> [String] -> [[String]] spellCorrect dict = map (closestWords (-1) [] dict) closestWords :: Int -> [String] -> [String] -> String -> [String] closestWords _ cws [] _ = cws closestWords sd cws (d:ds) x | sd == -1 = closestWords ed [d] ds x -- first dictionary word -- abs (length x - length d) is a lower bound of the edit distance | sd < abs (lx - ld) = closestWords sd cws ds x -- compare calculated edit distance with shortest edit distance found so far | ed > sd = closestWords sd cws ds x | ed < sd = closestWords ed [d] ds x | otherwise = closestWords ed (d:cws) ds x where ed = dynamicDamerauLevenshtein x d sd ld = length d lx = length x -- Restricted Damerau-Levenshtein distance (optimal string alignment). -- Based on the Wagner-Fischer algorithm. dynamicDamerauLevenshtein :: String -> String -> Int -> Int dynamicDamerauLevenshtein a b k = dij la lb where dij :: Int -> Int -> Int dij i j | i == 0 = j | j == 0 = i | thresholdExceeded = maxBound -- for the remaining conditions: i > 0, j > 0 | a' ! i == b' ! j = lazyDijArray ! (i-1, j-1) -- char equality, move ↖, cost 0 -- for the remaining conditions: a' ! i /= b' ! i | otherwise = minimum [delete, insert, edit, transpose] where thresholdExceeded :: Bool thresholdExceeded = k /= -1 && abs ((la-i) - (lb-j)) > k delete = if deleteCell == maxBound then maxBound else deleteCell + 1 where deleteCell = lazyDijArray ! (i - 1, j) -- move ↑ insert = if insertCell == maxBound then maxBound else insertCell + 1 where insertCell = lazyDijArray ! (i, j - 1) -- move ← edit = if editCell == maxBound then maxBound else editCell + 1 where editCell = lazyDijArray ! (i - 1, j - 1) -- move ↖ transpose | i > 1 && j > 1 && a' ! i == b' ! (j - 1) && a' ! (i - 1) == b' ! j && transposeCell /= maxBound = transposeCell + 1 | otherwise = maxBound where transposeCell = lazyDijArray ! (i - 2, j - 2) -- move ↖↖ -- use arrays as they are now allowed and should be accessible faster lazyDijArray = listArray ((0, 0), (la, lb)) [dij i' j' | i' <- [0..la], j' <- [0..lb]] -- convert words to arrays for faster access, one-indexed a' = listArray (1, la) a b' = listArray (1, lb) b la = length a lb = length b {-TTEW-} {-MCCOMMENT Alternative solution, following Allison's approach (http://users.monash.edu/~lloyd/tildeStrings/Alignment/92.IPL.html), but adding the Damerau part. As the algorithm was already given in Lazy ML, implementing it in Haskell was no real challenge, so I guess this would not count as a valid submission. spellCorrect :: [String] -> [String] -> [[String]] spellCorrect dict = map (closestWords (-1) [] dict) closestWords :: Int -> [String] -> [String] -> String -> [String] closestWords _ cws [] _ = cws closestWords sd cws (d:ds) x | sd == -1 = closestWords ed [d] ds x -- first dictionary word -- abs (length x - length d) is a lower bound of the edit distance | sd < abs (lx - ld) = closestWords sd cws ds x -- compare calculated edit distance with shortest edit distance found so far | ed > sd = closestWords sd cws ds x | ed < sd = closestWords ed [d] ds x | otherwise = closestWords ed (d:cws) ds x where ed = diagonalDamerauLevenshtein x d ld = length d lx = length x diagonalDamerauLevenshtein :: String -> String -> Int diagonalDamerauLevenshtein x d | lengthDiff == 0 = last centralDiagonal | lengthDiff > 0 = last $ lowerDiagonals !! (lengthDiff - 1) | otherwise = last $ higherDiagonals !! negate (lengthDiff + 1) where lengthDiff :: Int lengthDiff = length x - length d centralDiagonal :: [Int] centralDiagonal = buildDiagonal x d (head higherDiagonals) (-1 : head lowerDiagonals) higherDiagonals :: [[Int]] higherDiagonals = fix (buildDiagonals x d . (centralDiagonal :)) lowerDiagonals :: [[Int]] lowerDiagonals = fix (buildDiagonals d x . (centralDiagonal :)) buildDiagonals :: [Char] -> [Char] -> [[Int]] -> [[Int]] buildDiagonals _ [] _ = [] buildDiagonals _ _ [] = [] buildDiagonals a (_:bs) (lastDiagonal:diagonals) = buildDiagonal a bs nextDiagonal lastDiagonal : buildDiagonals a bs diagonals where nextDiagonal :: [Int] nextDiagonal = head (tail diagonals) buildDiagonal :: [Char] -> [Char] -> [Int] -> [Int] -> [Int] buildDiagonal a b higherDiagonal lowerDiagonal = firstElement : buildHelper a b firstElement higherDiagonal (tail lowerDiagonal) where firstElement = head lowerDiagonal + 1 buildHelper :: [Char] -> [Char] -> Int -> [Int] -> [Int] -> [Int] buildHelper [] _ _ _ _ = [] buildHelper _ [] _ _ _ = [] buildHelper (a:as) (b:bs) nw n w | not (null as) && not (null bs) && a == head bs && b == head as = nw : buildHelper as bs nw (tail n) (tail w) -- transposition | otherwise = cost : buildHelper as bs cost (tail n) (tail w) where cost :: Int cost | a == b = nw | otherwise = lazyMin (head w) nw (head n) + 1 where lazyMin :: Int -> Int -> Int -> Int lazyMin a' b' c' | a' < b' = a' | otherwise = min b' c' -}