module Exercise_4 where import Data.List import Data.Array {-- -- Use a dynamic programming matrix to avoid recomputing values. -- Pretty basic really, but getting the array to behave was an interesting -- experience - all for that O(1) access-time complexity. -- At a certain threshold, transforming the strings to char-arrays -- is worth the overhead. When testing, I found this to be at a length -- of ~50 chars, which is, of course, specific to my machine, but that's -- the best I can do right now :D -- Did NOT realize we should compute transposition aswell, -- so I had to include that last minute, hence the ugly way the ITE is included. --} {-H4.1.10-} editDistance :: Eq a => [a] -> [a] -> Int editDistance s1 [] = length s1 editDistance[] s2 = length s2 editDistance s1 s2 | s1 == s2 = 0 | n >= 50 || m >= 50 = editDistance' s1 s2 n m | otherwise = editMat!(n,m) where n = length s1 m = length s2 editMat = listArray ((0,0),(n,m)) (map subDist [(x,y) | x<- [0..n], y<-[0..m]]) subDist (i,j) | i == 0 = j | j == 0 = i | otherwise = minimum [(editMat!(i1,j))+1,((editMat!(i,j1))+1), (editMat!(i1,j1)+x), y] where i1 = i-1 j1 = j-1 i2 = i-2 j2 = j-2 x = if (s1 !! (i1)) == (s2 !! (j1)) then 0 else 1 -- transposition; very dirty but works D: y = if i > 1 && j > 1 && (s1!!i1) == (s2!!j2) && (s1!!i2) == (s2!!j1) then editMat!(i2,j2) + 1 else maxBound::Int -- same implementation, just uses an array to speed up access time of s1/s2 editDistance' :: Eq a => [a] -> [a] -> Int -> Int -> Int editDistance' s1 s2 n m = editMat!(n,m) where s1' = listArray (1,n) s1 s2' = listArray (1,m) s2 editMat = listArray ((0,0),(n,m)) (map subDist [(x,y) | x<- [0..n], y<-[0..m]]) subDist (i,j) | i == 0 = j | j == 0 = i | otherwise = minimum [(editMat!(i1,j))+1,((editMat!(i,j1))+1), (editMat!(i1,j1)+x), y] where i1 = i-1 j1 = j-1 i2 = i-2 j2 = j-2 x = if (s1'!i) == (s2'!j) then 0 else 1 -- transposition; very dirty but works D: y = if i > 1 && j > 1 && (s1'!i) == (s2'!j1) && (s1'!i1) == (s2'!j) then editMat!(i2,j2) + 1 else maxBound::Int {-- -- Could have been optimized using a trie, but I don't really know the input -- sets so figuring out a good way to ensure the added overhead is -- worth it turned out to be rather difficult. -- Ideally I would have liked to use some realation between the ammount -- of lcs present and the size of the set, but that went nowhere and required -- me to compute a ton of lcs anyways, so yeah. Could not find a good fix, maybe -- I'm just missing something obvious... --} {-H4.1.11-} {-WETT-} spellCorrect :: [String] -> [String] -> [[String]] spellCorrect [] _ = [] spellCorrect (d1:ds) xs = map (\w -> editList ds w (editDistance d1 w) [d1]) xs where editList [] _ _ res = res editList (y:ys) w cur res | dist < cur = editList ys w dist [y] | dist > cur = editList ys w cur res | otherwise = editList ys w cur (y:res) where dist = editDistance y w {-TTEW-}