diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-03-31 20:03:39 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-03-31 20:03:39 +0200 |
commit | 8d123ad5161a326597386c5261f2c88dffecf7ee (patch) | |
tree | ffb120d18165edb2e38c96edb0a722ae1a3fddea | |
parent | bec3ac2780fce9f90830f9f21a03b6c5487cd21c (diff) | |
download | assignment4-8d123ad5161a326597386c5261f2c88dffecf7ee.tar.gz |
Implement Damerau-Levenshtein distance (add transposition)
Suggested around 6:16 of the Coursera lecture on
"The Noisy Channel Model of Spelling".
https://class.coursera.org/nlp/lecture/22
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 20 | ||||
-rw-r--r-- | spellchecker/test/SpellCorrectorTest.java | 5 |
2 files changed, 23 insertions, 2 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index c0d7818..8ebb829 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -36,8 +36,8 @@ public class SpellCorrector { } /** - * Gets all candidate words, resulting from a single addition, deletion or - * substitution. + * Gets all candidate words, resulting from a single addition, deletion. + * substitution or transposition. * * @param word The word to find candidates for. * @return A set of words with candidate words. @@ -73,6 +73,22 @@ public class SpellCorrector { ListOfWords.add(head + tail); } } + + // The Damerau-Levenshtein distance also includes transposition to + // account for spelling mistakes. This loop transposes the previous + // character with the current index. + for (int i = 1; i < word.length(); i++) { + // the word is split into [0..i-1] [i-1..i] [i..i+1] [i+1..n] + // if i == word.length() - 1, then the tail is empty. + String head = word.substring(0, i - 1); + char c1 = word.charAt(i - 1); + char c2 = word.charAt(i); + String tail = i + 1 < word.length() ? word.substring(i + 1) : ""; + + // transposition of two adjacent letters + ListOfWords.add(head + c2 + c1 + tail); + } + return cr.inVocabulary(ListOfWords); } } diff --git a/spellchecker/test/SpellCorrectorTest.java b/spellchecker/test/SpellCorrectorTest.java index dbea0ab..1438b04 100644 --- a/spellchecker/test/SpellCorrectorTest.java +++ b/spellchecker/test/SpellCorrectorTest.java @@ -75,6 +75,8 @@ public class SpellCorrectorTest { // deletion words.add("p"); words.add("u"); + // transposition + words.add("pu"); checkGetCandidateWords(fullCR, "up", words); } @@ -97,6 +99,9 @@ public class SpellCorrectorTest { words.add("ps"); words.add("us"); words.add("up"); + // transposition + words.add("pus"); + words.add("usp"); checkGetCandidateWords(fullCR, "ups", words); } |