summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-03-31 20:03:39 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-03-31 20:03:39 +0200
commit8d123ad5161a326597386c5261f2c88dffecf7ee (patch)
treeffb120d18165edb2e38c96edb0a722ae1a3fddea
parentbec3ac2780fce9f90830f9f21a03b6c5487cd21c (diff)
downloadassignment4-8d123ad5161a326597386c5261f2c88dffecf7ee.tar.gz
Implement Damerau-Levenshtein distance (add transposition)
Suggested around 6:16 of the Coursera lecture on "The Noisy Channel Model of Spelling". https://class.coursera.org/nlp/lecture/22
-rw-r--r--spellchecker/src/SpellCorrector.java20
-rw-r--r--spellchecker/test/SpellCorrectorTest.java5
2 files changed, 23 insertions, 2 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index c0d7818..8ebb829 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -36,8 +36,8 @@ public class SpellCorrector {
}
/**
- * Gets all candidate words, resulting from a single addition, deletion or
- * substitution.
+ * Gets all candidate words, resulting from a single addition, deletion.
+ * substitution or transposition.
*
* @param word The word to find candidates for.
* @return A set of words with candidate words.
@@ -73,6 +73,22 @@ public class SpellCorrector {
ListOfWords.add(head + tail);
}
}
+
+ // The Damerau-Levenshtein distance also includes transposition to
+ // account for spelling mistakes. This loop transposes the previous
+ // character with the current index.
+ for (int i = 1; i < word.length(); i++) {
+ // the word is split into [0..i-1] [i-1..i] [i..i+1] [i+1..n]
+ // if i == word.length() - 1, then the tail is empty.
+ String head = word.substring(0, i - 1);
+ char c1 = word.charAt(i - 1);
+ char c2 = word.charAt(i);
+ String tail = i + 1 < word.length() ? word.substring(i + 1) : "";
+
+ // transposition of two adjacent letters
+ ListOfWords.add(head + c2 + c1 + tail);
+ }
+
return cr.inVocabulary(ListOfWords);
}
}
diff --git a/spellchecker/test/SpellCorrectorTest.java b/spellchecker/test/SpellCorrectorTest.java
index dbea0ab..1438b04 100644
--- a/spellchecker/test/SpellCorrectorTest.java
+++ b/spellchecker/test/SpellCorrectorTest.java
@@ -75,6 +75,8 @@ public class SpellCorrectorTest {
// deletion
words.add("p");
words.add("u");
+ // transposition
+ words.add("pu");
checkGetCandidateWords(fullCR, "up", words);
}
@@ -97,6 +99,9 @@ public class SpellCorrectorTest {
words.add("ps");
words.add("us");
words.add("up");
+ // transposition
+ words.add("pus");
+ words.add("usp");
checkGetCandidateWords(fullCR, "ups", words);
}