summaryrefslogtreecommitdiff
path: root/spellchecker
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-03-31 22:24:21 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-03-31 22:24:21 +0200
commitc3a9b5c241e89a6df6a87eb62b5ffea551ed4617 (patch)
treefeb37d5becd6bbc6f3aa232aeb4653aad8888877 /spellchecker
parent8d123ad5161a326597386c5261f2c88dffecf7ee (diff)
downloadassignment4-c3a9b5c241e89a6df6a87eb62b5ffea551ed4617.tar.gz
Refactor getCandidateWords for ConfusionMatrix
Diffstat (limited to 'spellchecker')
-rw-r--r--spellchecker/src/SpellCorrector.java70
-rw-r--r--spellchecker/src/TriFunction.java13
-rw-r--r--spellchecker/test/SpellCorrectorTest.java5
3 files changed, 70 insertions, 18 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 8ebb829..0456165 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -39,38 +39,70 @@ public class SpellCorrector {
* Gets all candidate words, resulting from a single addition, deletion.
* substitution or transposition.
*
- * @param word The word to find candidates for.
+ * @param word The (wrong) word to find (corrected) candidates for.
* @return A set of words with candidate words.
*/
public HashSet<String> getCandidateWords(String word) {
HashSet<String> ListOfWords = new HashSet<>();
+ // tries to add word2 to the list of words. This word2 was generated
+ // from "word" by changing "error" to "correct".
+ TriFunction<String, String, String> push = (word2, error, correct) -> {
+ if (!cr.inVocabulary(word2)) {
+ return;
+ }
+
+ ListOfWords.add(word2);
+ };
+
// generate words by insertion of a character
for (int i = 0; i <= word.length(); i++) {
+ String head, head_last, tail;
// the word is split into [0..i] [i..n]
// if i == word.length, then the last part is empty
- String head = word.substring(0, i);
- String tail = i < word.length() ? word.substring(i) : "";
+ head = word.substring(0, i);
+ tail = i < word.length() ? word.substring(i) : "";
+
+ // |p]en -> [ap]en, [p|en -> [pi]en,
+ // p[e|n -> p[ei]n, pe[n| -> pe[nm].
for (char c : ALPHABET) {
- // insertion of a single character
- ListOfWords.add(head + c + tail);
+ if (i == 0) {
+ // insert "c" before current letter (i = 0)
+ head_last = word.substring(0);
+ push.call(head + c + tail, head_last, c + head_last);
+ } else {
+ // insert "c" after current letter (i > 0)
+ head_last = word.substring(i - 1, i);
+ push.call(head + c + tail, head_last, head_last + c);
+ }
}
}
+ // by substitution or deletion of a character
for (int i = 0; i < word.length(); i++) {
+ String head, middle, error, tail;
// the word is split into [0..i] [i..i+1] [i+1..n]
// if i == word.length() - 1, then the tail is empty.
- String head = word.substring(0, i);
- String tail = word.substring(i + 1);
+ head = word.substring(0, i);
+ middle = word.substring(i, i + 1);
+ tail = word.substring(i + 1);
+ // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N].
for (char c : ALPHABET) {
- // substitution
- ListOfWords.add(head + c + tail);
+ // substitution of "middle" with "c"
+ push.call(head + c + tail, middle, "" + c);
}
- // deletion of a single character (prevent adding empty words)
+ // deletion of a single character after the current cursor.
+ // Requires at least two characters in the word.
+ // |pe]n -> [e]n, p|en] -> p[n], p[e|n] -> p[e].
if (word.length() > 1) {
- ListOfWords.add(head + tail);
+ if (i + 1 < word.length()) {
+ error = word.substring(i, i + 2);
+ } else {
+ error = word.substring(i - 1, i + 1);
+ }
+ push.call(head + tail, error, middle);
}
}
@@ -78,15 +110,17 @@ public class SpellCorrector {
// account for spelling mistakes. This loop transposes the previous
// character with the current index.
for (int i = 1; i < word.length(); i++) {
- // the word is split into [0..i-1] [i-1..i] [i..i+1] [i+1..n]
- // if i == word.length() - 1, then the tail is empty.
- String head = word.substring(0, i - 1);
- char c1 = word.charAt(i - 1);
- char c2 = word.charAt(i);
- String tail = i + 1 < word.length() ? word.substring(i + 1) : "";
+ String head, middle, transposed, tail;
+ // the word is split into [0..i-1] [i-1..i+1] [i+1..n]
+ head = word.substring(0, i - 1);
+ middle = word.substring(i - 1, i + 1);
+ tail = word.substring(i + 1);
+
+ transposed = "" + middle.charAt(1) + middle.charAt(0);
// transposition of two adjacent letters
- ListOfWords.add(head + c2 + c1 + tail);
+ // [p|e]n -> [ep]n, p[e|n] -> p[ne].
+ push.call(head + transposed + tail, middle, transposed);
}
return cr.inVocabulary(ListOfWords);
diff --git a/spellchecker/src/TriFunction.java b/spellchecker/src/TriFunction.java
new file mode 100644
index 0000000..327e670
--- /dev/null
+++ b/spellchecker/src/TriFunction.java
@@ -0,0 +1,13 @@
+
+/**
+ * An interface for a lambda function accepting three different parameters.
+ *
+ * @author Peter Wu
+ * @param <A>
+ * @param <B>
+ * @param <C>
+ */
+interface TriFunction<A, B, C> {
+
+ public void call(A a, B b, C c);
+}
diff --git a/spellchecker/test/SpellCorrectorTest.java b/spellchecker/test/SpellCorrectorTest.java
index 1438b04..a00e236 100644
--- a/spellchecker/test/SpellCorrectorTest.java
+++ b/spellchecker/test/SpellCorrectorTest.java
@@ -116,6 +116,11 @@ public class SpellCorrectorTest {
}
@Override
+ public boolean inVocabulary(String word) {
+ return true;
+ }
+
+ @Override
public HashSet<String> inVocabulary(Set<String> set) {
return new HashSet<>(set);
}