summaryrefslogtreecommitdiff
path: root/spellchecker
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-03-31 22:42:33 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-03-31 22:42:33 +0200
commit925caaf80c022cd1ff82e640a71bb3c796de820b (patch)
treee92812b4899143e842b35dc3afe3ebf493b8dad9 /spellchecker
parentc3a9b5c241e89a6df6a87eb62b5ffea551ed4617 (diff)
downloadassignment4-925caaf80c022cd1ff82e640a71bb3c796de820b.tar.gz
Refactor to improve understandability
Split deletion and substitution, introduce consistent style (comment, example, implementation).
Diffstat (limited to 'spellchecker')
-rw-r--r--spellchecker/src/SpellCorrector.java44
1 files changed, 25 insertions, 19 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 0456165..e1ee640 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -36,7 +36,7 @@ public class SpellCorrector {
}
/**
- * Gets all candidate words, resulting from a single addition, deletion.
+ * Gets all candidate words, resulting from a single insertion, deletion.
* substitution or transposition.
*
* @param word The (wrong) word to find (corrected) candidates for.
@@ -55,16 +55,14 @@ public class SpellCorrector {
ListOfWords.add(word2);
};
- // generate words by insertion of a character
+ // Generate words by insertion of a character
+ // |p]en -> [ap]en, [p|en -> [pi]en, p[e|n -> p[ei]n, pe[n| -> pe[nm].
for (int i = 0; i <= word.length(); i++) {
String head, head_last, tail;
// the word is split into [0..i] [i..n]
- // if i == word.length, then the last part is empty
head = word.substring(0, i);
- tail = i < word.length() ? word.substring(i) : "";
+ tail = word.substring(i);
- // |p]en -> [ap]en, [p|en -> [pi]en,
- // p[e|n -> p[ei]n, pe[n| -> pe[nm].
for (char c : ALPHABET) {
if (i == 0) {
// insert "c" before current letter (i = 0)
@@ -78,37 +76,47 @@ public class SpellCorrector {
}
}
- // by substitution or deletion of a character
+ // by substitution of a character
+ // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N].
for (int i = 0; i < word.length(); i++) {
- String head, middle, error, tail;
+ String head, middle, tail;
// the word is split into [0..i] [i..i+1] [i+1..n]
- // if i == word.length() - 1, then the tail is empty.
head = word.substring(0, i);
middle = word.substring(i, i + 1);
tail = word.substring(i + 1);
- // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N].
for (char c : ALPHABET) {
// substitution of "middle" with "c"
push.call(head + c + tail, middle, "" + c);
}
+ }
+ // by deletion of a character. Requires at least two characters in the
+ // word (an empty string is invalid).
+ // |pe]in -> [e]in, p|ei]n -> p[i]n, pe|in] -> pe[n], pe[i|n] -> pe[i].
+ if (word.length() > 1) {
+ for (int i = 0; i < word.length(); i++) {
+ String head, middle, error, tail;
+ // the word is split into [0..i] [i..i+1] [i+1..n]
+ head = word.substring(0, i);
+ middle = word.substring(i, i + 1);
+ tail = word.substring(i + 1);
- // deletion of a single character after the current cursor.
- // Requires at least two characters in the word.
- // |pe]n -> [e]n, p|en] -> p[n], p[e|n] -> p[e].
- if (word.length() > 1) {
if (i + 1 < word.length()) {
+ // Common case: deletion of the following character.
+ // p|ei]n -> p[i]n
error = word.substring(i, i + 2);
} else {
+ // Last case: operate on the previous and next characters.
+ // pe[i|n] -> pe[i]
error = word.substring(i - 1, i + 1);
}
push.call(head + tail, error, middle);
}
}
- // The Damerau-Levenshtein distance also includes transposition to
- // account for spelling mistakes. This loop transposes the previous
- // character with the current index.
+ // The Damerau-Levenshtein distance also includes transposition of two
+ // adjacent letters to account for spelling mistakes.
+ // [p|e]n -> [ep]n, p[e|n] -> p[ne].
for (int i = 1; i < word.length(); i++) {
String head, middle, transposed, tail;
// the word is split into [0..i-1] [i-1..i+1] [i+1..n]
@@ -118,8 +126,6 @@ public class SpellCorrector {
transposed = "" + middle.charAt(1) + middle.charAt(0);
- // transposition of two adjacent letters
- // [p|e]n -> [ep]n, p[e|n] -> p[ne].
push.call(head + transposed + tail, middle, transposed);
}