From 925caaf80c022cd1ff82e640a71bb3c796de820b Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 31 Mar 2015 22:42:33 +0200 Subject: Refactor to improve understandability Split deletion and substitution, introduce consistent style (comment, example, implementation). --- spellchecker/src/SpellCorrector.java | 44 ++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'spellchecker') diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index 0456165..e1ee640 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -36,7 +36,7 @@ public class SpellCorrector { } /** - * Gets all candidate words, resulting from a single addition, deletion. + * Gets all candidate words, resulting from a single insertion, deletion. * substitution or transposition. * * @param word The (wrong) word to find (corrected) candidates for. @@ -55,16 +55,14 @@ public class SpellCorrector { ListOfWords.add(word2); }; - // generate words by insertion of a character + // Generate words by insertion of a character + // |p]en -> [ap]en, [p|en -> [pi]en, p[e|n -> p[ei]n, pe[n| -> pe[nm]. for (int i = 0; i <= word.length(); i++) { String head, head_last, tail; // the word is split into [0..i] [i..n] - // if i == word.length, then the last part is empty head = word.substring(0, i); - tail = i < word.length() ? word.substring(i) : ""; + tail = word.substring(i); - // |p]en -> [ap]en, [p|en -> [pi]en, - // p[e|n -> p[ei]n, pe[n| -> pe[nm]. for (char c : ALPHABET) { if (i == 0) { // insert "c" before current letter (i = 0) @@ -78,37 +76,47 @@ public class SpellCorrector { } } - // by substitution or deletion of a character + // by substitution of a character + // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N]. for (int i = 0; i < word.length(); i++) { - String head, middle, error, tail; + String head, middle, tail; // the word is split into [0..i] [i..i+1] [i+1..n] - // if i == word.length() - 1, then the tail is empty. head = word.substring(0, i); middle = word.substring(i, i + 1); tail = word.substring(i + 1); - // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N]. for (char c : ALPHABET) { // substitution of "middle" with "c" push.call(head + c + tail, middle, "" + c); } + } + // by deletion of a character. Requires at least two characters in the + // word (an empty string is invalid). + // |pe]in -> [e]in, p|ei]n -> p[i]n, pe|in] -> pe[n], pe[i|n] -> pe[i]. + if (word.length() > 1) { + for (int i = 0; i < word.length(); i++) { + String head, middle, error, tail; + // the word is split into [0..i] [i..i+1] [i+1..n] + head = word.substring(0, i); + middle = word.substring(i, i + 1); + tail = word.substring(i + 1); - // deletion of a single character after the current cursor. - // Requires at least two characters in the word. - // |pe]n -> [e]n, p|en] -> p[n], p[e|n] -> p[e]. - if (word.length() > 1) { if (i + 1 < word.length()) { + // Common case: deletion of the following character. + // p|ei]n -> p[i]n error = word.substring(i, i + 2); } else { + // Last case: operate on the previous and next characters. + // pe[i|n] -> pe[i] error = word.substring(i - 1, i + 1); } push.call(head + tail, error, middle); } } - // The Damerau-Levenshtein distance also includes transposition to - // account for spelling mistakes. This loop transposes the previous - // character with the current index. + // The Damerau-Levenshtein distance also includes transposition of two + // adjacent letters to account for spelling mistakes. + // [p|e]n -> [ep]n, p[e|n] -> p[ne]. for (int i = 1; i < word.length(); i++) { String head, middle, transposed, tail; // the word is split into [0..i-1] [i-1..i+1] [i+1..n] @@ -118,8 +126,6 @@ public class SpellCorrector { transposed = "" + middle.charAt(1) + middle.charAt(0); - // transposition of two adjacent letters - // [p|e]n -> [ep]n, p[e|n] -> p[ne]. push.call(head + transposed + tail, middle, transposed); } -- cgit v1.2.1