diff options
Diffstat (limited to 'spellchecker/src/SpellCorrector.java')
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 42 |
1 files changed, 37 insertions, 5 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index c8451bf..c0d7818 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -6,7 +6,7 @@ public class SpellCorrector { final private CorpusReader cr; final private ConfusionMatrixReader cmr; - final char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray(); + final static char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray(); public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) { this.cr = cr; @@ -35,12 +35,44 @@ public class SpellCorrector { return 0.0; } + /** + * Gets all candidate words resulting from a single insertion, deletion or + * substitution. + * + * @param word The word to find candidates for. + * @return The candidate words, as filtered by the corpus reader's vocabulary check. + */ public HashSet<String> getCandidateWords(String word) { - HashSet<String> ListOfWords = new HashSet<String>(); + HashSet<String> ListOfWords = new HashSet<>(); - /** - * CODE TO BE ADDED * - */ + // generate words by insertion of a character + for (int i = 0; i <= word.length(); i++) { + // the word is split into [0..i] [i..n] + // if i == word.length(), then the last part is empty + String head = word.substring(0, i); + String tail = i < word.length() ? word.substring(i) : ""; + for (char c : ALPHABET) { + // insertion of a single character + ListOfWords.add(head + c + tail); + } + } + + for (int i = 0; i < word.length(); i++) { + // the word is split into [0..i] [i..i+1] [i+1..n] + // if i == word.length() - 1, then the tail is empty. + String head = word.substring(0, i); + String tail = word.substring(i + 1); + + for (char c : ALPHABET) { + // substitution + ListOfWords.add(head + c + tail); + } + + // deletion of a single character (prevent adding empty words) + if (word.length() > 1) { + ListOfWords.add(head + tail); + } + } return cr.inVocabulary(ListOfWords); } } |