summary refs log tree commit diff
path: root/spellchecker/src/SpellCorrector.java
diff options
context:
space:
mode:
Diffstat (limited to 'spellchecker/src/SpellCorrector.java')
-rw-r--r-- spellchecker/src/SpellCorrector.java 42
1 files changed, 37 insertions, 5 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index c8451bf..c0d7818 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -6,7 +6,7 @@ public class SpellCorrector {
final private CorpusReader cr;
final private ConfusionMatrixReader cmr;
- final char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray();
+ final static char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray();
public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) {
this.cr = cr;
@@ -35,12 +35,44 @@ public class SpellCorrector {
return 0.0;
}
+ /**
+ * Gets all candidate words that result from a single insertion, deletion or
+ * substitution of a character in the given word.
+ *
+ * @param word The word to find candidates for.
+ * @return The generated candidates after being passed through {@code cr.inVocabulary}.
+ */
public HashSet<String> getCandidateWords(String word) {
- HashSet<String> ListOfWords = new HashSet<String>();
+ HashSet<String> ListOfWords = new HashSet<>();
- /**
- * CODE TO BE ADDED *
- */
+ // generate words by insertion of a character
+ for (int i = 0; i <= word.length(); i++) {
+ // the word is split into head = [0..i) and tail = [i..n), where n == word.length()
+ // if i == word.length(), then the tail is empty
+ String head = word.substring(0, i);
+ String tail = i < word.length() ? word.substring(i) : "";
+ for (char c : ALPHABET) {
+ // insertion of a single character
+ ListOfWords.add(head + c + tail);
+ }
+ }
+
+ for (int i = 0; i < word.length(); i++) {
+ // the word is split into head = [0..i), the character at i, and tail = [i+1..n)
+ // if i == word.length() - 1, then the tail is empty.
+ String head = word.substring(0, i);
+ String tail = word.substring(i + 1);
+
+ for (char c : ALPHABET) {
+ // substitution of the character at i (when c equals that character, this adds the unchanged word)
+ ListOfWords.add(head + c + tail);
+ }
+
+ // deletion of a single character (prevent adding empty words)
+ if (word.length() > 1) {
+ ListOfWords.add(head + tail);
+ }
+ }
return cr.inVocabulary(ListOfWords);
}
}