summaryrefslogtreecommitdiff
path: root/spellchecker
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-04-01 05:13:41 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-04-01 05:13:41 +0200
commitadb460408017c6344ee0f836aa56ce25143d9d6b (patch)
treef4259fe58a7c5a1c2ba095765358eaeb5a6ea096 /spellchecker
parent94838f67f689bf2123631555f13b5b2287f07118 (diff)
downloadassignment4-adb460408017c6344ee0f836aa56ce25143d9d6b.tar.gz
Initial attempt at rating sentences
Diffstat (limited to 'spellchecker')
-rw-r--r--spellchecker/src/SpellCorrector.java151
1 files changed, 145 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 92f7479..179206f 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -20,12 +20,32 @@ public class SpellCorrector {
}
String[] words = phrase.split(" ");
- String finalSuggestion = "";
- /**
- * CODE TO BE ADDED *
- */
- return finalSuggestion.trim();
+ SentenceRater rater = new SentenceRater(words);
+
+ // Try to change 1 word and see whether the suggestions are OK.
+ for (int attempt = 0; attempt < 3; attempt++) {
+ for (int i = 0; i < words.length; i++) {
+ final int word_index = i;
+ String old_word = words[word_index];
+ // try to find a better suggestion for this word.
+ Map<String, Double> candidates = getCandidateWords(old_word);
+ candidates.forEach((word, channel_probability) -> {
+ rater.tryWord(word_index, word, channel_probability);
+ });
+ };
+ if (rater.hasBetterSuggestion()) {
+ rater.saveSuggestion();
+ // TODO: make this nicer
+ words = rater.getBestSentence();
+ System.err.println("Got new sentence: " + String.join(" ", words));
+ } else {
+ System.err.println("No suggestion found.");
+ break;
+ }
+ }
+
+ return String.join(" ", words);
}
/**
@@ -54,7 +74,7 @@ public class SpellCorrector {
// is this a known correction?
if (errorCount == 0.0) {
// no,
- p_channel = 0.0;
+ p_channel = 0.1;
} else {
// yes,
p_channel = correctionCount / errorCount;
@@ -162,4 +182,123 @@ public class SpellCorrector {
push.call(head + transposed + tail, middle, transposed);
}
}
+
+ private class SentenceRater {
+
+ private final String[] words;
+ private final double[] word_likelihoods;
+ /**
+ * Words that cannot be modified in further iterations.
+ */
+ private final boolean[] words_locked;
+
+ private double best_likelihood = Double.MIN_VALUE;
+ private WordModification best_modification;
+
+ public SentenceRater(String[] words) {
+ this.words = words.clone();
+ this.word_likelihoods = new double[words.length];
+ this.words_locked = new boolean[words.length];
+ for (int i = 0; i < words.length; i++) {
+ word_likelihoods[i] = getWordProbability(i, words[i], .5);
+ }
+ }
+
+ /**
+ * Calculates the probability that the word {@code word} is valid at
+ * position {@code index}.
+ */
+ public double getWordProbability(int index, String word,
+ double channel_probability) {
+ double probability;
+ // a word not in the vocabulary is certainly wrong,
+ // changed (or consequentive) words should also not be changed.
+ if (!cr.inVocabulary(word) || words_locked[index]) {
+ return 0.0;
+ }
+
+ // Calculate probability of unigram.
+ double p = cr.getSmoothedCount(words[index]);
+ p *= channel_probability;
+ // Add probability of bi-grams.
+ if (index > 0) {
+ // TODO bigrams
+ }
+ probability = p;
+
+ if (words[index].equals(word)) {
+ // TODO: tune and magic number
+ probability *= .80;
+ } else {
+ probability *= channel_probability;
+ }
+ // TODO: what if this is 0?
+ return probability;
+ }
+
+ /**
+ * Tries to modify word at position {@code index} to word and calculates
+ * the likelihood of the word. The best single-word modification is
+ * remembered (the sentence itself will not be modified).
+ */
+ public void tryWord(int index, String word, double channel_probability) {
+ double likelihood;
+
+ // try the modification, calculate the result and restore.
+ likelihood = getWordProbability(index, word, channel_probability);
+
+ if (likelihood > best_likelihood) {
+ best_likelihood = likelihood;
+ best_modification = new WordModification(index, word);
+ }
+ }
+
+ /**
+ * Returns the sentence, possibly changed.
+ */
+ public String[] getBestSentence() {
+ String[] new_words = words.clone();
+ if (best_modification != null) {
+ new_words[best_modification.index] = best_modification.word;
+ }
+ return new_words;
+ }
+
+ /**
+ * Returns true if it is likely that a word in the sentence can be
+ * corrected.
+ */
+ public boolean hasBetterSuggestion() {
+ return best_modification != null;
+ }
+
+ /**
+ * Save the best suggestion so far, ensuring that future suggestions
+ * won't overwrite this one.
+ */
+ public void saveSuggestion() {
+ int index = best_modification.index;
+ words[index] = best_modification.word;
+ if (index > 0) {
+ words_locked[index - 1] = true;
+ }
+ words_locked[index] = true;
+ if (index + 1 < words.length) {
+ words_locked[index + 1] = true;
+ }
+ best_modification = null;
+ // TODO: save likelihood
+ }
+
+ private class WordModification {
+
+ private final int index;
+ private final String word;
+
+ public WordModification(int index, String word) {
+ this.index = index;
+ this.word = word;
+ }
+ }
+ }
}