summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl>, 2015-04-02 10:30:03 +0200
committer: Peter Wu <peter@lekensteyn.nl>, 2015-04-02 10:30:03 +0200
commit: 1d223946bd0f71e9bf905d4e5fd034a8aab9d56f (patch)
tree: 72f5b150ed3ecd552b344d3a08b86befc4db3850
parent: 148cdd22624c36997c331c1baf9fdccf5e40bcb4 (diff)
download: assignment4-1d223946bd0f71e9bf905d4e5fd034a8aab9d56f.tar.gz
Consider the probability of the whole sentence
-rw-r--r-- spellchecker/src/SpellCorrector.java | 55
1 file changed, 40 insertions, 15 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 4978f4e..ef67622 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -48,7 +48,7 @@ public class SpellCorrector {
rater.saveSuggestion();
// TODO: make this nicer
words = rater.getBestSentence();
- System.err.println("Got new sentence: " + String.join(" ", words));
+ System.err.println("Suggest: " + String.join(" ", words));
} else {
System.err.println("No suggestion found.");
break;
@@ -197,7 +197,7 @@ public class SpellCorrector {
*/
private final boolean[] words_readonly;
- private double best_likelihood = Double.MIN_VALUE;
+ private double best_sentence_probability;
private WordModification best_modification;
public SentenceRater(String[] words) {
@@ -208,6 +208,9 @@ public class SpellCorrector {
word_likelihoods[i] = getWordLikelihood(i, words[i],
LM_PROBABILITY_UNMODIFIED);
}
+ // determine the rating of the current sentence without ignoring
+ // words.
+ best_sentence_probability = evaluateWord(-1);
}
/**
@@ -234,6 +237,7 @@ public class SpellCorrector {
// Now obtain n-gram probabilities. Use interpolation to combine
// unigrams and bigrams.
+ // unigram probability is computed by P(w) = #w / N (no smoothing).
p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount();
// Add probability of bi-grams.
@@ -242,6 +246,7 @@ public class SpellCorrector {
prev_word = words[index - 1];
ngram = prev_word + " " + word;
p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
+ //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
}
// Combine the candidate score with the n-gram probabilities.
@@ -256,19 +261,28 @@ public class SpellCorrector {
* remembered (the sentence itself will not be modified).
*/
public void tryWord(int index, String word, double channel_probability) {
- double likelihood;
+ double score, p;
- // try the modification, calculate the result and restore.
- likelihood = getWordLikelihood(index, word, channel_probability);
+ // find score and see how it affects the whole sentence.
+ score = getWordLikelihood(index, word, channel_probability);
+ p = score * evaluateWord(index);
- // look for the word which increases the likelihood the most
- // (that is, the difference of the old and new likelihood).
- likelihood -= word_likelihoods[index];
+ if (p > best_sentence_probability) {
+ best_sentence_probability = p;
+ best_modification = new WordModification(index, word, score);
+ }
+ }
- if (likelihood > best_likelihood) {
- best_likelihood = likelihood;
- best_modification = new WordModification(index, word);
+ private double evaluateWord(int ignoreIndex) {
+ double p = 1;
+ // calculate the probability of the combination of all word
+ // probabilities.
+ for (int i = 0; i < words.length; i++) {
+ if (ignoreIndex != i) {
+ p *= word_likelihoods[i];
+ }
}
+ return p;
}
/**
@@ -297,7 +311,17 @@ public class SpellCorrector {
public void saveSuggestion() {
int index = best_modification.index;
String word = best_modification.word;
+ double score = best_modification.score;
+
+ // save the word and its associated score
+ assert word_likelihoods[index] < score :
+ "The score should only get better for word " + word
+ + ". Change: " + word_likelihoods[index] + " -> " + score;
words[index] = word;
+ word_likelihoods[index] = score;
+
+ // if this was the best word, do not change it now. The context
+ // should also not change.
if (index > 0) {
words_readonly[index - 1] = true;
}
@@ -305,20 +329,21 @@ public class SpellCorrector {
if (index + 1 < words.length) {
words_readonly[index + 1] = true;
}
+
+ // change was applied, forget suggestion.
best_modification = null;
- best_likelihood = Double.MIN_VALUE;
- word_likelihoods[index] = getWordLikelihood(index, word,
- LM_PROBABILITY_UNMODIFIED);
}
private class WordModification {
private final int index;
private final String word;
+ private final double score;
- public WordModification(int index, String word) {
+ public WordModification(int index, String word, double score) {
this.index = index;
this.word = word;
+ this.score = score;
}
}
}