summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2015-04-03 20:02:16 +0200
committer Peter Wu <peter@lekensteyn.nl> 2015-04-03 20:02:16 +0200
commit 6e38d50ba49d7f9be72fcd0294890ba483133942 (patch)
tree fda004bec0cc2d01210e94dbf741f3b63fde90f0
parent 1db72a38d9d051ef3f72ebc983d50cf0e14c62fd (diff)
download assignment4-6e38d50ba49d7f9be72fcd0294890ba483133942.tar.gz
Consider changes to ngram probabilities in the context
Finally "hme locations" gets corrected to "home locations"!
-rw-r--r-- spellchecker/src/SpellCorrector.java 38
1 files changed, 32 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 53255fd..eee6cf1 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -246,7 +246,7 @@ public class SpellCorrector {
this.word_likelihoods = new double[words.length];
this.words_readonly = new boolean[words.length];
for (int i = 0; i < words.length; i++) {
- word_likelihoods[i] = getWordLikelihood(i, words[i],
+ word_likelihoods[i] = getWordLikelihood(i,
LM_PROBABILITY_UNMODIFIED);
}
sentence_probability = combineProbabilities(word_likelihoods);
@@ -268,9 +268,9 @@ public class SpellCorrector {
* Calculates the probability that the word {@code word} is valid at
* position {@code index}.
*/
- public double getWordLikelihood(int index, String word,
- double channel_probability) {
+ public double getWordLikelihood(int index, double channel_probability) {
double prior, score, p, igram_p;
+ String word = words[index];
// a suggested word not in the vocabulary is certainly wrong,
// changed (or consequentive) words should also not be changed.
if (!cr.inVocabulary(word) || words_readonly[index]) {
@@ -345,11 +345,38 @@ public class SpellCorrector {
*/
public void tryWord(int index, String word, double channel_probability) {
double score, p;
+ String old_word = words[index];
double[] scores;
+ int index_left, index_right;
+
+ // Simulate the effect of changing this word.
+ words[index] = word;
+ // As changing the word itself can affect the ngram rating of
+ // the context, recalculate the probabilities for those too.
+ index_left = Math.max(0, index - NGRAM_N + 1);
+ index_right = Math.min(words.length, index + NGRAM_N - 1);
scores = word_likelihoods.clone();
- score = getWordLikelihood(index, word, channel_probability);
- scores[index] = score;
+
+ // calculate probabilities for each word that is possibly affected
+ // by this change.
+ for (int i = 0; i < words.length; i++) {
+ if (i < index_left || i > index_right) {
+ // the probability is unchanged, ignore.
+ continue;
+ }
+ if (i == index) {
+ // the word that is being modified
+ score = getWordLikelihood(i, channel_probability);
+ } else {
+ // the word around the modified word
+ score = getWordLikelihood(i, LM_PROBABILITY_UNMODIFIED);
+ }
+ scores[i] = score;
+ }
+
+ // restore word
+ words[index] = old_word;
// group the effects of this modifications for tracking.
WordModification effect = new WordModification(index, word, scores);
@@ -357,7 +384,6 @@ public class SpellCorrector {
if ((best_modification != null
&& effect.probability > best_modification.probability)
|| effect.probability > sentence_probability) {
- System.err.println("found better word!" + word + " p=" + effect.probability);
best_modification = effect;
}
}