From 6e38d50ba49d7f9be72fcd0294890ba483133942 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 3 Apr 2015 20:02:16 +0200 Subject: Consider changes to ngram probabilities in the context Finally "hme locations" gets corrected to "home locations"! --- spellchecker/src/SpellCorrector.java | 38 ++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index 53255fd..eee6cf1 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -246,7 +246,7 @@ public class SpellCorrector { this.word_likelihoods = new double[words.length]; this.words_readonly = new boolean[words.length]; for (int i = 0; i < words.length; i++) { - word_likelihoods[i] = getWordLikelihood(i, words[i], + word_likelihoods[i] = getWordLikelihood(i, LM_PROBABILITY_UNMODIFIED); } sentence_probability = combineProbabilities(word_likelihoods); @@ -268,9 +268,9 @@ public class SpellCorrector { * Calculates the probability that the word {@code word} is valid at * position {@code index}. */ - public double getWordLikelihood(int index, String word, - double channel_probability) { + public double getWordLikelihood(int index, double channel_probability) { double prior, score, p, igram_p; + String word = words[index]; // a suggested word not in the vocabulary is certainly wrong, // changed (or consequentive) words should also not be changed. if (!cr.inVocabulary(word) || words_readonly[index]) { @@ -345,11 +345,38 @@ public class SpellCorrector { */ public void tryWord(int index, String word, double channel_probability) { double score, p; + String old_word = words[index]; double[] scores; + int index_left, index_right; + + // Simulate the change of changing this word. + words[index] = word; + // As changing the word itself can affect the ngram rating of + // the context, recalculate the probabilities for those too.
+ index_left = Math.max(0, index - NGRAM_N + 1); + index_right = Math.min(words.length, index + NGRAM_N - 1); scores = word_likelihoods.clone(); - score = getWordLikelihood(index, word, channel_probability); - scores[index] = score; + + // calculate probabilities for each word that is possibly affected + // by this change. + for (int i = 0; i < words.length; i++) { + if (i < index_left || i > index_right) { + // the probability is unchanged, ignore. + continue; + } + if (i == index) { + // the word that is being modified + score = getWordLikelihood(i, channel_probability); + } else { + // the word around the modified word + score = getWordLikelihood(i, LM_PROBABILITY_UNMODIFIED); + } + scores[i] = score; + } + + // restore word + words[index] = old_word; // group the effects of this modifications for tracking. WordModification effect = new WordModification(index, word, scores); @@ -357,7 +384,6 @@ public class SpellCorrector { if ((best_modification != null && effect.probability > best_modification.probability) || effect.probability > sentence_probability) { - System.err.println("found better word!" + word + " p=" + effect.probability); best_modification = effect; } } -- cgit v1.2.1