From be34f8b98aa4942953deb383ea61156ee1bf20b1 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Thu, 2 Apr 2015 11:19:53 +0200 Subject: Split lambda p --- spellchecker/src/SpellCorrector.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index 9028fdc..475e889 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -222,7 +222,7 @@ public class SpellCorrector { public double getWordLikelihood(int index, String word, double channel_probability) { String prev_word, ngram; - double prior, score, p; + double prior, score, p, p_uni, p_bi; // a suggested word not in the vocabulary is certainly wrong, // changed (or consequentive) words should also not be changed. if (!cr.inVocabulary(word) || words_readonly[index]) { @@ -237,21 +237,23 @@ public class SpellCorrector { prior = (cr.getNGramCount(word) + .5) / cr.getUnigramCount(); score = prior * channel_probability; - // Now obtain n-gram probabilities. Use interpolation to combine - // unigrams and bigrams. // unigram probability is computed by P(w) = #w / N (no smoothing). - p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount(); + p_uni = cr.getSmoothedCount(word) / cr.getUnigramCount(); // Add probability of bi-grams. // For words u and w, P(w|u) = P(u, w) / P(u). if (index > 0) { prev_word = words[index - 1]; ngram = prev_word + " " + word; - p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word); + p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word); //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]); + } else { + p_bi = 0; } - // Combine the candidate score with the n-gram probabilities. + // Now obtain n-gram probabilities. Use interpolation to combine + // unigrams and bigrams. + p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi; p *= score; assert p > 0.0 : "failed probability for " + word; return p; -- cgit v1.2.1