diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-04-02 11:19:53 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-04-02 11:19:53 +0200 |
commit | be34f8b98aa4942953deb383ea61156ee1bf20b1 (patch) | |
tree | eeb489a16f60ec7f35147d2d19ec381fb946855f | |
parent | 2957a918e369a32e51ea9c8b7b06063b5d7c09b6 (diff) | |
download | assignment4-be34f8b98aa4942953deb383ea61156ee1bf20b1.tar.gz |
Split lambda p
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index 9028fdc..475e889 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -222,7 +222,7 @@ public class SpellCorrector { public double getWordLikelihood(int index, String word, double channel_probability) { String prev_word, ngram; - double prior, score, p; + double prior, score, p, p_uni, p_bi; // a suggested word not in the vocabulary is certainly wrong, // changed (or consequentive) words should also not be changed. if (!cr.inVocabulary(word) || words_readonly[index]) { @@ -237,21 +237,23 @@ public class SpellCorrector { prior = (cr.getNGramCount(word) + .5) / cr.getUnigramCount(); score = prior * channel_probability; - // Now obtain n-gram probabilities. Use interpolation to combine - // unigrams and bigrams. // unigram probability is computed by P(w) = #w / N (no smoothing). - p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount(); + p_uni = cr.getSmoothedCount(word) / cr.getUnigramCount(); // Add probability of bi-grams. // For words u and w, P(w|u) = P(u, w) / P(u). if (index > 0) { prev_word = words[index - 1]; ngram = prev_word + " " + word; - p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word); + p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word); //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]); + } else { + p_bi = 0; } - // Combine the candidate score with the n-gram probabilities. + // Now obtain n-gram probabilities. Use interpolation to combine + // unigrams and bigrams. + p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi; p *= score; assert p > 0.0 : "failed probability for " + word; return p; |