Split lambda p

author: Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:19:53 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:19:53 +0200
commit: be34f8b98aa4942953deb383ea61156ee1bf20b1 (patch)
tree: eeb489a16f60ec7f35147d2d19ec381fb946855f
parent: 2957a918e369a32e51ea9c8b7b06063b5d7c09b6 (diff)
download: assignment4-be34f8b98aa4942953deb383ea61156ee1bf20b1.tar.gz
1 files changed, 8 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 9028fdc..475e889 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -222,7 +222,7 @@ public class SpellCorrector {
         public double getWordLikelihood(int index, String word,
                 double channel_probability) {
             String prev_word, ngram;
-            double prior, score, p;
+            double prior, score, p, p_uni, p_bi;
             // a suggested word not in the vocabulary is certainly wrong,
             // changed (or consecutive) words should also not be changed.
             if (!cr.inVocabulary(word) || words_readonly[index]) {
@@ -237,21 +237,23 @@ public class SpellCorrector {
             prior = (cr.getNGramCount(word) + .5) / cr.getUnigramCount();
             score = prior * channel_probability;
 
-            // Now obtain n-gram probabilities. Use interpolation to combine
-            // unigrams and bigrams.
             // unigram probability is computed by P(w) = #w / N (#w is the smoothed count).
-            p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount();
+            p_uni = cr.getSmoothedCount(word) / cr.getUnigramCount();
 
             // Add probability of bi-grams.
             // For words u and w, P(w|u) = P(u, w) / P(u).
             if (index > 0) {
                 prev_word = words[index - 1];
                 ngram = prev_word + " " + word;
-                p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
+                p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
                 //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
+            } else {
+                p_bi = 0;
             }
 
-            // Combine the candidate score with the n-gram probabilities.
+            // Combine the n-gram probabilities obtained above: linearly
+            // interpolate the unigram and bigram estimates using LAMBDAS.
+            p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi;
             p *= score;
             assert p > 0.0 : "failed probability for " + word;
             return p;
author	Peter Wu <peter@lekensteyn.nl>	2015-04-02 11:19:53 +0200
committer	Peter Wu <peter@lekensteyn.nl>	2015-04-02 11:19:53 +0200
commit	be34f8b98aa4942953deb383ea61156ee1bf20b1 (patch)
tree	eeb489a16f60ec7f35147d2d19ec381fb946855f
parent	2957a918e369a32e51ea9c8b7b06063b5d7c09b6 (diff)
download	assignment4-be34f8b98aa4942953deb383ea61156ee1bf20b1.tar.gz