From cc9d613274b9cb7945cbcad2874c3a106728804e Mon Sep 17 00:00:00 2001
From: Peter Wu
Date: Thu, 2 Apr 2015 02:21:05 +0200
Subject: Calculate probability for unigram

Find L * P(word) instead of L * #word.
---
 spellchecker/src/SpellCorrector.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index ab41c3d..3202949 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -234,7 +234,7 @@ public class SpellCorrector {
 
         // Now obtain n-gram probabilities. Use interpolation to combine
         // unigrams and bigrams.
-        p = LAMBDAS[0] * cr.getSmoothedCount(word);
+        p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount();
 
         // Add probability of bi-grams.
         // For words u and w, P(w|u) = P(u, w) / P(u).
-- 
cgit v1.2.1