From 81a6cc4a67c8ac5aa601eaaef728f03245576cb9 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Thu, 2 Apr 2015 18:05:12 +0200
Subject: Further attempt to improve score: drop .5 correction

---
 spellchecker/src/SpellCorrector.java | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index b5e383e..33b804f 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -280,7 +280,10 @@ public class SpellCorrector {
             // P(x|w) is given by language model (noisy channel probability).
             // Find prior P(w) = (freq(w) + .5) / N (N is number of words).
             // Then compute candidate score by P(w) * P(x|w)
-            prior = (cr.getNGramCount(word) + .5) / cr.getWordCount();
+            // Note: Kernighan uses + .5 here to compensate for unknown words,
+            // but that assigns too much value to the score so do not do that.
+            // (all words are in the vocabulary anyway).
+            prior = (cr.getNGramCount(word) + 0.0) / cr.getWordCount();
             score = prior * channel_probability;
 
             // compute unigrams
@@ -310,14 +313,14 @@ public class SpellCorrector {
                 }
             }
 
-            // finally add the score
+            // finally combine the score with ngram probabilities
+            //p = .1 * score + .9 * p;
+            p *= score;
             if (debug_word != null) {
-                System.err.println("# " + word + " p=" + (p * score)
+                System.err.println("# " + word + " p=" + p
                         + " score=" + score + " chan=" + channel_probability
-                        + " prior=" + prior
-                        + " ngram=" + p + debug_word);
+                        + " prior=" + prior + debug_word);
             }
-            p *= score;
             assert p > 0.0 : "failed probability for " + word;
             return p;
         }
-- 
cgit v1.2.1