Debug and lambda tweaks

Damn, greedy does not work...
author: Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:45:31 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:45:31 +0200
commit: f7c1881a84d468f8377b2165964911561452fef1 (patch)
tree: 1d879f59465d51606011c881a756828f1676f65d /spellchecker
parent: 7b56b731f525087c25b0e479077edac3674d2274 (diff)
download: assignment4-f7c1881a84d468f8377b2165964911561452fef1.tar.gz
1 file changed, 6 insertions, 2 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index e4086aa..dd6e73a 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -13,7 +13,7 @@ public class SpellCorrector {
      * Lambda values for interpolation of n-gram probabilities. The first value
      * is for unigrams, the second for bigrams, etc.
      */
-    private final static double[] LAMBDAS = new double[]{.5, .5};
+    private final static double[] LAMBDAS = new double[]{.25, .75};
     /**
      * The language model probability for uncorrected words.
      */
@@ -247,13 +247,17 @@ public class SpellCorrector {
                 p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
                 //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
             } else {
-                p_bi = 0;
+                // no previous word, assume likely.
+                p_bi = 1;
             }
 
             // Now obtain n-gram probabilities. Use interpolation to combine
             // unigrams and bigrams.
             p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi;
             p *= score;
+            if (DEBUG_SCORE && (word.equals("he") || word.equals("hme") || word.equals("home"))) {
+                System.err.println(word + " p=" + p + " score=" + score + " uni=" + p_uni + " bi=" + p_bi);
+            }
             assert p > 0.0 : "failed probability for " + word;
             return p;
         }
author	Peter Wu <peter@lekensteyn.nl>	2015-04-02 11:45:31 +0200
committer	Peter Wu <peter@lekensteyn.nl>	2015-04-02 11:45:31 +0200
commit	f7c1881a84d468f8377b2165964911561452fef1 (patch)
tree	1d879f59465d51606011c881a756828f1676f65d /spellchecker
parent	7b56b731f525087c25b0e479077edac3674d2274 (diff)
download	assignment4-f7c1881a84d468f8377b2165964911561452fef1.tar.gz