diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-04-02 11:45:31 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-04-02 11:45:31 +0200 |
commit | f7c1881a84d468f8377b2165964911561452fef1 (patch) | |
tree | 1d879f59465d51606011c881a756828f1676f65d /spellchecker | |
parent | 7b56b731f525087c25b0e479077edac3674d2274 (diff) | |
download | assignment4-f7c1881a84d468f8377b2165964911561452fef1.tar.gz |
Debug and lambda tweaks
Damn, greedy does not work....
Diffstat (limited to 'spellchecker')
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index e4086aa..dd6e73a 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -13,7 +13,7 @@ public class SpellCorrector {
      * Lambda values for interpolation of n-gram probabilities. The first value
      * is for unigrams, the second for bigrams, etc.
      */
-    private final static double[] LAMBDAS = new double[]{.5, .5};
+    private final static double[] LAMBDAS = new double[]{.25, .75};
     /**
      * The language model probability for uncorrected words.
      */
@@ -247,13 +247,17 @@ public class SpellCorrector {
             p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
             //System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
         } else {
-            p_bi = 0;
+            // no previous word, assume likely.
+            p_bi = 1;
         }
 
         // Now obtain n-gram probabilities. Use interpolation to combine
         // unigrams and bigrams.
         p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi;
         p *= score;
+        if (DEBUG_SCORE && (word.equals("he") || word.equals("hme") || word.equals("home"))) {
+            System.err.println(word + " p=" + p + " score=" + score + " uni=" + p_uni + " bi=" + p_bi);
+        }
         assert p > 0.0 : "failed probability for " + word;
         return p;
     }