summary refs log tree commit diff
path: root/spellchecker
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:45:31 +0200
committer Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:45:31 +0200
commit f7c1881a84d468f8377b2165964911561452fef1 (patch)
tree 1d879f59465d51606011c881a756828f1676f65d /spellchecker
parent 7b56b731f525087c25b0e479077edac3674d2274 (diff)
download assignment4-f7c1881a84d468f8377b2165964911561452fef1.tar.gz
Debug and lambda tweaks
Damn, greedy does not work....
Diffstat (limited to 'spellchecker')
-rw-r--r-- spellchecker/src/SpellCorrector.java 8
1 files changed, 6 insertions, 2 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index e4086aa..dd6e73a 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -13,7 +13,7 @@ public class SpellCorrector {
* Lambda values for interpolation of n-gram probabilities. The first value
* is for unigrams, the second for bigrams, etc.
*/
- private final static double[] LAMBDAS = new double[]{.5, .5};
+ private final static double[] LAMBDAS = new double[]{.25, .75};
/**
* The language model probability for uncorrected words.
*/
@@ -247,13 +247,17 @@ public class SpellCorrector {
p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
//System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
} else {
- p_bi = 0;
+ // no previous word, assume likely.
+ p_bi = 1;
}
// Now obtain n-gram probabilities. Use interpolation to combine
// unigrams and bigrams.
p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi;
p *= score;
+ if (DEBUG_SCORE && (word.equals("he") || word.equals("hme") || word.equals("home"))) {
+ System.err.println(word + " p=" + p + " score=" + score + " uni=" + p_uni + " bi=" + p_bi);
+ }
assert p > 0.0 : "failed probability for " + word;
return p;
}