summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:19:53 +0200
committer Peter Wu <peter@lekensteyn.nl> 2015-04-02 11:19:53 +0200
commit be34f8b98aa4942953deb383ea61156ee1bf20b1 (patch)
tree eeb489a16f60ec7f35147d2d19ec381fb946855f
parent 2957a918e369a32e51ea9c8b7b06063b5d7c09b6 (diff)
download assignment4-be34f8b98aa4942953deb383ea61156ee1bf20b1.tar.gz
Split lambda p
-rw-r--r-- spellchecker/src/SpellCorrector.java 14
1 files changed, 8 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 9028fdc..475e889 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -222,7 +222,7 @@ public class SpellCorrector {
public double getWordLikelihood(int index, String word,
double channel_probability) {
String prev_word, ngram;
- double prior, score, p;
+ double prior, score, p, p_uni, p_bi;
// a suggested word not in the vocabulary is certainly wrong,
// changed (or consecutive) words should also not be changed.
if (!cr.inVocabulary(word) || words_readonly[index]) {
@@ -237,21 +237,23 @@ public class SpellCorrector {
prior = (cr.getNGramCount(word) + .5) / cr.getUnigramCount();
score = prior * channel_probability;
- // Now obtain n-gram probabilities. Use interpolation to combine
- // unigrams and bigrams.
// unigram probability is computed by P(w) = #w / N (no smoothing).
- p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount();
+ p_uni = cr.getSmoothedCount(word) / cr.getUnigramCount();
// Add probability of bi-grams.
// For words u and w, P(w|u) = P(u, w) / P(u).
if (index > 0) {
prev_word = words[index - 1];
ngram = prev_word + " " + word;
- p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
+ p_bi = cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
//System.err.println("W: " + word + " " + score + " " + ngram + " |" + words[index]);
+ } else {
+ p_bi = 0;
}
- // Combine the candidate score with the n-gram probabilities.
+ // Now obtain n-gram probabilities. Use interpolation to combine
+ // unigrams and bigrams.
+ p = LAMBDAS[0] * p_uni + LAMBDAS[1] * p_bi;
p *= score;
assert p > 0.0 : "failed probability for " + word;
return p;