Improve ngram score debugging details

author: Peter Wu <peter@lekensteyn.nl> 2015-04-02 17:54:39 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-04-02 17:54:39 +0200
commit: dee51f2ec8cdb7d5a61139455219b20e6604f4c2 (patch)
tree: ad37788fc7ea0b3eb88f93af9fa0fd90614e9ea5
parent: d303a5bde5002d2099958bcac6838b5bc463a623 (diff)
download: assignment4-dee51f2ec8cdb7d5a61139455219b20e6604f4c2.tar.gz
1 file changed, 25 insertions, 6 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index f5f5472..b5e383e 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -261,7 +261,7 @@ public class SpellCorrector {
          */
         public double getWordLikelihood(int index, String word,
                 double channel_probability) {
-            double prior, score;
+            double prior, score, p, igram_p;
             // a suggested word not in the vocabulary is certainly wrong,
             // changed (or consequentive) words should also not be changed.
             if (!cr.inVocabulary(word) || words_readonly[index]) {
@@ -269,6 +269,13 @@ public class SpellCorrector {
             }
 
             assert channel_probability > 0.0;
+            String debug_word = null;
+            if (DEBUG_SCORE
+                    && (word.equals("he")
+                    || word.equals("hme")
+                    || word.equals("home"))) {
+                debug_word = "";
+            }
 
             // P(x|w) is given by language model (noisy channel probability).
             // Find prior P(w) = (freq(w) + .5) / N (N is number of words).
@@ -277,7 +284,12 @@ public class SpellCorrector {
             score = prior * channel_probability;
 
             // compute unigrams
-            double p = LAMBDAS[0] * cr.getNgramProbability(word, "");
+            igram_p = cr.getNgramProbability(word, "");
+            p = LAMBDAS[0] * igram_p;
+            if (debug_word != null) {
+                debug_word += " 1p=" + igram_p;
+            }
+
             // compute bigrams, etc.
             String ngram = word;
             for (int i = 1; i < NGRAM_N; i++) {
@@ -287,16 +299,23 @@ public class SpellCorrector {
                     ngram += " " + words[index - i];
 
                     // Obtain n-gram probs and combine using interpolation.
-                    p += LAMBDAS[i] * cr.getNgramProbability(word, ngram);
+                    igram_p = cr.getNgramProbability(word, ngram);
                 } else {
                     // no metrics found, cannot deduce much information from it
-                    p += LAMBDAS[i] * .5;
+                    igram_p = .5;
+                }
+                p += LAMBDAS[i] * igram_p;
+                if (debug_word != null) {
+                    debug_word += " " + (i + 1) + "p=" + igram_p;
                 }
             }
 
             // finally add the score
-            if (DEBUG_SCORE && (word.equals("he") || word.equals("hme") || word.equals("home"))) {
-                System.err.println(word + " p=" + (p * score) + " score=" + score + " ngram=" + p);
+            if (debug_word != null) {
+                System.err.println("# " + word + " p=" + (p * score)
+                        + " score=" + score + " chan=" + channel_probability
+                        + " prior=" + prior
+                        + " ngram=" + p + debug_word);
             }
             p *= score;
             assert p > 0.0 : "failed probability for " + word;
author	Peter Wu <peter@lekensteyn.nl>	2015-04-02 17:54:39 +0200
committer	Peter Wu <peter@lekensteyn.nl>	2015-04-02 17:54:39 +0200
commit	dee51f2ec8cdb7d5a61139455219b20e6604f4c2 (patch)
tree	ad37788fc7ea0b3eb88f93af9fa0fd90614e9ea5
parent	d303a5bde5002d2099958bcac6838b5bc463a623 (diff)
download	assignment4-dee51f2ec8cdb7d5a61139455219b20e6604f4c2.tar.gz