Fix n-gram prefix construction, improve n-gram probability debugging

author: Peter Wu <peter@lekensteyn.nl> 2015-04-03 15:12:45 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-04-03 15:12:45 +0200
commit: 6d0e2e299835eca671951120e72ad5898866839f (patch)
tree: b0e7068abcfa36dcbe41dbf25b836edeb6c39b3d
parent: 81a6cc4a67c8ac5aa601eaaef728f03245576cb9 (diff)
download: assignment4-6d0e2e299835eca671951120e72ad5898866839f.tar.gz
2 files changed, 12 insertions, 5 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 2ad7e85..2e0a855 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -137,7 +137,8 @@ public class CorpusReader {
      * @return
      */
     public double getNgramProbability(String word, String ngram) {
-        double a, b;
+        double a, b, p;
+
         // special case: unigram has no prior ngram
         if (ngram.isEmpty()) {
             a = getNGramCount(word);
@@ -146,7 +147,7 @@ public class CorpusReader {
             // apply add-1 smoothing under the assumption that there are many
             // unigrams and this does not significantly affect the chance,
             // it just ensures that it is non-zero.
-            return (a + 1) / (b + 1);
+            p = (a + 1) / (b + 1);
         } else {
             // other ngram cases
             a = getNGramCount(ngram + " " + word);
@@ -155,8 +156,10 @@ public class CorpusReader {
             // apply smoothing, but add a smaller number because "b" is
             // typically very small.
             // TODO: Kneser-Ney smoothing?
-            return (a + .001) / (b + 1);
+            p = (a + .001) / (b + 1);
         }
+
+        return p;
     }
 
     /**
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 33b804f..2bc788b 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -294,12 +294,16 @@ public class SpellCorrector {
             }
 
             // compute bigrams, etc.
-            String ngram = word;
+            String ngram = "";
             for (int i = 1; i < NGRAM_N; i++) {
                 // are there actually enough words to compute this metric?
                 if (index - i >= 0) {
                     // increase ngram prefix
-                    ngram += " " + words[index - i];
+                    if (ngram.isEmpty()) {
+                        ngram = words[index - i];
+                    } else {
+                        ngram = words[index - i] + " " + ngram;
+                    }
 
                     // Obtain n-gram probs and combine using interpolation.
                     igram_p = cr.getNgramProbability(word, ngram);
author	Peter Wu <peter@lekensteyn.nl>	2015-04-03 15:12:45 +0200
committer	Peter Wu <peter@lekensteyn.nl>	2015-04-03 15:12:45 +0200
commit	6d0e2e299835eca671951120e72ad5898866839f (patch)
tree	b0e7068abcfa36dcbe41dbf25b836edeb6c39b3d
parent	81a6cc4a67c8ac5aa601eaaef728f03245576cb9 (diff)
download	assignment4-6d0e2e299835eca671951120e72ad5898866839f.tar.gz