diff options
Diffstat (limited to 'spellchecker/src/CorpusReader.java')
-rw-r--r-- | spellchecker/src/CorpusReader.java | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 2ad7e85..2e0a855 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -137,7 +137,8 @@ public class CorpusReader {
      * @return
      */
     public double getNgramProbability(String word, String ngram) {
-        double a, b;
+        double a, b, p;
+
         // special case: unigram has no prior ngram
         if (ngram.isEmpty()) {
             a = getNGramCount(word);
@@ -146,7 +147,7 @@ public class CorpusReader {
             // apply add-1 smoothing under the assumption that there are many
             // unigrams and this does not significantly affect the chance,
             // it just ensures that it is non-zero.
-            return (a + 1) / (b + 1);
+            p = (a + 1) / (b + 1);
         } else {
             // other ngram cases
             a = getNGramCount(ngram + " " + word);
@@ -155,8 +156,10 @@ public class CorpusReader {
             // apply smoothing, but add a smaller number because "b" is
             // typically very small.
             // TODO: Kneser-Ney smoothing?
-            return (a + .001) / (b + 1);
+            p = (a + .001) / (b + 1);
         }
+
+        return p;
     }
 
     /**