diff options
-rw-r--r-- | spellchecker/src/CorpusReader.java | 20 |
1 file changed, 16 insertions, 4 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java index f815cfd..f9df1ca 100644 --- a/spellchecker/src/CorpusReader.java +++ b/spellchecker/src/CorpusReader.java @@ -113,10 +113,22 @@ public class CorpusReader { throw new IllegalArgumentException("NGram must be non-empty."); } - double smoothedCount = 0.0; - - // simplest smoothing implementation: plus 1. - smoothedCount = getNGramCount(NGram) + 1; + double smoothedCount = getNGramCount(NGram); + + int n_words = NGram.split(" ").length + 1; + switch (n_words) { + case 1: // unigram + smoothedCount += 1.0; + break; + case 2: // bigram + smoothedCount += 2.0; + break; + case 3: // trigram + smoothedCount += 4.0; + break; + default: + throw new AssertionError("Unknown n-gram with n=" + n_words); + } return smoothedCount; } |