diff options
Diffstat (limited to 'spellchecker/src/CorpusReader.java')
-rw-r--r-- | spellchecker/src/CorpusReader.java | 21 |
1 files changed, 8 insertions, 13 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java index f9df1ca..8281210 100644 --- a/spellchecker/src/CorpusReader.java +++ b/spellchecker/src/CorpusReader.java @@ -115,19 +115,14 @@ public class CorpusReader { double smoothedCount = getNGramCount(NGram); - int n_words = NGram.split(" ").length + 1; - switch (n_words) { - case 1: // unigram - smoothedCount += 1.0; - break; - case 2: // bigram - smoothedCount += 2.0; - break; - case 3: // trigram - smoothedCount += 4.0; - break; - default: - throw new AssertionError("Unknown n-gram with n=" + n_words); + // The caller invokes func(bigram) / func(unigram) and expects a + // probability as result. + if (NGram.indexOf(' ') != -1) { + // bigram, must be the numerator + smoothedCount += 1; + } else { + // unigram, must be the denominator + smoothedCount += 1; } return smoothedCount; |