summary | refs | log | tree | commit | diff
path: root/spellchecker/src/CorpusReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'spellchecker/src/CorpusReader.java')
-rw-r--r-- spellchecker/src/CorpusReader.java 21
1 files changed, 8 insertions, 13 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index f9df1ca..8281210 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -115,19 +115,14 @@ public class CorpusReader {
double smoothedCount = getNGramCount(NGram);
- int n_words = NGram.split(" ").length + 1;
- switch (n_words) {
- case 1: // unigram
- smoothedCount += 1.0;
- break;
- case 2: // bigram
- smoothedCount += 2.0;
- break;
- case 3: // trigram
- smoothedCount += 4.0;
- break;
- default:
- throw new AssertionError("Unknown n-gram with n=" + n_words);
+ // The caller invokes func(bigram) / func(unigram) and expects a
+ // probability as result.
+ if (NGram.indexOf(' ') != -1) {
+ // bigram, must be the numerator
+ smoothedCount += 1;
+ } else {
+ // unigram, must be the denominator
+ smoothedCount += 1;
}
return smoothedCount;