summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- spellchecker/src/CorpusReader.java 20
1 files changed, 16 insertions, 4 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index f815cfd..f9df1ca 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -113,10 +113,22 @@ public class CorpusReader {
throw new IllegalArgumentException("NGram must be non-empty.");
}
- double smoothedCount = 0.0;
-
- // simplest smoothing implementation: plus 1.
- smoothedCount = getNGramCount(NGram) + 1;
+ double smoothedCount = getNGramCount(NGram);
+
+ int n_words = NGram.split(" ").length + 1;
+ switch (n_words) {
+ case 1: // unigram
+ smoothedCount += 1.0;
+ break;
+ case 2: // bigram
+ smoothedCount += 2.0;
+ break;
+ case 3: // trigram
+ smoothedCount += 4.0;
+ break;
+ default:
+ throw new AssertionError("Unknown n-gram with n=" + n_words);
+ }
return smoothedCount;
}