summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2015-04-01 09:20:19 +0200
committer Peter Wu <peter@lekensteyn.nl> 2015-04-01 09:20:19 +0200
commit 023487875139e3e8acd52298979b7dd903d70ed5 (patch)
tree 5f89f0c26bbc17651226751a564f1ad8ad77b516
parent 815478980f4a881d1efbbd6b7d2441ec47376c7d (diff)
download assignment4-023487875139e3e8acd52298979b7dd903d70ed5.tar.gz
Rate multi n-grams higher
-rw-r--r-- spellchecker/src/CorpusReader.java 20
1 files changed, 16 insertions, 4 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index f815cfd..f9df1ca 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -113,10 +113,22 @@ public class CorpusReader {
throw new IllegalArgumentException("NGram must be non-empty.");
}
- double smoothedCount = 0.0;
-
- // simplest smoothing implementation: plus 1.
- smoothedCount = getNGramCount(NGram) + 1;
+ double smoothedCount = getNGramCount(NGram);
+
+ int n_words = NGram.split(" ").length + 1;
+ switch (n_words) {
+ case 1: // unigram
+ smoothedCount += 1.0;
+ break;
+ case 2: // bigram
+ smoothedCount += 2.0;
+ break;
+ case 3: // trigram
+ smoothedCount += 4.0;
+ break;
+ default:
+ throw new AssertionError("Unknown n-gram with n=" + n_words);
+ }
return smoothedCount;
}