summary | refs | log | tree | commit | diff
path: root/spellchecker
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl>, 2015-04-01 09:19:46 +0200
committer: Peter Wu <peter@lekensteyn.nl>, 2015-04-01 09:19:46 +0200
commit: c42186671c9808a7192504f5556c48718607f7d3 (patch)
tree: 8c22e97ee151f3c95bb8b2d529788643099905b4 /spellchecker
parent: f01e7e5769d568293bf31b15d7314740a0f281da (diff)
download: assignment4-c42186671c9808a7192504f5556c48718607f7d3.tar.gz
Count unigrams
Diffstat (limited to 'spellchecker')
-rw-r--r-- spellchecker/src/CorpusReader.java | 9
1 file changed, 9 insertions, 0 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 536a41b..f815cfd 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -15,6 +15,7 @@ public class CorpusReader {
private HashMap<String, Integer> ngrams;
private Set<String> vocabulary;
+ private int unigramCount = 0;
public CorpusReader() throws IOException {
readNGrams();
@@ -56,6 +57,10 @@ public class CorpusReader {
try {
count = Integer.parseInt(s1);
ngrams.put(s2, count);
+ // unigram
+ if (s2.indexOf(' ') == -1) {
+ unigramCount += count;
+ }
} catch (NumberFormatException nfe) {
throw new NumberFormatException("NumberformatError: " + s1);
}
@@ -115,4 +120,8 @@ public class CorpusReader {
return smoothedCount;
}
+
+ public int getUnigramCount() {
+ return unigramCount;
+ }
}