summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- spellchecker/src/CorpusReader.java | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 536a41b..f815cfd 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -15,6 +15,7 @@ public class CorpusReader {
private HashMap<String, Integer> ngrams;
private Set<String> vocabulary;
+ private int unigramCount = 0;
public CorpusReader() throws IOException {
readNGrams();
@@ -56,6 +57,10 @@ public class CorpusReader {
try {
count = Integer.parseInt(s1);
ngrams.put(s2, count);
+ // unigram
+ if (s2.indexOf(' ') == -1) {
+ unigramCount += count;
+ }
} catch (NumberFormatException nfe) {
throw new NumberFormatException("NumberformatError: " + s1);
}
@@ -115,4 +120,8 @@ public class CorpusReader {
return smoothedCount;
}
+
+ public int getUnigramCount() { // Returns the running total of the counts added for n-gram keys that contain no space character.
+ return unigramCount;
+ }
}