diff options
Diffstat (limited to 'spellchecker/src/CorpusReader.java')
-rw-r--r-- | spellchecker/src/CorpusReader.java | 9 |
1 file changed, 9 insertions, 0 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java index 536a41b..f815cfd 100644 --- a/spellchecker/src/CorpusReader.java +++ b/spellchecker/src/CorpusReader.java @@ -15,6 +15,7 @@ public class CorpusReader { private HashMap<String, Integer> ngrams; private Set<String> vocabulary; + private int unigramCount = 0; public CorpusReader() throws IOException { readNGrams(); @@ -56,6 +57,10 @@ public class CorpusReader { try { count = Integer.parseInt(s1); ngrams.put(s2, count); + // unigram + if (s2.indexOf(' ') == -1) { + unigramCount += count; + } } catch (NumberFormatException nfe) { throw new NumberFormatException("NumberformatError: " + s1); } @@ -115,4 +120,8 @@ public class CorpusReader { return smoothedCount; } + + public int getUnigramCount() { + return unigramCount; + } } |