diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-04-01 09:19:46 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-04-01 09:19:46 +0200 |
commit | c42186671c9808a7192504f5556c48718607f7d3 (patch) | |
tree | 8c22e97ee151f3c95bb8b2d529788643099905b4 /spellchecker | |
parent | f01e7e5769d568293bf31b15d7314740a0f281da (diff) | |
download | assignment4-c42186671c9808a7192504f5556c48718607f7d3.tar.gz |
Count unigrams
Diffstat (limited to 'spellchecker')
-rw-r--r-- | spellchecker/src/CorpusReader.java | 9 |
1 files changed, 9 insertions, 0 deletions
```diff
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 536a41b..f815cfd 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -15,6 +15,7 @@ public class CorpusReader {
 
     private HashMap<String, Integer> ngrams;
     private Set<String> vocabulary;
+    private int unigramCount = 0;
 
     public CorpusReader() throws IOException {
         readNGrams();
@@ -56,6 +57,10 @@ public class CorpusReader {
                 try {
                     count = Integer.parseInt(s1);
                     ngrams.put(s2, count);
+                    // unigram
+                    if (s2.indexOf(' ') == -1) {
+                        unigramCount += count;
+                    }
                 } catch (NumberFormatException nfe) {
                     throw new NumberFormatException("NumberformatError: " + s1);
                 }
@@ -115,4 +120,8 @@ public class CorpusReader {
 
         return smoothedCount;
     }
+
+    public int getUnigramCount() {
+        return unigramCount;
+    }
 }
```