summary refs log tree commit diff
path: root/spellchecker/src/CorpusReader.java
diff options
context:
space:
mode:
Diffstat (limited to 'spellchecker/src/CorpusReader.java')
-rw-r--r-- spellchecker/src/CorpusReader.java 9
1 files changed, 6 insertions, 3 deletions
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index 2ad7e85..2e0a855 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -137,7 +137,8 @@ public class CorpusReader {
* @return
*/
public double getNgramProbability(String word, String ngram) {
- double a, b;
+ double a, b, p;
+
// special case: unigram has no prior ngram
if (ngram.isEmpty()) {
a = getNGramCount(word);
@@ -146,7 +147,7 @@ public class CorpusReader {
// apply add-1 smoothing under the assumption that there are many
// unigrams and this does not significantly affect the chance,
// it just ensures that it is non-zero.
- return (a + 1) / (b + 1);
+ p = (a + 1) / (b + 1);
} else {
// other ngram cases
a = getNGramCount(ngram + " " + word);
@@ -155,8 +156,10 @@ public class CorpusReader {
// apply smoothing, but add a smaller number because "b" is
// typically very small.
// TODO: Kneser-Ney smoothing?
- return (a + .001) / (b + 1);
+ p = (a + .001) / (b + 1);
}
+
+ return p;
}
/**