diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-04-01 02:54:48 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-04-01 02:54:48 +0200 |
commit | 82eadc91f0f697103e965bd3600475df60e8282f (patch) | |
tree | 62826bc20e70f400d09572f3a2915d33e268dbc3 /spellchecker/src/SpellCorrector.java | |
parent | 61f7bd4f37dc85f49a1732af58f6bd42e556ad21 (diff) | |
download | assignment4-82eadc91f0f697103e965bd3600475df60e8282f.tar.gz |
Calculate noisy channel probability
Diffstat (limited to 'spellchecker/src/SpellCorrector.java')
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 6c9225b..3fbd8cc 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -53,7 +53,25 @@ public class SpellCorrector {
                 return;
             }
 
+            // Find the channel model probability (probability of the edit).
+            // P(x|w) = "corrections count given error" / "errors count"
+            double correctionCount, errorCount, p_channel;
+            correctionCount = (double) cmr.getConfusionCount(error, correct);
+            errorCount = cmr.getErrorsCount(error);
+
+            // is this a known correction?
+            if (errorCount == 0.0) {
+                // no,
+                p_channel = 0.0;
+            } else {
+                // yes,
+                p_channel = correctionCount / errorCount;
+            }
+
+            // Sum the probabilities as independent modifications can result in
+            // the same word ("acess" -> "access" by "a|ac", "e|ce").
             double p = candidates.getOrDefault(word2, 0.0);
+            p += p_channel;
             candidates.put(word2, p);
         };
 
```