summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-04-01 02:54:48 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-04-01 02:54:48 +0200
commit82eadc91f0f697103e965bd3600475df60e8282f (patch)
tree62826bc20e70f400d09572f3a2915d33e268dbc3
parent61f7bd4f37dc85f49a1732af58f6bd42e556ad21 (diff)
downloadassignment4-82eadc91f0f697103e965bd3600475df60e8282f.tar.gz
Calculate noisy channel probability
-rw-r--r--spellchecker/src/ConfusionMatrixReader.java11
-rw-r--r--spellchecker/src/SpellCorrector.java18
2 files changed, 29 insertions, 0 deletions
diff --git a/spellchecker/src/ConfusionMatrixReader.java b/spellchecker/src/ConfusionMatrixReader.java
index c9e79ab..90a8cef 100644
--- a/spellchecker/src/ConfusionMatrixReader.java
+++ b/spellchecker/src/ConfusionMatrixReader.java
@@ -60,4 +60,15 @@ public class ConfusionMatrixReader {
Integer count = confusionMatrix.get(error + "|" + correct);
return count == null ? 0 : count;
}
+
+ /**
+ * Given the error {@code error}, find the number of occurrences of the
+ * error.
+ *
+ * @param error the error string used as the lookup key in {@code countMatrix}
+ * @return the count stored for {@code error}, or 0 if there is no entry for it
+ */
+ public int getErrorsCount(String error) {
+ return countMatrix.getOrDefault(error, 0);
+ }
}
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 6c9225b..3fbd8cc 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -53,7 +53,25 @@ public class SpellCorrector {
return;
}
+ // Find the channel model probability (probability of the edit).
+ // P(x|w) = "corrections count given error" / "errors count"
+ double correctionCount, errorCount, p_channel;
+ correctionCount = (double) cmr.getConfusionCount(error, correct);
+ errorCount = cmr.getErrorsCount(error);
+
+ // is this a known correction?
+ if (errorCount == 0.0) {
+ // no,
+ p_channel = 0.0;
+ } else {
+ // yes,
+ p_channel = correctionCount / errorCount;
+ }
+
+ // Sum the probabilities as independent modifications can result in
+ // the same word ("acess" -> "access" by "a|ac", "e|ce").
double p = candidates.getOrDefault(word2, 0.0);
+ p += p_channel;
candidates.put(word2, p);
};