summary | refs | log | tree | commit | diff
path: root/spellchecker/src/ConfusionMatrixReader.java
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2015-03-30 16:23:32 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-03-30 16:23:32 +0200
commit: 8f57be0fbb66ef25254e949c577562d71912082a (patch)
tree: a5c3addc42899b4e8cc4530209e654ad3d812327 /spellchecker/src/ConfusionMatrixReader.java
parent: d1e138822be7d2d393747eaac17f7aa5c265525a (diff)
download: assignment4-8f57be0fbb66ef25254e949c577562d71912082a.tar.gz
Initial checkin of assignment4-0.0
Extracted from assignment4-0.0.zip (without build/ dirs). The CRLF in test-sentences.txt was replaced by LFs. https://www.win.tue.nl/~wstahw/edu/2ID90/assignments/4/assignment4-0.0.zip size 5189037 sha1 5e0a58acffbcac6e5f13837c98c2c0bb936304b5
Diffstat (limited to 'spellchecker/src/ConfusionMatrixReader.java')
-rw-r--r-- spellchecker/src/ConfusionMatrixReader.java 67
1 files changed, 67 insertions, 0 deletions
diff --git a/spellchecker/src/ConfusionMatrixReader.java b/spellchecker/src/ConfusionMatrixReader.java
new file mode 100644
index 0000000..b75da47
--- /dev/null
+++ b/spellchecker/src/ConfusionMatrixReader.java
@@ -0,0 +1,67 @@
+
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * Loads a confusion matrix from {@link #DATAFILE_LOC} and answers count
+ * lookups on it.
+ *
+ * Each usable line of the data file has the form
+ * {@code <error>|<correct> <count>}: everything before the last space is the
+ * key, everything after it is an integer count.
+ */
+public class ConfusionMatrixReader {
+
+    /** Path of the data file; relative, so resolved against the working directory. */
+    final static String DATAFILE_LOC = "confusion_matrix.txt";
+
+    /** Maps the full key {@code <error>|<correct>} to its count. */
+    final private HashMap<String,Integer> confusionMatrix = new HashMap<>();
+
+    /**
+     * Maps the part of the key before the first '|' to the sum of the counts
+     * of all entries sharing that part. Filled while loading; not read
+     * anywhere in this class.
+     */
+    final private HashMap<String,Integer> countMatrix = new HashMap<>();
+
+    /**
+     * Creates a reader and loads the data file immediately. An I/O failure
+     * (including a missing file) is logged rather than thrown, leaving the
+     * instance with whatever entries were read before the failure.
+     */
+    public ConfusionMatrixReader()
+    {
+        try {
+            readConfusionMatrix();
+        } catch (IOException ex) {
+            Logger.getLogger(ConfusionMatrixReader.class.getName())
+                    .log(Level.SEVERE, "Could not read " + DATAFILE_LOC, ex);
+        }
+    }
+
+    /**
+     * Reads every line of the data file into the two maps.
+     *
+     * @throws FileNotFoundException if the data file cannot be opened
+     * @throws IOException if reading the data file fails
+     */
+    private void readConfusionMatrix()
+            throws FileNotFoundException, IOException
+    {
+        // try-with-resources closes the stream and the reader on every path.
+        try (FileInputStream fis = new FileInputStream(DATAFILE_LOC);
+             BufferedReader in = new BufferedReader(new InputStreamReader(fis))) {
+            String line;
+            // readLine() returning null is the end-of-file signal; ready()
+            // only says whether a read would block.
+            while ((line = in.readLine()) != null) {
+                addEntry(line);
+            }
+        }
+    }
+
+    /**
+     * Parses one line of the data file and records it. Blank lines are
+     * skipped silently; any other line that does not match
+     * {@code <error>|<correct> <count>} is reported on standard error and
+     * skipped, so one bad line cannot abort the rest of the load.
+     */
+    private void addEntry(String line)
+    {
+        if (line.trim().isEmpty()) {
+            return;
+        }
+
+        int space = line.lastIndexOf(' ');
+        if (space < 0) {
+            System.err.println("problems with string <"+line+">");
+            return;
+        }
+
+        String keys = line.substring(0, space);
+        int pipe = keys.indexOf('|');
+        if (pipe < 0) {
+            System.err.println("problems with string <"+line+">");
+            return;
+        }
+
+        try {
+            int count = Integer.parseInt(line.substring(space + 1));
+            confusionMatrix.put(keys, count);
+
+            String key = keys.substring(0, pipe);
+            Integer value = countMatrix.get(key);
+            if (value == null) {
+                value = 0;
+            }
+            countMatrix.put(key, value + count);
+        } catch (NumberFormatException e) {
+            System.err.println("problems with string <"+line+">");
+        }
+    }
+
+    /**
+     * Returns the count stored for the pair {@code <error>|<correct>} in the
+     * confusion matrix; e.g. after loading the line {@code c|ct 36},
+     * {@code getConfusionCount("c", "ct")} is 36.
+     *
+     * @param error the part of the key before the '|'
+     * @param correct the part of the key after the '|'
+     * @return the stored count, or 0 if the pair is not in the matrix
+     */
+    public int getConfusionCount(String error, String correct)
+    {
+        Integer count = confusionMatrix.get(error+"|"+correct);
+        return count==null?0:count;
+    }
+}