diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-03-30 16:23:32 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-03-30 16:23:32 +0200 |
commit | 8f57be0fbb66ef25254e949c577562d71912082a (patch) | |
tree | a5c3addc42899b4e8cc4530209e654ad3d812327 /spellchecker/src/ConfusionMatrixReader.java | |
parent | d1e138822be7d2d393747eaac17f7aa5c265525a (diff) | |
download | assignment4-8f57be0fbb66ef25254e949c577562d71912082a.tar.gz |
Initial checkin of assignment4-0.0
Extracted from assignment4-0.0.zip (without build/ dirs).
The CRLF in test-sentences.txt was replaced by LFs.
https://www.win.tue.nl/~wstahw/edu/2ID90/assignments/4/assignment4-0.0.zip
size 5189037
sha1 5e0a58acffbcac6e5f13837c98c2c0bb936304b5
Diffstat (limited to 'spellchecker/src/ConfusionMatrixReader.java')
-rw-r--r-- | spellchecker/src/ConfusionMatrixReader.java | 67 |
1 files changed, 67 insertions, 0 deletions
// spellchecker/src/ConfusionMatrixReader.java

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Loads a confusion matrix from the text file {@link #DATAFILE_LOC} and
 * answers count lookups against it.
 *
 * <p>Each data line has the form {@code <error>|<correct> <count>}: the text
 * before the last space is the key, the text after it is an integer count.
 * Lines that do not match this shape are reported on {@code System.err} and
 * skipped; they do not stop the rest of the file from being read.
 */
public class ConfusionMatrixReader {

    /** Path of the data file, resolved against the current working directory. */
    static final String DATAFILE_LOC = "confusion_matrix.txt";

    /** Maps the full key {@code "<error>|<correct>"} to its count. */
    private final Map<String, Integer> confusionMatrix = new HashMap<>();

    /** Maps the part of the key before the first {@code '|'} to the sum of its counts. */
    private final Map<String, Integer> countMatrix = new HashMap<>();

    /**
     * Creates a reader and immediately loads {@link #DATAFILE_LOC}.
     *
     * <p>I/O failures (including a missing file) are logged at
     * {@link Level#SEVERE} rather than thrown; the instance is then left with
     * whatever entries were read before the failure, possibly none.
     */
    public ConfusionMatrixReader() {
        try {
            readConfusionMatrix();
        } catch (IOException ex) {
            Logger.getLogger(ConfusionMatrixReader.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Reads the data file line by line and fills both maps.
     *
     * @throws FileNotFoundException if the data file cannot be opened
     * @throws IOException if reading the data file fails
     */
    private void readConfusionMatrix()
            throws FileNotFoundException, IOException {
        // try-with-resources closes the stream and reader on every exit path.
        try (FileInputStream fis = new FileInputStream(DATAFILE_LOC);
                BufferedReader in = new BufferedReader(new InputStreamReader(fis))) {
            String line;
            // readLine() returning null is the reliable end-of-stream signal;
            // ready() only says whether a read would block.
            while ((line = in.readLine()) != null) {
                if (line.isEmpty()) {
                    continue; // tolerate blank lines, e.g. at the end of the file
                }
                parseLine(line);
            }
        }
    }

    /**
     * Parses one non-empty data line and records it in both maps, or reports
     * the line on {@code System.err} when it is malformed.
     */
    private void parseLine(String line) {
        int space = line.lastIndexOf(' ');
        // Without a space there is no count field; without '|' there is no
        // error/correct pair. Validate both before touching either map so the
        // two maps never get out of step.
        int bar = space < 0 ? -1 : line.substring(0, space).indexOf('|');
        if (bar < 0) {
            System.err.println("problems with string <" + line + ">");
            return;
        }

        String keys = line.substring(0, space);
        int count;
        try {
            count = Integer.parseInt(line.substring(space + 1));
        } catch (NumberFormatException e) {
            System.err.println("problems with string <" + line + ">");
            return;
        }

        confusionMatrix.put(keys, count);

        String key = keys.substring(0, bar);
        Integer total = countMatrix.get(key);
        countMatrix.put(key, (total == null ? 0 : total) + count);
    }

    /**
     * Returns the count stored for the pair {@code error|correct} in the
     * confusion matrix; for example {@code getConfusionCount("c", "ct")}
     * looks up the key {@code "c|ct"}.
     *
     * @param error the part of the key before the {@code '|'}
     * @param correct the part of the key after the {@code '|'}
     * @return the stored count, or {@code 0} when the pair is not present
     */
    public int getConfusionCount(String error, String correct) {
        Integer count = confusionMatrix.get(error + "|" + correct);
        return count == null ? 0 : count;
    }
}