summaryrefslogtreecommitdiff
path: root/spellchecker/src/ConfusionMatrixReader.java
blob: 90a8cef6818906ebddb16700571b22f802b2e909 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Loads a confusion matrix from the text file {@link #DATAFILE_LOC} and answers
 * count queries against it.
 *
 * <p>Each usable line of the data file has the form
 * {@code error|correct count}: everything before the last space is the key
 * pair, everything after it is an integer count. Lines that do not fit this
 * shape are reported on {@code System.err} and skipped; empty lines are
 * ignored.
 *
 * <p>Loading happens once, in the constructor. If the file cannot be read the
 * failure is logged and the instance simply answers {@code 0} for every query.
 */
public class ConfusionMatrixReader {

    /** Location of the data file; a relative path is resolved against the working directory. */
    final static String DATAFILE_LOC = "confusion_matrix.txt";

    /** Maps the full key {@code error|correct} to its count. */
    final private HashMap<String, Integer> confusionMatrix = new HashMap<>();

    /** Maps the {@code error} part alone to the sum of the counts of all its pairs. */
    final private HashMap<String, Integer> countMatrix = new HashMap<>();

    /**
     * Creates a reader and eagerly loads {@link #DATAFILE_LOC}. I/O failures
     * are logged at {@code SEVERE} level instead of being propagated.
     */
    public ConfusionMatrixReader() {
        try {
            readConfusionMatrix();
        } catch (IOException ex) {
            Logger.getLogger(ConfusionMatrixReader.class.getName()).log(Level.SEVERE,
                    "Could not read confusion matrix from " + DATAFILE_LOC, ex);
        }
    }

    /**
     * Reads the data file line by line and records every well-formed entry.
     *
     * @throws FileNotFoundException if the data file cannot be opened
     * @throws IOException if reading the data file fails
     */
    private void readConfusionMatrix()
            throws FileNotFoundException, IOException {
        // try-with-resources closes the reader (and the underlying stream)
        // even when reading fails half-way through.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(new FileInputStream(DATAFILE_LOC)))) {
            String line;
            // readLine() returning null is the reliable end-of-file signal;
            // ready() only tells whether a read would block.
            while ((line = in.readLine()) != null) {
                if (!line.isEmpty()) {
                    addEntry(line);
                }
            }
        }
    }

    /**
     * Parses one non-empty line and stores it in both maps. A malformed line
     * is reported and skipped without touching either map, so a single bad
     * line can no longer abort the whole load.
     */
    private void addEntry(String line) {
        int space = line.lastIndexOf(' ');
        if (space < 0) {
            reportBadLine(line);
            return;
        }
        String keys = line.substring(0, space);

        int count;
        try {
            count = Integer.parseInt(line.substring(space + 1));
        } catch (NumberFormatException e) {
            reportBadLine(line);
            return;
        }

        int bar = keys.indexOf('|');
        if (bar < 0) {
            reportBadLine(line);
            return;
        }

        confusionMatrix.put(keys, count);
        // Accumulate the per-error total over all "error|*" pairs.
        countMatrix.merge(keys.substring(0, bar), count, Integer::sum);
    }

    /** Writes the diagnostic for a line that could not be parsed. */
    private static void reportBadLine(String line) {
        System.err.println("problems with string <" + line + ">");
    }

    /**
     * Returns the count for the pair {@code error|correct} in the confusion
     * matrix, e.g. {@code "c|ct"} is 36.
     *
     * @param error the erroneous character sequence
     * @param correct the intended character sequence
     * @return the recorded count, or {@code 0} if the pair is not in the matrix
     */
    public int getConfusionCount(String error, String correct) {
        return confusionMatrix.getOrDefault(error + "|" + correct, 0);
    }

    /**
     * Given the error {@code error}, find the number of occurrences of the
     * error, i.e. the sum of the counts of every pair whose left-hand side is
     * {@code error}.
     *
     * @param error the erroneous character sequence
     * @return the total count, or {@code 0} if the error never occurs
     */
    public int getErrorsCount(String error) {
        return countMatrix.getOrDefault(error, 0);
    }
}