summaryrefslogtreecommitdiff
path: root/spellchecker/test/SpellCorrectorTest.java
blob: d65891467baa04f3c4d3c6b3688fc036382ec8f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;

/**
 * Unit tests for {@code SpellCorrector.getCandidateWords}.
 *
 * <p>Each test builds, by hand, the set of strings expected for one input
 * word (insertions, substitutions, deletions and transpositions over
 * {@code SpellCorrector.ALPHABET}) and compares it with the key set of the
 * value returned by {@code getCandidateWords}. A corpus reader that accepts
 * every word is used so that vocabulary filtering does not hide candidates.
 *
 * @author Peter Wu
 */
public class SpellCorrectorTest {

    /** Corpus reader that treats every word as being in the vocabulary. */
    private CorpusReader fullCR;
    /** Confusion matrix reader handed to every corrector under test. */
    private ConfusionMatrixReader cmr;

    /**
     * Creates fresh collaborators before each test.
     *
     * @throws IOException if one of the readers fails to initialise
     */
    @Before
    public void setUp() throws IOException {
        fullCR = new MockCorpusReader();
        cmr = new ConfusionMatrixReader();
    }

    /**
     * Asserts that the candidate words produced for {@code word} are exactly
     * {@code expResult}.
     *
     * @param cr corpus reader used to construct the corrector
     * @param word the input word for which candidates are generated
     * @param expResult the complete set of expected candidate words
     * @throws IOException declared because the calls into the corrector may
     *         throw it
     */
    private void checkGetCandidateWords(CorpusReader cr, String word,
            Set<String> expResult)
            throws IOException {
        System.out.println("getCandidateWords(" + word + ")");
        SpellCorrector instance = new SpellCorrector(cr, cmr);
        Set<String> result = instance.getCandidateWords(word).keySet();
        String context = "candidates for \"" + word + "\"";
        // Quick check first: a size mismatch gives a short failure message
        // instead of a dump of two large sets.
        assertEquals("number of " + context,
                expResult.size(), result.size());
        // Exhaustive check: are all candidate words exactly as expected?
        assertEquals(context, expResult, result);
    }

    /** Empty input: the only possible edit is inserting a single letter. */
    @Test
    public void testGetCandidateWords0() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion
            words.add("" + c);
        }
        checkGetCandidateWords(fullCR, "", words);
    }

    /** Single-letter input {@code "a"}: insertions and substitutions only. */
    @Test
    public void testGetCandidateWords1() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion before and after
            words.add(c + "a");
            words.add("a" + c);
            // substitution should replace the letter.
            words.add("" + c);
        }
        // Deletion would leave the empty string, which is deliberately not
        // part of the expected candidates.
        checkGetCandidateWords(fullCR, "a", words);
    }

    /** Two-letter input {@code "up"}: all four edit kinds apply. */
    @Test
    public void testGetCandidateWords2() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion at every position
            words.add(c + "up");
            words.add("u" + c + "p");
            words.add("up" + c);
            // substitution should replace the letter.
            words.add(c + "p");
            words.add("u" + c);
        }
        // deletion
        words.add("p");
        words.add("u");
        // transposition
        words.add("pu");
        checkGetCandidateWords(fullCR, "up", words);
    }

    /** Three-letter input {@code "ups"}: all four edit kinds apply. */
    @Test
    public void testGetCandidateWords3() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion at every position
            words.add(c + "ups");
            words.add("u" + c + "ps");
            words.add("up" + c + "s");
            words.add("ups" + c);
            // substitution should replace the letter.
            words.add(c + "ps");
            words.add("u" + c + "s");
            words.add("up" + c);
        }
        // deletion
        words.add("ps");
        words.add("us");
        words.add("up");
        // transposition (adjacent letters swapped)
        words.add("pus");
        words.add("usp");
        checkGetCandidateWords(fullCR, "ups", words);
    }

    /**
     * Fake CorpusReader that reports every word as being in the vocabulary,
     * so the tests can check whether getCandidateWords produces all words
     * without any of them being rejected by a vocabulary lookup.
     *
     * <p>Declared {@code static} because it uses no state of the enclosing
     * test instance.
     */
    private static class MockCorpusReader extends CorpusReader {

        public MockCorpusReader() throws IOException {
            super();
        }

        /** Accepts any word. */
        @Override
        public boolean inVocabulary(String word) {
            return true;
        }

        /** Accepts every word of {@code set}; returns an independent copy. */
        @Override
        public HashSet<String> inVocabulary(Set<String> set) {
            return new HashSet<>(set);
        }
    }
}