summary | refs | log | tree | commit | diff
path: root/spellchecker/test/SpellCorrectorTest.java
blob: 1438b04de3da5f36d73487b2ee6fa17bd32d635a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;

/**
 * Unit tests for {@code SpellCorrector.getCandidateWords}.
 *
 * <p>Each test builds, by hand, the set of strings reachable from an input
 * word through one insertion, substitution, deletion or transposition over
 * {@code SpellCorrector.ALPHABET}, and compares it with the set returned by
 * the corrector.
 *
 * @author Peter Wu
 */
public class SpellCorrectorTest {

    /** Corpus reader stub that treats every word as known; recreated per test. */
    private CorpusReader fullCR;

    /**
     * Creates a fresh {@link MockCorpusReader} before each test.
     *
     * @throws IOException if the {@code CorpusReader} constructor fails
     */
    @Before
    public void setUp() throws IOException {
        fullCR = new MockCorpusReader();
    }

    /**
     * Asserts that the candidate words produced for {@code word} are exactly
     * {@code expResult}.
     *
     * @param cr corpus reader handed to the {@code SpellCorrector}
     * @param word input word for which candidates are generated
     * @param expResult the complete set of expected candidates
     * @throws IOException declared for the calls into {@code SpellCorrector}
     */
    private void checkGetCandidateWords(CorpusReader cr, String word,
            Set<String> expResult)
            throws IOException {
        System.out.println("getCandidateWords(" + word + ")");
        // The second constructor argument is not needed by these tests.
        SpellCorrector instance = new SpellCorrector(cr, null);
        Set<String> result = instance.getCandidateWords(word);
        // Quick check: a size mismatch gives a far shorter failure message
        // than a dump of two large sets.
        assertEquals("number of candidates for \"" + word + "\"",
                expResult.size(), result.size());
        // Verbose check: are all words as expected?
        assertEquals("candidates for \"" + word + "\"", expResult, result);
    }

    /**
     * Empty input: the only possible edit is inserting a single letter.
     *
     * @throws IOException propagated from the check helper
     */
    @Test
    public void testGetCandidateWords0() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion
            words.add("" + c);
        }
        checkGetCandidateWords(fullCR, "", words);
    }

    /**
     * Single-letter input {@code "a"}: insertions on either side and
     * substitution of the only letter.
     *
     * @throws IOException propagated from the check helper
     */
    @Test
    public void testGetCandidateWords1() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion before and after
            words.add(c + "a");
            words.add("a" + c);
            // substitution should replace the letter
            words.add("" + c);
        }
        // Deletion would leave the empty string, which is deliberately not
        // expected among the candidates.
        checkGetCandidateWords(fullCR, "a", words);
    }

    /**
     * Two-letter input {@code "up"}: insertions at every position,
     * substitutions of each letter, both deletions and the one transposition.
     *
     * @throws IOException propagated from the check helper
     */
    @Test
    public void testGetCandidateWords2() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion at every position
            words.add(c + "up");
            words.add("u" + c + "p");
            words.add("up" + c);
            // substitution of each letter
            words.add(c + "p");
            words.add("u" + c);
        }
        // deletion
        words.add("p");
        words.add("u");
        // transposition
        words.add("pu");
        checkGetCandidateWords(fullCR, "up", words);
    }

    /**
     * Three-letter input {@code "ups"}: insertions at every position,
     * substitutions of each letter, all three deletions and both
     * transpositions of adjacent letters.
     *
     * @throws IOException propagated from the check helper
     */
    @Test
    public void testGetCandidateWords3() throws IOException {
        Set<String> words = new HashSet<>();

        for (char c : SpellCorrector.ALPHABET) {
            // insertion at every position
            words.add(c + "ups");
            words.add("u" + c + "ps");
            words.add("up" + c + "s");
            words.add("ups" + c);
            // substitution of each letter
            words.add(c + "ps");
            words.add("u" + c + "s");
            words.add("up" + c);
        }
        // deletion
        words.add("ps");
        words.add("us");
        words.add("up");
        // transposition
        words.add("pus");
        words.add("usp");
        checkGetCandidateWords(fullCR, "ups", words);
    }

    /**
     * Fake CorpusReader which tests whether getCandidateWords can produce all
     * words: its vocabulary check accepts every word it is given.
     *
     * <p>Declared {@code static} because it never touches the enclosing test
     * instance.
     */
    private static class MockCorpusReader extends CorpusReader {

        /**
         * @throws IOException if the {@code CorpusReader} constructor fails
         */
        public MockCorpusReader() throws IOException {
            super();
        }

        /**
         * Reports every given word as being in the vocabulary.
         *
         * @param set words to look up
         * @return a new set holding all elements of {@code set}
         */
        @Override
        public HashSet<String> inVocabulary(Set<String> set) {
            return new HashSet<>(set);
        }
    }
}