1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;
/**
 * Unit tests for {@code SpellCorrector.getCandidateWords}.
 *
 * <p>Each test builds, by hand, the set of strings expected for a short input
 * word (insertions, substitutions, deletions and transpositions, as
 * applicable) and compares it with the set returned by the corrector. The
 * corrector is given a corpus reader that reports every word as being in the
 * vocabulary.
 *
 * @author Peter Wu
 */
public class SpellCorrectorTest {

    /** Corpus reader handed to the corrector; replaced before every test. */
    private CorpusReader fullCR;

    /**
     * Creates a fresh accept-everything corpus reader for each test.
     *
     * @throws IOException if the {@code CorpusReader} constructor fails
     */
    @Before
    public void setUp() throws IOException {
        fullCR = new MockCorpusReader();
    }

    /**
     * Runs {@code getCandidateWords} for {@code word} and asserts that the
     * result equals {@code expResult}.
     *
     * @param cr        corpus reader passed to the {@code SpellCorrector}
     * @param word      input word for which candidates are requested
     * @param expResult exact set of candidates expected
     * @throws IOException declared for the calls into the code under test
     */
    private void checkGetCandidateWords(CorpusReader cr, String word,
            Set<String> expResult)
            throws IOException {
        System.out.println("getCandidateWords(" + word + ")");
        // null is passed as the second constructor argument.
        // NOTE(review): presumably it is not used by getCandidateWords - confirm.
        SpellCorrector instance = new SpellCorrector(cr, null);
        // Only Set behaviour is needed here, so do not depend on the concrete type.
        Set<String> result = instance.getCandidateWords(word);
        // quick check: are the results of the same size?
        assertEquals("number of candidates for \"" + word + "\"",
                expResult.size(), result.size());
        // verbose test: are all candidates as expected?
        assertEquals("candidates for \"" + word + "\"", expResult, result);
    }

    /**
     * Empty input: the only expected candidates are the single-letter words
     * obtained by inserting one alphabet character.
     */
    @Test
    public void testGetCandidateWords0() throws IOException {
        Set<String> words = new HashSet<>();
        // test for empty word (only a letter can be inserted)
        for (char c : SpellCorrector.ALPHABET) {
            // insertion
            words.add("" + c);
        }
        checkGetCandidateWords(fullCR, "", words);
    }

    /**
     * Single-letter input {@code "a"}: expects insertions on either side and
     * substitutions of the letter; the empty string that a deletion would
     * produce is expected to be absent.
     */
    @Test
    public void testGetCandidateWords1() throws IOException {
        Set<String> words = new HashSet<>();
        // test for a single letter word
        for (char c : SpellCorrector.ALPHABET) {
            // insertion before and after
            words.add(c + "a");
            words.add("a" + c);
            // substitution should replace the letter.
            words.add("" + c);
        }
        // deletion should not yield a string, so "" is deliberately not expected
        checkGetCandidateWords(fullCR, "a", words);
    }

    /**
     * Two-letter input {@code "up"}: expects insertions at all three
     * positions, substitutions of either letter, both deletions and the
     * single transposition.
     */
    @Test
    public void testGetCandidateWords2() throws IOException {
        Set<String> words = new HashSet<>();
        // test for a two letter word
        for (char c : SpellCorrector.ALPHABET) {
            // insertion before, between and after
            words.add(c + "up");
            words.add("u" + c + "p");
            words.add("up" + c);
            // substitution should replace the letter.
            words.add(c + "p");
            words.add("u" + c);
        }
        // deletion
        words.add("p");
        words.add("u");
        // transposition
        words.add("pu");
        checkGetCandidateWords(fullCR, "up", words);
    }

    /**
     * Three-letter input {@code "ups"}: expects insertions at all four
     * positions, substitutions of each letter, the three deletions and the
     * two adjacent transpositions.
     */
    @Test
    public void testGetCandidateWords3() throws IOException {
        Set<String> words = new HashSet<>();
        // test for a three letter word
        for (char c : SpellCorrector.ALPHABET) {
            // insertion at every position
            words.add(c + "ups");
            words.add("u" + c + "ps");
            words.add("up" + c + "s");
            words.add("ups" + c);
            // substitution should replace the letter.
            words.add(c + "ps");
            words.add("u" + c + "s");
            words.add("up" + c);
        }
        // deletion
        words.add("ps");
        words.add("us");
        words.add("up");
        // transposition
        words.add("pus");
        words.add("usp");
        checkGetCandidateWords(fullCR, "ups", words);
    }

    /**
     * Fake CorpusReader that reports every word as being in the vocabulary,
     * used to test whether getCandidateWords can produce all words.
     *
     * <p>Declared {@code static} because it uses no state of the enclosing
     * test instance.
     */
    private static class MockCorpusReader extends CorpusReader {

        /**
         * @throws IOException if the superclass constructor fails
         */
        public MockCorpusReader() throws IOException {
            super();
        }

        /** Accepts any word. */
        @Override
        public boolean inVocabulary(String word) {
            return true;
        }

        /** Accepts every word of {@code set}; returns them in a new set. */
        @Override
        public HashSet<String> inVocabulary(Set<String> set) {
            return new HashSet<>(set);
        }
    }
}
|