blob: 8ebb82995ed64e5df633585a55a3237312da2d98 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
import java.util.HashSet;
public class SpellCorrector {
final private CorpusReader cr;
final private ConfusionMatrixReader cmr;
final static char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray();
public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) {
this.cr = cr;
this.cmr = cmr;
}
public String correctPhrase(String phrase) {
if (phrase == null || phrase.length() == 0) {
throw new IllegalArgumentException("phrase must be non-empty.");
}
String[] words = phrase.split(" ");
String finalSuggestion = "";
/**
* CODE TO BE ADDED *
*/
return finalSuggestion.trim();
}
public double calculateChannelModelProbability(String suggested, String incorrect) {
/**
* CODE TO BE ADDED *
*/
return 0.0;
}
/**
* Gets all candidate words, resulting from a single addition, deletion.
* substitution or transposition.
*
* @param word The word to find candidates for.
* @return A set of words with candidate words.
*/
public HashSet<String> getCandidateWords(String word) {
HashSet<String> ListOfWords = new HashSet<>();
// generate words by insertion of a character
for (int i = 0; i <= word.length(); i++) {
// the word is split into [0..i] [i..n]
// if i == word.length, then the last part is empty
String head = word.substring(0, i);
String tail = i < word.length() ? word.substring(i) : "";
for (char c : ALPHABET) {
// insertion of a single character
ListOfWords.add(head + c + tail);
}
}
for (int i = 0; i < word.length(); i++) {
// the word is split into [0..i] [i..i+1] [i+1..n]
// if i == word.length() - 1, then the tail is empty.
String head = word.substring(0, i);
String tail = word.substring(i + 1);
for (char c : ALPHABET) {
// substitution
ListOfWords.add(head + c + tail);
}
// deletion of a single character (prevent adding empty words)
if (word.length() > 1) {
ListOfWords.add(head + tail);
}
}
// The Damerau-Levenshtein distance also includes transposition to
// account for spelling mistakes. This loop transposes the previous
// character with the current index.
for (int i = 1; i < word.length(); i++) {
// the word is split into [0..i-1] [i-1..i] [i..i+1] [i+1..n]
// if i == word.length() - 1, then the tail is empty.
String head = word.substring(0, i - 1);
char c1 = word.charAt(i - 1);
char c2 = word.charAt(i);
String tail = i + 1 < word.length() ? word.substring(i + 1) : "";
// transposition of two adjacent letters
ListOfWords.add(head + c2 + c1 + tail);
}
return cr.inVocabulary(ListOfWords);
}
}
|