summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-04-04 11:06:32 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-04-04 11:06:32 +0200
commitaed75adb5b127e5943dd16b0e37a4d9b00d2ac12 (patch)
treef8e6d623f415b11a5fe825cb92fa3a0d136aedde
parenta6fbffe08e9217c16ef2afa0d69e8a7c527d5f8d (diff)
downloadassignment4-aed75adb5b127e5943dd16b0e37a4d9b00d2ac12.tar.gz
Save readonly state across calls
Can be cleaned up a bit... but functionality first now.
-rw-r--r--spellchecker/src/SpellCorrector.java84
1 files changed, 56 insertions, 28 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index e3f2955..0bc5b41 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -1,6 +1,8 @@
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
public class SpellCorrector {
@@ -38,50 +40,76 @@ public class SpellCorrector {
}
/**
+ * Converts a set of modified word indices into an array of length
+ * {@code n} whose elements are true for the words that must not be
+ * modified: each modified word and its direct neighbours.
+ */
+ private boolean[] getReadonlyIndices(int n, Set<Integer> modifiedIndices) {
+ boolean[] words_readonly = new boolean[n];
+ for (Integer index : modifiedIndices) {
+ if (index > 0) {
+ words_readonly[index - 1] = true;
+ }
+ words_readonly[index] = true;
+ if (index + 1 < n) {
+ words_readonly[index + 1] = true;
+ }
+ }
+ return words_readonly;
+ }
+
+ /**
* Tries to find up to {@code maxTypos} number of misspelled words.
*
- * @param maxTypos The maximum number of look erroneous words for.
* @return The resulting rater containing probabilities and the corrected
* sentence or null if no suggestions are found.
*/
- private SentenceRater findCorrected(String[] words, int maxTypos) {
- SentenceRater rater = new SentenceRater(words);
-
+ private SentenceRater findBetterWord(String[] words,
+ Set<Integer> modifiedIndices) {
+ SentenceRater bestResult = null;
// find best word
for (int i = 0; i < words.length; i++) {
final int word_index = i;
String old_word = words[word_index];
+ SentenceRater rater = new SentenceRater(words,
+ getReadonlyIndices(words.length, modifiedIndices));
+
// try to find a better suggestion for this word.
Map<String, Double> candidates = getCandidateWords(old_word);
candidates.forEach((word, channel_probability) -> {
rater.tryWord(word_index, word, channel_probability);
});
- };
- // if a better word was found, use the change
- if (rater.hasBetterSuggestion()) {
- rater.saveSuggestion();
- // If some other errors are still possible, hunt for those!
- if (maxTypos > 1) {
- SentenceRater subResult;
- subResult = findCorrected(rater.getBestSentence(), maxTypos - 1);
- // Did it find a corrected word?
- if (subResult != null) {
- // Yes it did. Is this the best one so far?
- if (subResult.getBestScore() > rater.getBestScore()) {
- System.err.println("Subresult is better!");
- return subResult;
+ // if a better word was found, use the change
+ if (rater.hasBetterSuggestion()) {
+ rater.saveSuggestion();
+ bestResult = rater;
+
+ // If some other errors are still possible, hunt for those!
+ modifiedIndices.add(i);
+ if (modifiedIndices.size() < MAX_TYPOS) {
+ SentenceRater subResult;
+ subResult = findBetterWord(rater.getBestSentence(), modifiedIndices);
+ // Did it find a corrected word?
+ if (subResult != null) {
+ // Yes it did. Is this the best one so far?
+ if (subResult.getBestScore() > rater.getBestScore()) {
+ System.err.println("Subresult is better!");
+ bestResult = subResult;
+ } else {
+ System.err.println("Subresult is not better!");
+ }
+ } else {
+ System.err.println("no subresult found");
}
- System.err.println("Subresult is not better!");
- } else {
- System.err.println("no subresult found");
}
+ // make the context to be editable again
+ modifiedIndices.remove(i);
+ } else {
+ System.err.println("No suggestion found for " + old_word + ".");
}
- return rater;
- } else {
- System.err.println("No suggestion found.");
- return null;
}
+ return bestResult;
}
public String correctPhrase(String phrase) {
@@ -91,7 +119,7 @@ public class SpellCorrector {
String[] words = phrase.split(" ");
- SentenceRater rater = findCorrected(words, MAX_TYPOS);
+ SentenceRater rater = findBetterWord(words, new HashSet<>());
// if a better sentence is found, use it.
if (rater != null) {
words = rater.getBestSentence();
@@ -241,10 +269,10 @@ public class SpellCorrector {
private double sentence_probability;
private WordModification best_modification;
- public SentenceRater(String[] words) {
+ public SentenceRater(String[] words, boolean[] word_readonly) {
this.words = words.clone();
this.word_likelihoods = new double[words.length];
- this.words_readonly = new boolean[words.length];
+ this.words_readonly = word_readonly;
for (int i = 0; i < words.length; i++) {
word_likelihoods[i] = getWordLikelihood(i,
LM_PROBABILITY_UNMODIFIED);