From aed75adb5b127e5943dd16b0e37a4d9b00d2ac12 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sat, 4 Apr 2015 11:06:32 +0200 Subject: Save readonly state across calls Can be cleaned up a bit... but functionality first now. --- spellchecker/src/SpellCorrector.java | 84 ++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index e3f2955..0bc5b41 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -1,6 +1,8 @@ import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; public class SpellCorrector { @@ -37,51 +39,77 @@ public class SpellCorrector { this.cmr = cmr; } + /** + * Converts a set of word indices which were modified to an array of length + * {@code n} of which elements are set to true when those should not be + * modified. + */ + private boolean[] getReadonlyIndices(int n, Set modifiedIndices) { + boolean[] words_readonly = new boolean[n]; + for (Integer index : modifiedIndices) { + if (index > 0) { + words_readonly[index - 1] = true; + } + words_readonly[index] = true; + if (index + 1 < n) { + words_readonly[index + 1] = true; + } + } + return words_readonly; + } + /** * Tries to find up to {@code maxTypos} number of misspelled words. * - * @param maxTypos The maximum number of look erroneous words for. * @return The resulting rater containing probabilities and the corrected * sentence or null if no suggestions are found. */ - private SentenceRater findCorrected(String[] words, int maxTypos) { - SentenceRater rater = new SentenceRater(words); - + private SentenceRater findBetterWord(String[] words, + Set modifiedIndices) { + SentenceRater bestResult = null; // find best word for (int i = 0; i < words.length; i++) { final int word_index = i; String old_word = words[word_index]; + SentenceRater rater = new SentenceRater(words, + getReadonlyIndices(words.length, modifiedIndices)); + // try to find a better suggestion for this word. Map candidates = getCandidateWords(old_word); candidates.forEach((word, channel_probability) -> { rater.tryWord(word_index, word, channel_probability); }); - }; - // if a better word was found, use the change - if (rater.hasBetterSuggestion()) { - rater.saveSuggestion(); - // If some other errors are still possible, hunt for those! - if (maxTypos > 1) { - SentenceRater subResult; - subResult = findCorrected(rater.getBestSentence(), maxTypos - 1); - // Did it find a corrected word? - if (subResult != null) { - // Yes it did. Is this the best one so far? - if (subResult.getBestScore() > rater.getBestScore()) { - System.err.println("Subresult is better!"); - return subResult; + // if a better word was found, use the change + if (rater.hasBetterSuggestion()) { + rater.saveSuggestion(); + bestResult = rater; + + // If some other errors are still possible, hunt for those! + modifiedIndices.add(i); + if (modifiedIndices.size() < MAX_TYPOS) { + SentenceRater subResult; + subResult = findBetterWord(rater.getBestSentence(), modifiedIndices); + // Did it find a corrected word? + if (subResult != null) { + // Yes it did. Is this the best one so far? + if (subResult.getBestScore() > rater.getBestScore()) { + System.err.println("Subresult is better!"); + bestResult = subResult; + } else { + System.err.println("Subresult is not better!"); + } + } else { + System.err.println("no subresult found"); } - System.err.println("Subresult is not better!"); - } else { - System.err.println("no subresult found"); } + // make the context to be editable again + modifiedIndices.remove(i); + } else { + System.err.println("No suggestion found for " + old_word + "."); } - return rater; - } else { - System.err.println("No suggestion found."); - return null; } + return bestResult; } public String correctPhrase(String phrase) { @@ -91,7 +119,7 @@ public class SpellCorrector { String[] words = phrase.split(" "); - SentenceRater rater = findCorrected(words, MAX_TYPOS); + SentenceRater rater = findBetterWord(words, new HashSet<>()); // if a better sentence is found, use it. if (rater != null) { words = rater.getBestSentence(); @@ -241,10 +269,10 @@ public class SpellCorrector { private double sentence_probability; private WordModification best_modification; - public SentenceRater(String[] words) { + public SentenceRater(String[] words, boolean[] word_readonly) { this.words = words.clone(); this.word_likelihoods = new double[words.length]; - this.words_readonly = new boolean[words.length]; + this.words_readonly = word_readonly; for (int i = 0; i < words.length; i++) { word_likelihoods[i] = getWordLikelihood(i, LM_PROBABILITY_UNMODIFIED); -- cgit v1.2.1