import java.util.HashMap;
import java.util.Map;

public class SpellCorrector {

    final private CorpusReader cr;
    final private ConfusionMatrixReader cmr;

    final static char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray();

    /**
     * Lambda values for interpolation of n-gram probabilities. The first value
     * is for unigrams, the second for bigrams, etc.
     */
    private final static double[] LAMBDAS = new double[]{.5, .5};
    /**
     * The language model probability for uncorrected words.
     */
    private final static double LM_PROBABILITY_UNMODIFIED = .95;

    public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) {
        this.cr = cr;
        this.cmr = cmr;
    }

    public String correctPhrase(String phrase) {
        if (phrase == null || phrase.length() == 0) {
            throw new IllegalArgumentException("phrase must be non-empty.");
        }

        String[] words = phrase.split(" ");

        SentenceRater rater = new SentenceRater(words);

        // Try to change 1 word and see whether the suggestions are OK.
        for (int attempt = 0; attempt < 3; attempt++) {
            for (int i = 0; i < words.length; i++) {
                final int word_index = i;
                String old_word = words[word_index];
                // try to find a better suggestion for this word.
                Map<String, Double> candidates = getCandidateWords(old_word);
                candidates.forEach((word, channel_probability) -> {
                    rater.tryWord(word_index, word, channel_probability);
                });
            };
            if (rater.hasBetterSuggestion()) {
                rater.saveSuggestion();
                // TODO: make this nicer
                words = rater.getBestSentence();
                System.err.println("Got new sentence: " + String.join(" ", words));
            } else {
                System.err.println("No suggestion found.");
                break;
            }
        }

        return String.join(" ", words);
    }

    /**
     * Gets all candidate words, resulting from a single insertion, deletion.
     * substitution or transposition.
     *
     * @param word The (wrong) word to find (corrected) candidates for.
     * @return Candidate words mapping to the noisy channel model probability.
     */
    public Map<String, Double> getCandidateWords(String word) {
        Map<String, Double> candidates = new HashMap<>();

        // tries to add word2 to the list of words. This word2 was generated
        // from "word" by changing "error" to "correct".
        TriFunction<String, String, String> push = (word2, error, correct) -> {
            if (!cr.inVocabulary(word2) || word.equals(word2)) {
                return;
            }

            // Find the channel model probability (probability of the edit).
            // P(x|w) = "corrections count given error" / "errors count"
            double correctionCount, errorCount, p_channel;
            correctionCount = (double) cmr.getConfusionCount(error, correct);
            errorCount = cmr.getErrorsCount(error);

            // add-one smoothing
            p_channel = (correctionCount + 1) / (errorCount + 1);

            // TODO: take the max instead of addition?
            // Sum the probabilities as independent modifications can result in
            // the same word ("acess" -> "access" by "a|ac", "e|ce").
            double p = candidates.getOrDefault(word2, 0.0);
            p += p_channel;

            candidates.put(word2, p);
        };

        makeWordInsertion(word, push);
        makeWordSubstitution(word, push);
        makeWordDeletion(word, push);
        makeWordTransposition(word, push);

        return candidates;
    }

    private void makeWordInsertion(String word,
            TriFunction<String, String, String> push) {
        // Generate words by insertion of a character
        // |p]en -> [ap]en, [p|en -> [pi]en, p[e|n -> p[ei]n, pe[n| -> pe[nm].
        for (int i = 0; i <= word.length(); i++) {
            String head, head_last, tail;
            // the word is split into [0..i] [i..n]
            head = word.substring(0, i);
            tail = word.substring(i);

            for (char c : ALPHABET) {
                if (i == 0) {
                    // insert "c" before current letter (i = 0)
                    head_last = word.substring(0);
                    push.call(head + c + tail, head_last, c + head_last);
                } else {
                    // insert "c" after current letter (i > 0)
                    head_last = word.substring(i - 1, i);
                    push.call(head + c + tail, head_last, head_last + c);
                }
            }
        }
    }

    private void makeWordSubstitution(String word,
            TriFunction<String, String, String> push) {
        // Generate words by substitution of a character.
        // |p]en -> [P]en, p|e]n -> p[E]n, pe|n] -> pe[N].
        for (int i = 0; i < word.length(); i++) {
            String head, middle, tail;
            // the word is split into [0..i] [i..i+1] [i+1..n]
            head = word.substring(0, i);
            middle = word.substring(i, i + 1);
            tail = word.substring(i + 1);

            for (char c : ALPHABET) {
                // substitution of "middle" with "c"
                push.call(head + c + tail, middle, "" + c);
            }
        }
    }

    private void makeWordDeletion(String word,
            TriFunction<String, String, String> push) {
        // Generate words by deletion of a character. Requires at least two
        // characters in the word (an empty string is invalid).
        // |pe]in -> [e]in, p|ei]n -> p[i]n, pe|in] -> pe[n], pe[i|n] -> pe[i].
        if (word.length() > 1) {
            for (int i = 0; i < word.length(); i++) {
                String head, middle, error, tail;
                // the word is split into [0..i] [i..i+1] [i+1..n]
                head = word.substring(0, i);
                middle = word.substring(i, i + 1);
                tail = word.substring(i + 1);

                if (i + 1 < word.length()) {
                    // Common case: deletion of the following character.
                    // p|ei]n -> p[i]n
                    error = word.substring(i, i + 2);
                } else {
                    // Last case: operate on the previous and next characters.
                    // pe[i|n] -> pe[i]
                    error = word.substring(i - 1, i + 1);
                }
                push.call(head + tail, error, middle);
            }
        }
    }

    private void makeWordTransposition(String word,
            TriFunction<String, String, String> push) {
        // The Damerau-Levenshtein distance also includes transposition of two
        // adjacent letters to account for spelling mistakes.
        // [p|e]n -> [ep]n, p[e|n] -> p[ne].
        for (int i = 1; i < word.length(); i++) {
            String head, middle, transposed, tail;
            // the word is split into [0..i-1] [i-1..i+1] [i+1..n]
            head = word.substring(0, i - 1);
            middle = word.substring(i - 1, i + 1);
            tail = word.substring(i + 1);

            transposed = "" + middle.charAt(1) + middle.charAt(0);

            push.call(head + transposed + tail, middle, transposed);
        }
    }

    private class SentenceRater {

        private final String[] words;
        private final double[] word_likelihoods;
        /**
         * Words that cannot be modified in further iterations.
         */
        private final boolean[] words_readonly;

        private double best_likelihood = Double.MIN_VALUE;
        private WordModification best_modification;

        public SentenceRater(String[] words) {
            this.words = words.clone();
            this.word_likelihoods = new double[words.length];
            this.words_readonly = new boolean[words.length];
            for (int i = 0; i < words.length; i++) {
                word_likelihoods[i] = getWordLikelihood(i, words[i],
                        LM_PROBABILITY_UNMODIFIED);
            }
        }

        /**
         * Calculates the probability that the word {@code word} is valid at
         * position {@code index}.
         */
        public double getWordLikelihood(int index, String word,
                double channel_probability) {
            String prev_word, ngram;
            double prior, score, p;
            // a suggested word not in the vocabulary is certainly wrong,
            // changed (or consequentive) words should also not be changed.
            if (!cr.inVocabulary(word) || words_readonly[index]) {
                return 0.0;
            }

            assert channel_probability > 0.0;

            // P(x|w) is given by language model (noisy channel probability).
            // Find prior P(w) = (freq(w) + .5) / N.
            // Then compute candidate score by P(w) * P(x|w)
            prior = (cr.getNGramCount(word) + .5) / cr.getUnigramCount();
            score = prior * channel_probability;

            // Now obtain n-gram probabilities. Use interpolation to combine
            // unigrams and bigrams.
            p = LAMBDAS[0] * cr.getSmoothedCount(word) / cr.getUnigramCount();

            // Add probability of bi-grams.
            // For words u and w, P(w|u) = P(u, w) / P(u).
            if (index > 0) {
                prev_word = words[index - 1];
                ngram = prev_word + " " + word;
                p += LAMBDAS[1] * cr.getSmoothedCount(ngram) / cr.getSmoothedCount(prev_word);
            }

            // Combine the candidate score with the n-gram probabilities.
            p *= score;
            assert p > 0.0 : "failed probability for " + word;
            return p;
        }

        /**
         * Tries to modify word at position {@code index} to word and calculates
         * the likelihood of the word. The best single-word modification is
         * remembered (the sentence itself will not be modified).
         */
        public void tryWord(int index, String word, double channel_probability) {
            double likelihood;

            // try the modification, calculate the result and restore.
            likelihood = getWordLikelihood(index, word, channel_probability);

            // look for the word which increases the likelihood the most
            // (that is, the difference of the old and new likelihood).
            likelihood -= word_likelihoods[index];

            if (likelihood > best_likelihood) {
                best_likelihood = likelihood;
                best_modification = new WordModification(index, word);
            }
        }

        /**
         * Returns the sentence, possibly changed.
         */
        public String[] getBestSentence() {
            String[] new_words = words.clone();
            if (best_modification != null) {
                new_words[best_modification.index] = best_modification.word;
            }
            return new_words;
        }

        /**
         * Returns true if it is likely that a word in the sentence can be
         * corrected.
         */
        public boolean hasBetterSuggestion() {
            return best_modification != null;
        }

        /**
         * Save the best suggestion so far, ensuring that future suggestions
         * won't overwrite this one.
         */
        public void saveSuggestion() {
            int index = best_modification.index;
            String word = best_modification.word;
            words[index] = word;
            if (index > 0) {
                words_readonly[index - 1] = true;
            }
            words_readonly[index] = true;
            if (index + 1 < words.length) {
                words_readonly[index + 1] = true;
            }
            best_modification = null;
            best_likelihood = Double.MIN_VALUE;
            word_likelihoods[index] = getWordLikelihood(index, word,
                    LM_PROBABILITY_UNMODIFIED);
        }

        private class WordModification {

            private final int index;
            private final String word;

            public WordModification(int index, String word) {
                this.index = index;
                this.word = word;
            }
        }
    }
}