/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.spelling;

import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.Locale;
import java.util.regex.Pattern;
import ru.ispras.modis.utils.collections.PackedStringSet;
import ru.ispras.texterra.core.nlp.annotators.IAnnotator;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.IToken;
import ru.ispras.texterra.core.nlp.datamodel.spelling.Misspelling;

/**
 * Annotator that reports tokens which consist solely of ASCII letters but are
 * absent from a supplied vocabulary.
 *
 * <p>Token values are lower-cased before they are checked, so the vocabulary is
 * only ever queried with lower-case strings.
 */
public class MisspellingDetector
implements IAnnotator<Misspelling> {
    /**
     * Matches a non-empty run of ASCII letters. Values containing digits,
     * punctuation, whitespace or non-ASCII characters never match, and neither
     * does the empty string.
     */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /** Set of known words that candidate tokens are looked up in. */
    private final PackedStringSet vocabulary;

    /**
     * Creates a detector backed by the given vocabulary.
     *
     * @param vocabulary set of known words; it is queried with lower-cased token
     *                   values. The argument is not validated here, so a
     *                   {@code null} vocabulary fails on the first lookup.
     */
    public MisspellingDetector(PackedStringSet vocabulary) {
        this.vocabulary = vocabulary;
    }

    /**
     * Creates a {@link Misspelling} for every token of the document whose
     * lower-cased value is an ASCII-letter word missing from the vocabulary.
     *
     * @param doc document whose {@link IToken} annotations are inspected
     * @return the detected misspellings, in the order the tokens were iterated;
     *         empty if nothing was flagged
     */
    @Override
    public Collection<Misspelling> annotate(INLPDocument doc) {
        Collection<Misspelling> res = new ArrayList<Misspelling>();
        for (IToken token : doc.getAnnotations(IToken.class)) {
            // Locale.ROOT keeps case folding independent of the JVM default locale:
            // under a Turkish locale "I" would otherwise fold to a dotless i, which
            // neither matches WORD nor equals the lower-case vocabulary entry.
            String value = ((String)token.getValue()).toLowerCase(Locale.ROOT);
            if (this.isMisspelled(value)) {
                res.add(new Misspelling(token));
            }
        }
        return res;
    }

    /**
     * Decides whether an already lower-cased token value counts as a misspelling.
     *
     * @param value lower-cased token text
     * @return {@code true} if the value is a non-empty ASCII-letter word that the
     *         vocabulary does not contain; {@code false} otherwise
     */
    private boolean isMisspelled(String value) {
        // Check the cheap shape test first so non-word tokens never hit the vocabulary.
        return WORD.matcher(value).matches() && !this.vocabulary.contains(value);
    }
}

