/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.lemmatizer;

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import ru.ispras.texterra.core.nlp.annotators.IAnnotatorTrainer;
import ru.ispras.texterra.core.nlp.annotators.lemmatizer.LemmaGenLemmatizer;
import ru.ispras.texterra.core.nlp.annotators.lemmatizer.model.CharTrieNode;
import ru.ispras.texterra.core.nlp.annotators.lemmatizer.model.LemmaGenModel;
import ru.ispras.texterra.core.nlp.annotators.lemmatizer.model.ModelTrainer;
import ru.ispras.texterra.core.nlp.annotators.lemmatizer.wordProcessors.IWordPreProcessor;
import ru.ispras.texterra.core.nlp.datamodel.FastInRelationNLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.IToken;
import ru.ispras.texterra.core.nlp.datamodel.Lemma;
import ru.ispras.texterra.core.nlp.datamodel.NLPDocumentHelper;
import ru.ispras.texterra.core.nlp.datamodel.pos.IPOSTag;
import ru.ispras.texterra.core.nlp.datamodel.pos.POSToken;
import ru.ispras.texterra.core.nlp.datamodel.relations.CoinsidesAnnotationRelationCacheBuilder;

/**
 * Trains a {@link LemmaGenLemmatizer} from documents that carry {@link Lemma},
 * {@link POSToken} and {@link IToken} annotations.
 *
 * <p>Training collects (word, lemma) pairs grouped by POS tag, then builds one
 * {@link CharTrieNode} per tag with {@link ModelTrainer}. Both the word and the
 * lemma are passed through the configured {@link IWordPreProcessor} before being
 * handed to the trainer.
 */
public class LemmaGenLemmatizerTrainer implements IAnnotatorTrainer<LemmaGenLemmatizer> {
    private static final Logger logger = Logger.getLogger(LemmaGenLemmatizerTrainer.class);

    /**
     * End-of-word marker. The same value is given to the {@link ModelTrainer} that
     * builds the tries and to the {@link LemmaGenModel} that wraps them, so it is
     * defined once here instead of being repeated as a literal.
     */
    private static final char END_OF_WORD_CHAR = '\u0000';

    private final IWordPreProcessor preProcessor;

    /**
     * @param preProcessor pre-processor applied to every word and lemma during
     *                     training; it is also passed to the resulting model
     */
    public LemmaGenLemmatizerTrainer(IWordPreProcessor preProcessor) {
        this.preProcessor = preProcessor;
    }

    /**
     * Builds a lemmatizer from the given annotated documents.
     *
     * @param documents training documents; each {@link Lemma} annotation is looked up
     *                  together with its coinciding {@link IToken} and {@link POSToken}
     * @return a lemmatizer backed by the freshly trained model
     */
    @Override
    public LemmaGenLemmatizer train(Iterable<INLPDocument> documents) {
        Map<IPOSTag, CharTrieNode> tries = this.buildModel(documents);
        LemmaGenModel model = new LemmaGenModel(this.preProcessor, tries, END_OF_WORD_CHAR);
        return new LemmaGenLemmatizer(model);
    }

    /**
     * Collects the per-tag training pairs and trains one trie per POS tag.
     */
    private Map<IPOSTag, CharTrieNode> buildModel(Iterable<INLPDocument> documents) {
        return this.train(this.collectTagLemmas(documents));
    }

    /**
     * Trains one trie for every POS tag present in {@code tagLemmas}.
     *
     * @param tagLemmas word-lemma pairs grouped by POS tag
     * @return map from POS tag to the trie trained on that tag's pairs
     */
    private Map<IPOSTag, CharTrieNode> train(Map<IPOSTag, List<ModelTrainer.WordLemma>> tagLemmas) {
        ModelTrainer modelTrainer = new ModelTrainer(END_OF_WORD_CHAR);
        Map<IPOSTag, CharTrieNode> model = new HashMap<IPOSTag, CharTrieNode>();
        boolean debug = logger.isDebugEnabled();
        for (Map.Entry<IPOSTag, List<ModelTrainer.WordLemma>> entry : tagLemmas.entrySet()) {
            IPOSTag tag = entry.getKey();
            List<ModelTrainer.WordLemma> pairs = entry.getValue();
            // Guarded so the message is not assembled for every tag when debug is off.
            if (debug) {
                logger.debug(tag.getTag() + " : " + pairs.size() + " word-lemma pairs");
            }
            model.put(tag, modelTrainer.train(pairs));
        }
        if (debug) {
            logger.debug("model is created for POS Tags: " + model.keySet());
        }
        return model;
    }

    /**
     * Walks all documents and gathers their word-lemma pairs grouped by POS tag.
     *
     * @param documents training documents
     * @return word-lemma pairs of all documents, grouped by POS tag
     */
    private Map<IPOSTag, List<ModelTrainer.WordLemma>> collectTagLemmas(Iterable<INLPDocument> documents) {
        CoinsidesAnnotationRelationCacheBuilder cacheBuilder = new CoinsidesAnnotationRelationCacheBuilder();
        Map<IPOSTag, List<ModelTrainer.WordLemma>> tagLemmas = new HashMap<IPOSTag, List<ModelTrainer.WordLemma>>();
        for (INLPDocument doc : documents) {
            // The document is wrapped twice, once per annotation pair that addTagLemmas
            // looks up (Lemma/POSToken and Lemma/IToken).
            // NOTE(review): presumably each wrapper speeds up the matching
            // getCoincidingAnnotation lookup - confirm against FastInRelationNLPDocument.
            FastInRelationNLPDocument fastDocument = new FastInRelationNLPDocument(doc, Lemma.class, POSToken.class, cacheBuilder);
            fastDocument = new FastInRelationNLPDocument((INLPDocument)fastDocument, Lemma.class, IToken.class, cacheBuilder);
            // addTagLemmas fills tagLemmas in place and returns that same map.
            this.addTagLemmas(tagLemmas, fastDocument);
        }
        return tagLemmas;
    }

    /**
     * Adds the word-lemma pairs of a single document to {@code tagLemmas}.
     *
     * <p>For every {@link Lemma} annotation the coinciding {@link IToken} supplies the
     * word and the coinciding {@link POSToken} supplies the tag. Pairs whose
     * pre-processed word is empty are skipped. The list for a tag is registered before
     * that check, so a tag can end up with an empty list.
     *
     * @param tagLemmas accumulator, modified in place
     * @param doc       document to read annotations from
     * @return the same {@code tagLemmas} instance that was passed in
     */
    private Map<IPOSTag, List<ModelTrainer.WordLemma>> addTagLemmas(Map<IPOSTag, List<ModelTrainer.WordLemma>> tagLemmas, INLPDocument doc) {
        for (Lemma lemma : doc.getAnnotations(Lemma.class)) {
            IToken token = NLPDocumentHelper.getCoincidingAnnotation(doc, lemma, IToken.class);
            POSToken posToken = NLPDocumentHelper.getCoincidingAnnotation(doc, lemma, POSToken.class);
            IPOSTag tag = (IPOSTag)posToken.getValue();

            List<ModelTrainer.WordLemma> pairs = tagLemmas.get(tag);
            if (pairs == null) {
                pairs = new LinkedList<ModelTrainer.WordLemma>();
                tagLemmas.put(tag, pairs);
            }

            String word = this.preProcessor.getLemma((String)token.getValue());
            String preprocessedLemma = this.preProcessor.getLemma((String)lemma.getValue());
            if (!word.isEmpty()) {
                pairs.add(new ModelTrainer.WordLemma(word, preprocessedLemma));
            }
        }
        return tagLemmas;
    }
}

