/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.pos.opennlp;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import ru.ispras.texterra.core.nlp.annotators.IAnnotatorTrainer;
import ru.ispras.texterra.core.nlp.annotators.pos.opennlp.OpenNLPPOSTagger;
import ru.ispras.texterra.core.nlp.datamodel.IAnnotation;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.NLPDocumentHelper;
import ru.ispras.texterra.core.nlp.datamodel.Sentence;
import ru.ispras.texterra.core.nlp.datamodel.pos.POSToken;
import ru.ispras.texterra.core.nlp.datamodel.pos.morph.IMorphProperties;
import ru.ispras.texterra.core.nlp.datamodel.pos.morph.MorphPropertiesTagFactory;
import ru.ispras.texterra.utils.language.ITexterraLanguage;

/**
 * Trains an {@link OpenNLPPOSTagger} from annotated documents.
 *
 * <p>Each {@link Sentence} annotation of a document becomes one OpenNLP
 * {@link POSSample}: the words are the texts of the {@link POSToken}s contained
 * in the sentence and the tags are the tags of their {@link IMorphProperties}
 * values.
 */
public class OpenNLPPOSTaggerTrainer
implements IAnnotatorTrainer<OpenNLPPOSTagger> {
    /** Tag factory handed over unchanged to every tagger created by {@link #train(Iterable)}. */
    private final MorphPropertiesTagFactory posTagFactory;
    /** Language whose tag is passed to OpenNLP as the language code of the trained model. */
    private final ITexterraLanguage language;

    /**
     * Creates a trainer.
     *
     * @param posTokenFactory tag factory passed to the resulting {@link OpenNLPPOSTagger}
     * @param language language whose language tag is passed to the OpenNLP trainer
     */
    public OpenNLPPOSTaggerTrainer(MorphPropertiesTagFactory posTokenFactory, ITexterraLanguage language) {
        this.posTagFactory = posTokenFactory;
        this.language = language;
    }

    /**
     * Trains a POS model on the given documents and wraps it into a tagger.
     *
     * @param documents documents carrying {@link Sentence} and {@link POSToken} annotations
     * @return a tagger backed by the freshly trained model
     * @throws RuntimeException if OpenNLP reports an {@link IOException} while training
     */
    public OpenNLPPOSTagger train(Iterable<INLPDocument> documents) {
        return new OpenNLPPOSTagger(this.posTagFactory, this.trainModel(documents));
    }

    /**
     * Runs OpenNLP training with default {@link TrainingParameters} and a default
     * {@link POSTaggerFactory}.
     *
     * @param documents training documents
     * @return the trained model
     * @throws RuntimeException wrapping any {@link IOException} raised by the trainer
     */
    private POSModel trainModel(Iterable<INLPDocument> documents) {
        ObjectStream<POSSample> sampleStream = this.getPOSSamples(documents);
        String languageTag = (String) this.language.getLanguageTag();
        try {
            return POSTaggerME.train(languageTag, sampleStream, new TrainingParameters(), new POSTaggerFactory());
        } catch (IOException e) {
            // Keep the unchecked exception type callers already see, but say what failed.
            throw new RuntimeException("Failed to train OpenNLP POS model for language '" + languageTag + "'", e);
        }
    }

    /**
     * Exposes the samples of all documents as one lazily evaluated stream.
     *
     * <p>Documents are converted one at a time, only when the samples of the
     * previous document have been consumed. The stream is single pass:
     * {@code reset()} is not supported.
     *
     * @param documents training documents
     * @return stream over the samples of every document, in iteration order
     */
    private ObjectStream<POSSample> getPOSSamples(Iterable<INLPDocument> documents) {
        final Iterator<INLPDocument> documentsIt = documents.iterator();
        return new ObjectStream<POSSample>() {
            /** Samples of the document currently being read; {@code null} before the first read. */
            private Iterator<POSSample> samplesIt;

            /**
             * Returns the next sample, or {@code null} once every document is exhausted.
             */
            public POSSample read() throws IOException {
                // Loop rather than branch once: a document may contribute no samples at all.
                while (this.samplesIt == null || !this.samplesIt.hasNext()) {
                    if (!documentsIt.hasNext()) {
                        return null;
                    }
                    this.samplesIt = OpenNLPPOSTaggerTrainer.this.getPOSSamples(documentsIt.next()).iterator();
                }
                return this.samplesIt.next();
            }

            /**
             * Always fails: the underlying document iterator cannot be rewound.
             */
            public void reset() throws IOException, UnsupportedOperationException {
                throw new UnsupportedOperationException();
            }

            /**
             * Does nothing; the stream holds no resources of its own.
             */
            public void close() throws IOException {
            }
        };
    }

    /**
     * Converts every sentence of a document into a sample.
     *
     * @param document document to read {@link Sentence} annotations from
     * @return one sample per sentence, in the order the document returns them
     */
    private Collection<POSSample> getPOSSamples(INLPDocument document) {
        List<Sentence> sentences = document.getAnnotations(Sentence.class);
        List<POSSample> posSamples = new ArrayList<POSSample>(sentences.size());
        for (Sentence sentence : sentences) {
            List<POSToken> tokens = NLPDocumentHelper.getContainedAnnotations(document, (IAnnotation) sentence, POSToken.class);
            posSamples.add(this.getPOSSample(tokens));
        }
        return posSamples;
    }

    /**
     * Builds a sample from the tokens of one sentence.
     *
     * @param sentence tokens of the sentence, in order
     * @return sample pairing each token text with the tag of its morphological properties
     */
    private POSSample getPOSSample(List<POSToken> sentence) {
        List<String> words = new ArrayList<String>(sentence.size());
        List<String> tags = new ArrayList<String>(sentence.size());
        for (POSToken posToken : sentence) {
            words.add(posToken.getText());
            tags.add(((IMorphProperties) posToken.getValue()).getTag());
        }
        return new POSSample(words, tags);
    }
}

