/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.token.opennlp;

import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.Collection;
import java.util.LinkedList;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
import ru.ispras.texterra.core.nlp.annotators.ISerializableAnnotator;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.IToken;
import ru.ispras.texterra.core.nlp.datamodel.Sentence;
import ru.ispras.texterra.core.nlp.datamodel.Token;

/**
 * Annotator that produces {@link Token} annotations for a document by running
 * an OpenNLP {@link TokenizerME} built from a {@link TokenizerModel}.
 *
 * <p>The model field is {@code transient}; it is written and restored by the
 * custom {@code writeObject}/{@code readObject} hooks below rather than by
 * default Java serialization.
 */
public final class OpenNLPTokenizer implements ISerializableAnnotator<IToken> {
    private static final long serialVersionUID = 1862261944884349721L;

    /** Tokenizer model; excluded from default serialization and handled manually. */
    private transient TokenizerModel model;

    /**
     * Creates an annotator backed by the given model.
     *
     * @param tokenizerModel model used to build a tokenizer on every {@link #annotate} call;
     *                       it is stored as-is, without a null check
     */
    public OpenNLPTokenizer(TokenizerModel tokenizerModel) {
        this.model = tokenizerModel;
    }

    /**
     * Tokenizes the document and returns the resulting tokens.
     *
     * <p>If the document carries {@link Sentence} annotations, each sentence text is
     * tokenized separately and token offsets are shifted by the sentence start.
     * Otherwise the whole document text is tokenized with a zero offset.
     *
     * @param doc document to tokenize
     * @return tokens in the order they were produced
     */
    public Collection<IToken> annotate(INLPDocument doc) {
        // A fresh tokenizer instance is built from the model on each call.
        Tokenizer tokenizer = new TokenizerME(this.model);
        LinkedList<IToken> tokens = new LinkedList<IToken>();

        if (!doc.hasAnnotations(Sentence.class)) {
            // No sentence segmentation available: treat the full text as one unit.
            tokens.addAll(this.processText(tokenizer, doc, 0, doc.getText()));
            return tokens;
        }

        for (Sentence sentence : doc.getAnnotations(Sentence.class)) {
            int offset = sentence.getStart();
            String sentenceText = sentence.getText();
            tokens.addAll(this.processText(tokenizer, doc, offset, sentenceText));
        }
        return tokens;
    }

    /**
     * Tokenizes {@code text} and converts every span returned by the tokenizer
     * into a {@link Token} attached to {@code doc}.
     *
     * @param tokenizer tokenizer used to split the text
     * @param doc       document the created tokens refer to
     * @param margin    value added to both span boundaries of every token
     * @param text      text to tokenize
     * @return tokens in the order of the spans returned by the tokenizer
     */
    private Collection<Token> processText(Tokenizer tokenizer, INLPDocument doc, int margin, String text) {
        Span[] spans = tokenizer.tokenizePos(text);
        LinkedList<Token> tokens = new LinkedList<Token>();
        for (int i = 0; i < spans.length; i++) {
            Span span = spans[i];
            int start = margin + span.getStart();
            int end = margin + span.getEnd();
            tokens.add(new Token(doc, start, end));
        }
        return tokens;
    }

    /**
     * Serialization hook: writes the default (non-transient) state and then
     * lets the model serialize itself into the same stream.
     *
     * @param out stream the object is being written to
     * @throws IOException if writing fails
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();
        OutputStream modelSink = out;
        this.model.serialize(modelSink);
    }

    /**
     * Deserialization hook: reads the default (non-transient) state and then
     * rebuilds the model from the data that follows in the same stream.
     *
     * @param in stream the object is being read from
     * @throws ClassNotFoundException if default deserialization cannot resolve a class
     * @throws IOException            if reading fails
     */
    private void readObject(ObjectInputStream in) throws ClassNotFoundException, IOException {
        in.defaultReadObject();
        InputStream modelSource = in;
        this.model = new TokenizerModel(modelSource);
    }
}

