/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.ne.encoding;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import ru.ispras.texterra.core.nlp.annotators.ISerializableAnnotator;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.encoding.BILUChunkEncoding;
import ru.ispras.texterra.core.nlp.datamodel.ne.INamedEntity;
import ru.ispras.texterra.core.nlp.datamodel.ne.NamedEntityToken;
import ru.ispras.texterra.core.nlp.datamodel.ne.encoding.INamedEntityEncoding;
import ru.ispras.texterra.core.nlp.datamodel.ne.encoding.NamedEntityEncoding;
import ru.ispras.texterra.core.nlp.datamodel.ne.encoding.NamedEntityEncodingToken;

/**
 * Turns per-token {@link NamedEntityEncodingToken} annotations into
 * {@link NamedEntityToken} spans.
 *
 * <p>Consecutive encoding tokens are grouped into chunks; every chunk becomes one
 * {@link NamedEntityToken} that spans from the start of its first token to the end
 * of its last token and carries the entity of its first token.
 */
public final class BILUNamedEntityDecoder
implements ISerializableAnnotator<NamedEntityToken> {
    private static final long serialVersionUID = -8117387283336900152L;

    /** Encodings that always open a new chunk, whatever came before. */
    private static final Set<BILUChunkEncoding> FIRST_ENCODINGS = new HashSet<BILUChunkEncoding>(Arrays.asList(BILUChunkEncoding.B, BILUChunkEncoding.U));

    /** Encodings that close the chunk they belong to. */
    private static final Set<BILUChunkEncoding> LAST_ENCODINGS = new HashSet<BILUChunkEncoding>(Arrays.asList(BILUChunkEncoding.U, BILUChunkEncoding.L));

    /**
     * Decodes the named-entity encoding annotations of {@code doc}.
     *
     * @param doc document whose {@link NamedEntityEncodingToken} annotations are decoded
     * @return one {@link NamedEntityToken} per decoded chunk, in chunk order
     */
    @Override
    public Collection<NamedEntityToken> annotate(INLPDocument doc) {
        Collection<List<NamedEntityEncodingToken>> chunks = this.extractEntityChunks(doc);
        return this.createTokens(doc, chunks);
    }

    /**
     * Groups the encoding tokens of {@code doc} into chunks, in iteration order.
     *
     * <p>Tokens whose value is not a {@link NamedEntityEncoding} are skipped and
     * interrupt the currently open chunk.
     *
     * @param doc document providing the encoding tokens
     * @return the non-empty chunks found, in order of appearance
     */
    private Collection<List<NamedEntityEncodingToken>> extractEntityChunks(INLPDocument doc) {
        // Entity of the chunk that is still open; null when no chunk can be continued.
        INamedEntity openEntity = null;
        List<List<NamedEntityEncodingToken>> chunks = new ArrayList<List<NamedEntityEncodingToken>>();
        for (NamedEntityEncodingToken token : doc.getAnnotations(NamedEntityEncodingToken.class)) {
            INamedEntityEncoding encoding = (INamedEntityEncoding)token.getValue();
            if (!(encoding instanceof NamedEntityEncoding)) {
                // Any other encoding value (presumably the "outside" marker) breaks the chunk.
                openEntity = null;
                continue;
            }
            NamedEntityEncoding enc = (NamedEntityEncoding)encoding;
            if (this.startsNewChunk(enc, openEntity)) {
                chunks.add(new ArrayList<NamedEntityEncodingToken>());
            }
            chunks.get(chunks.size() - 1).add(token);
            // A closing encoding ends the chunk even when it merely continued one;
            // otherwise a later same-entity token would be glued onto a finished chunk.
            openEntity = this.getNextEntity(enc);
        }
        return chunks;
    }

    /**
     * Tells whether {@code enc} has to open a new chunk.
     *
     * @param enc encoding of the current token
     * @param openEntity entity of the chunk still open, or {@code null} if there is none
     * @return {@code true} if {@code enc} is an opening encoding or its entity differs
     *         from {@code openEntity}
     */
    private boolean startsNewChunk(NamedEntityEncoding enc, INamedEntity openEntity) {
        return FIRST_ENCODINGS.contains(enc.getEncoding()) || !enc.getEntity().equals(openEntity);
    }

    /**
     * Determines which entity remains open after a token with encoding {@code enc}.
     *
     * @param enc encoding of the token just consumed
     * @return the entity of {@code enc}, or {@code null} if {@code enc} closes its chunk
     */
    private INamedEntity getNextEntity(NamedEntityEncoding enc) {
        if (LAST_ENCODINGS.contains(enc.getEncoding())) {
            return null;
        }
        return enc.getEntity();
    }

    /**
     * Builds one {@link NamedEntityToken} per chunk.
     *
     * @param doc document the created tokens are attached to
     * @param chunks non-empty token chunks
     * @return the created tokens, in chunk order
     */
    private Collection<NamedEntityToken> createTokens(INLPDocument doc, Collection<List<NamedEntityEncodingToken>> chunks) {
        List<NamedEntityToken> result = new ArrayList<NamedEntityToken>();
        for (List<NamedEntityEncodingToken> chunk : chunks) {
            NamedEntityEncodingToken first = chunk.get(0);
            NamedEntityEncodingToken last = chunk.get(chunk.size() - 1);
            // The chunk's entity is taken from its first token.
            INamedEntity entity = ((NamedEntityEncoding)first.getValue()).getEntity();
            result.add(new NamedEntityToken(doc, first.getStart(), last.getEnd(), entity));
        }
        return result;
    }
}

