/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.token.postprocess;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import ru.ispras.texterra.core.nlp.annotators.ISerializableAnnotator;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.IToken;
import ru.ispras.texterra.core.nlp.datamodel.Token;

/**
 * Token post-processor that detaches "special symbols" from tokens.
 *
 * <p>Every {@link Token} of a document is matched against a regular expression; each
 * non-empty match becomes a token of its own, and the text before, between and after the
 * matches is emitted as separate tokens. {@link IToken} instances that are not {@link Token}s
 * are passed through unchanged.
 */
public class SpecialSymbolDetacher implements ISerializableAnnotator<IToken> {
    private static final long serialVersionUID = -6330545937348466033L;

    /** Compiled pattern; every non-empty match of it inside a token value is detached. */
    private final Pattern splitPattern;

    /**
     * Creates a detacher for the given regular expression.
     *
     * @param specialSymbolPattern regular expression describing the symbols to detach
     * @throws java.util.regex.PatternSyntaxException if the expression is not a valid regex
     * @throws NullPointerException if {@code specialSymbolPattern} is {@code null}
     */
    public SpecialSymbolDetacher(String specialSymbolPattern) {
        this.splitPattern = Pattern.compile(specialSymbolPattern);
    }

    /**
     * Builds a new token list for the document, in the order the document yields its tokens.
     *
     * @param doc document whose {@link IToken} annotations are processed
     * @return a newly created collection holding the split fragments of every {@link Token}
     *         and, as-is, every other {@link IToken}
     */
    public Collection<IToken> annotate(INLPDocument doc) {
        List<IToken> result = new ArrayList<IToken>();
        for (IToken token : doc.getAnnotations(IToken.class)) {
            if (token instanceof Token) {
                result.addAll(this.split((Token) token));
            } else {
                result.add(token);
            }
        }
        return result;
    }

    /**
     * Splits one token around the non-empty matches of {@link #splitPattern}.
     *
     * <p>Match positions are relative to the token value; they are shifted by
     * {@code token.getStart()} before being passed to the {@link Token} constructor.
     *
     * @param token token to split
     * @return the fragments in left-to-right order; a token without any match yields a single
     *         fragment covering the whole token, and an empty token value yields an empty list
     */
    private List<IToken> split(Token token) {
        String tokenValue = token.getValue();
        Matcher matcher = this.splitPattern.matcher(tokenValue);
        List<IToken> result = new ArrayList<IToken>();
        int shift = token.getStart();
        // Index (within tokenValue) of the first character not yet covered by a fragment.
        int position = 0;
        while (matcher.find()) {
            int matchStart = matcher.start();
            int matchEnd = matcher.end();
            if (matchStart == matchEnd) {
                // A pattern that can match the empty string (e.g. "-*") reports a zero-width
                // match at every position. Such a match detaches nothing, so it is skipped
                // instead of producing an empty token with start == end.
                continue;
            }
            if (position < matchStart) {
                // Plain text between the previous match (or the token start) and this match.
                result.add(new Token(token.getAnnotatedText(), shift + position, shift + matchStart));
            }
            // The matched special symbol itself becomes a separate token.
            result.add(new Token(token.getAnnotatedText(), shift + matchStart, shift + matchEnd));
            position = matchEnd;
        }
        if (position != tokenValue.length()) {
            // Trailing text after the last match (or the whole token when nothing matched).
            result.add(new Token(token.getAnnotatedText(), shift + position, token.getEnd()));
        }
        return result;
    }
}

