/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.token;

import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import ru.ispras.texterra.core.nlp.annotators.ISerializableAnnotator;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;
import ru.ispras.texterra.core.nlp.datamodel.IToken;
import ru.ispras.texterra.core.nlp.datamodel.Token;

/**
 * Tokenizer that cuts a document's text at runs of whitespace.
 *
 * <p>Every maximal stretch of text lying between two matches of {@code \s+}
 * (or between a match and either end of the text) becomes one token. Empty
 * stretches, such as those produced by leading or trailing whitespace, yield
 * no token.
 */
public class SpaceSeparatingTokenizer
implements ISerializableAnnotator<IToken> {
    private static final long serialVersionUID = 2475860392656815251L;

    /**
     * One or more consecutive whitespace characters. The pattern is compiled
     * without {@code UNICODE_CHARACTER_CLASS}, so {@code \s} has its default
     * java.util.regex meaning.
     */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Splits the text returned by {@code doc.getText()} into whitespace-delimited tokens.
     *
     * @param doc document whose text is tokenized; it is also handed to every
     *            created {@link Token}
     * @return the tokens in order of appearance; empty when the text is empty
     *         or consists only of whitespace
     */
    @Override
    public Collection<IToken> annotate(INLPDocument doc) {
        final String text = doc.getText();
        final Matcher gaps = WHITESPACE_RUN.matcher(text);
        final LinkedList<IToken> tokens = new LinkedList<IToken>();

        int tokenStart = 0;
        boolean atGap;
        do {
            atGap = gaps.find();
            // A candidate token runs up to the next whitespace run, or to the
            // end of the text once no further run is found.
            final int tokenEnd = atGap ? gaps.start() : text.length();
            if (tokenStart < tokenEnd) {
                // Offsets passed: index of the first character and the index
                // just past the last character of the span.
                tokens.add(new Token(doc, tokenStart, tokenEnd));
            }
            if (atGap) {
                // The next candidate begins right after this whitespace run.
                tokenStart = gaps.end();
            }
        } while (atGap);
        return tokens;
    }
}

