/*
 * Decompiled with CFR 0.152.
 */
package ru.ispras.texterra.core.nlp.annotators.ml.featureextractors.documents;

import java.util.Collections;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import ru.ispras.ml.datamodel.Feature;
import ru.ispras.ml.datamodel.value.IValue;
import ru.ispras.ml.datamodel.value.IntValue;
import ru.ispras.ml.datamodel.value.Type;
import ru.ispras.ml.featureextractors.IFeatureExtractor;
import ru.ispras.texterra.core.nlp.datamodel.INLPDocument;

/**
 * Feature extractor that reports how many times a fixed substring occurs
 * in the text of a document.
 *
 * <p>The produced map always holds exactly one entry: an integer-typed
 * {@link Feature} named after the configured substring, mapped to the
 * occurrence count computed by {@link StringUtils#countMatches(String, String)}.
 */
public class SubstringDocumentFeatureExtractor implements IFeatureExtractor<INLPDocument> {
    private static final long serialVersionUID = -3778576781489933663L;

    /** Substring to look for; also used as the name of the emitted feature. */
    private final String substring;

    /**
     * Creates an extractor bound to the given substring.
     *
     * @param substring the text to count inside each document; stored as-is
     */
    public SubstringDocumentFeatureExtractor(String substring) {
        this.substring = substring;
    }

    /**
     * Counts the occurrences of the configured substring in the document text.
     *
     * @param doc the document whose text (via {@code getText()}) is inspected
     * @return a single-entry map from the substring-named integer feature to
     *         the number of matches found in the document text
     */
    public Map<Feature, IValue> extract(INLPDocument doc) {
        // The feature is keyed by the substring itself and typed as an integer.
        Feature feature = new Feature(this.substring, Type.INTEGER);

        // Delegate the actual counting to commons-lang.
        int occurrences = StringUtils.countMatches(doc.getText(), this.substring);
        IValue count = new IntValue(occurrences);

        return Collections.singletonMap(feature, count);
    }
}

