package de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier;

import de.uni_mannheim.informatik.dws.winter.preprocessing.datatypes.ColumnType;
import de.uni_mannheim.informatik.dws.winter.preprocessing.datatypes.DataType;
import de.uni_mannheim.informatik.dws.winter.webtables.detectors.TypeDetector;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.AnnotationPipeline;
import edu.stanford.nlp.pipeline.POSTaggerAnnotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.pipeline.WordsToSentencesAnnotator;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.time.TimeAnnotator;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/winter/webtables/detectors/tabletypeclassifier/TypeClassifier.class */
/**
 * {@link TypeDetector} that predicts the data type of table columns.
 *
 * <p>For every column a fixed list of eight features is computed through a
 * {@link FeatureSet} (backed by a Stanford NLP {@link AnnotationPipeline}) and
 * passed to a {@link Classifier}, which returns the predicted {@link DataType}.
 *
 * <p>The same {@link FeatureSet} instance is reused for every column:
 * {@link #calculateFeatures(String[])} first populates it and then reads the
 * values back. NOTE(review): this looks unsafe for concurrent use of a single
 * instance - confirm before sharing an instance between threads.
 */
public class TypeClassifier implements TypeDetector {

    /*
     * Location handed to MaxentTagger.
     * NOTE(review): the value mixes package-style dots with a backslash
     * separator; verify that it resolves on every target platform.
     */
    private static final String TAGGER_MODEL_PATH =
            "de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier\\english-left3words-distsim.tagger";

    private final Classifier classifier;
    private final FeatureSet featureSet;

    /*
     * Per-instance pipeline. It used to be a static field that every
     * constructor call overwrote, so instances replaced each other's pipeline.
     */
    private AnnotationPipeline pipeline;

    /**
     * Creates the classifier, the feature set (with its POS tagger) and the
     * annotation pipeline.
     */
    public TypeClassifier() {
        this.classifier = new Classifier();
        this.featureSet = new FeatureSet(new MaxentTagger(TAGGER_MODEL_PATH));
        initialize();
    }

    /**
     * (Re)builds the annotation pipeline: tokenizer, sentence splitter, POS
     * tagger and time annotator, added in that order.
     *
     * <p>A fresh pipeline is assembled on every call and only then installed,
     * so calling this method repeatedly never registers an annotator twice.
     */
    public void initialize() {
        AnnotationPipeline freshPipeline = new AnnotationPipeline();
        freshPipeline.addAnnotator(new TokenizerAnnotator(false) {
            public Tokenizer<CoreLabel> getTokenizer(Reader reader) {
                return new PTBTokenizer<CoreLabel>(reader, new CoreLabelTokenFactory(), "");
            }
        });
        freshPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
        freshPipeline.addAnnotator(new POSTaggerAnnotator(false));
        freshPipeline.addAnnotator(new TimeAnnotator("sutime", new Properties()));
        this.pipeline = freshPipeline;
    }

    /**
     * Predicts a data type for every column of a row-major table.
     *
     * @param strArr the table as {@code strArr[row][column]}; a {@code null} or
     *               empty table produces an empty result
     * @return a map from zero-based column index to the string form of the
     *         predicted {@link DataType}
     * @throws FileNotFoundException declared for backward compatibility
     * @throws IOException if {@link #predictDatatype(List)} fails for a column
     */
    public HashMap<Integer, String> execute(String[][] strArr) throws FileNotFoundException, IOException {
        HashMap<Integer, String> typesByColumn = new HashMap<>();
        String[][] columns = transposeArray(strArr);
        for (int columnIndex = 0; columnIndex < columns.length; columnIndex++) {
            // Call the prediction directly (not detectTypeForColumn, which
            // catches IOException) so a failure reaches the caller as declared.
            DataType predicted = predictDatatype(calculateFeatures(columns[columnIndex]));
            typesByColumn.put(Integer.valueOf(columnIndex), predicted.toString());
        }
        return typesByColumn;
    }

    /**
     * Computes the feature values of one column.
     *
     * <p>The shared {@link FeatureSet} is populated from the column and the
     * pipeline, then each feature is read back and converted with
     * {@link String#valueOf(Object)}. The order of the eight entries is fixed;
     * presumably it has to match what {@link Classifier#classify} expects -
     * verify before reordering.
     *
     * @param strArr the cells of a single column
     * @return the eight feature values as strings
     */
    public List<String> calculateFeatures(String[] strArr) {
        this.featureSet.createFeatures(strArr, this.pipeline);
        List<String> features = new ArrayList<>(8);
        features.add(String.valueOf(this.featureSet.getPercentageofAlphabeticCharacters()));
        features.add(String.valueOf(this.featureSet.getPercentageofPunctuationCharacters()));
        features.add(String.valueOf(this.featureSet.getCellContentPattern()));
        features.add(String.valueOf(this.featureSet.isContainPunctuationCharactersinHeaderCell()));
        features.add(String.valueOf(this.featureSet.getPOSPatternofHeaderCell()));
        features.add(String.valueOf(this.featureSet.getAverageCharacterLenghth()));
        features.add(String.valueOf(this.featureSet.isIsDateorTime()));
        features.add(String.valueOf(this.featureSet.isIsBooleanValue()));
        return features;
    }

    /**
     * Delegates a feature list to the underlying {@link Classifier}.
     *
     * @param list feature values as produced by {@link #calculateFeatures(String[])}
     * @return the data type returned by the classifier
     * @throws IOException if the classifier reports one
     */
    public DataType predictDatatype(List<String> list) throws IOException {
        return this.classifier.classify(list);
    }

    /**
     * Transposes a row-major table into a column-major one.
     *
     * <p>The column count is taken from the first row; every other row must
     * have at least that many cells, otherwise an
     * {@link ArrayIndexOutOfBoundsException} is thrown.
     *
     * @param strArr the table as {@code strArr[row][column]}
     * @return the table as {@code result[column][row]}, or an empty array when
     *         {@code strArr} is {@code null} or has no rows
     */
    public String[][] transposeArray(String[][] strArr) {
        if (strArr == null || strArr.length == 0) {
            // No first row to size the result from.
            return new String[0][0];
        }
        int rowCount = strArr.length;
        int columnCount = strArr[0].length;
        String[][] transposed = new String[columnCount][rowCount];
        for (int column = 0; column < columnCount; column++) {
            for (int row = 0; row < rowCount; row++) {
                transposed[column][row] = strArr[row][column];
            }
        }
        return transposed;
    }

    /**
     * Predicts the type of a single column.
     *
     * <p>The interface method cannot throw a checked exception, so an
     * {@link IOException} from the prediction is reported on standard error and
     * the returned {@link ColumnType} is built with a {@code null} data type.
     *
     * @param objArr the cells of the column; elements that are not strings are
     *               converted with {@link Object#toString()}
     * @param str    not used by this implementation
     * @return a {@link ColumnType} built from the predicted data type and a
     *         {@code null} second argument
     */
    @Override
    public ColumnType detectTypeForColumn(Object[] objArr, String str) {
        DataType dataType = null;
        try {
            dataType = predictDatatype(calculateFeatures(toStringArray(objArr)));
        } catch (IOException e) {
            // Deliberately best-effort: report the failure, fall through with null.
            e.printStackTrace();
        }
        return new ColumnType(dataType, null);
    }

    /**
     * Returns the given cells as a {@code String[]}: the same array when it
     * already is one, otherwise an element-wise string copy ({@code null}
     * elements and a {@code null} array are passed through unchanged).
     */
    private static String[] toStringArray(Object[] values) {
        if (values == null) {
            return null;
        }
        if (values instanceof String[]) {
            return (String[]) values;
        }
        String[] converted = new String[values.length];
        for (int i = 0; i < values.length; i++) {
            converted[i] = values[i] == null ? null : values[i].toString();
        }
        return converted;
    }
}
