package de.uni_mannheim.informatik.dws.winter.webtables.detectors.tabletypeclassifier;

import de.uni_mannheim.informatik.dws.winter.webtables.parsers.StringNormalizer;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.AnnotationPipeline;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.time.TimeAnnotations;
import java.text.DecimalFormat;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/winter/webtables/detectors/tabletypeclassifier/FeatureSet.class */
/**
 * Holds the features describing one table column and computes a subset of them
 * from the raw cell values via {@link #createFeatures(String[], AnnotationPipeline)}.
 *
 * <p>Numeric features default to {@code -1}, string features to {@code null} and boolean
 * features to {@code false}. Features that {@code createFeatures} does not compute are
 * only changed through their setters.
 *
 * <p>Instances are mutable; no synchronization is performed.
 */
public class FeatureSet {
    /** Cell values (compared after trimming) that are treated as "no value" by every feature. */
    private static final String[] MISSING_VALUE_MARKERS = {
        "-", "--", "---", "n/a", "N/A", "(n/a)", "Unknown", "unknown", "?", "??", "."
    };

    /** Additional "no value" spellings that all features except the header detection skip. */
    private static final String[] NULL_LITERALS = {"null", "Null"};

    /** Trimmed cell values counted as boolean by {@link #prepareBooleanValue(String, int)}. */
    private static final String[] BOOLEAN_LITERALS = {
        "yes", "Yes", "YES", "no", "No", "NO", "1", "0",
        "true", "True", "TRUE", "false", "False", "FALSE"
    };

    private MaxentTagger maxentTagger;
    DecimalFormat df = new DecimalFormat("#.##");
    private double FractionofCellswithNumnericContent = -1.0d;
    private double AverageNumberofDataTokensinEachCell = -1.0d;
    private double AverageNumberofSpecialCharactersinEachCell = -1.0d;
    private double AverageNumberofPunctuationsinEachCell = -1.0d;
    private boolean IsAlphanumeric = false;
    private String CellContentPattern = null;
    private double PercentageofNumericCharacters = -1.0d;
    private double PercentageofAlphabeticCharacters = -1.0d;
    private double PercentageofSpecialCharacters = -1.0d;
    private double PercentageofPunctuationCharacters = -1.0d;
    private String POSPatternofColumn = null;
    private String POSPatternofHeaderCell = null;
    private int NumberofDistinctValuesinColumn = -1;
    private boolean HasHeaderCell = false;
    private boolean ContainSpecialCharactersinHeaderCell = false;
    private boolean ContainPunctuationCharactersinHeaderCell = false;
    private boolean IsDateorTime = false;
    private int AverageCharacterLenghth = -1;
    private boolean IsBooleanValue = false;
    OtherOperations otherOperations = new OtherOperations();

    /**
     * Creates a feature set with all features at their defaults.
     *
     * @param maxentTagger tagger used by {@link #posPatternofHeaderCell(String)}
     */
    public FeatureSet(MaxentTagger maxentTagger) {
        this.maxentTagger = maxentTagger;
    }

    // ------------------------------------------------------------------
    // Plain accessors. Features marked "not computed here" are never
    // written by createFeatures in this class.
    // ------------------------------------------------------------------

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getFractionofCellswithNumnericContent() {
        return this.FractionofCellswithNumnericContent;
    }

    public void setFractionofCellswithNumnericContent(double d) {
        this.FractionofCellswithNumnericContent = d;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getAverageNumberofDataTokensinEachCell() {
        return this.AverageNumberofDataTokensinEachCell;
    }

    public void setAverageNumberofDataTokensinEachCell(double d) {
        this.AverageNumberofDataTokensinEachCell = d;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getAverageNumberofSpecialCharactersinEachCell() {
        return this.AverageNumberofSpecialCharactersinEachCell;
    }

    public void setAverageNumberofSpecialCharactersinEachCell(double d) {
        this.AverageNumberofSpecialCharactersinEachCell = d;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getAverageNumberofPunctuationsinEachCell() {
        return this.AverageNumberofPunctuationsinEachCell;
    }

    public void setAverageNumberofPunctuationsinEachCell(double d) {
        this.AverageNumberofPunctuationsinEachCell = d;
    }

    /** @return the stored flag; not computed here, {@code false} unless set. */
    public boolean isIsAlphanumeric() {
        return this.IsAlphanumeric;
    }

    public void setIsAlphanumeric(boolean z) {
        this.IsAlphanumeric = z;
    }

    /** @return the most frequent abstracted cell pattern, or {@code null} if none was found. */
    public String getCellContentPattern() {
        return this.CellContentPattern;
    }

    public void setCellContentPattern(String str) {
        this.CellContentPattern = str;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getPercentageofNumericCharacters() {
        return this.PercentageofNumericCharacters;
    }

    public void setPercentageofNumericCharacters(double d) {
        this.PercentageofNumericCharacters = d;
    }

    /** @return fraction of ASCII letters in the column content, or {@code -1} if there are none. */
    public double getPercentageofAlphabeticCharacters() {
        return this.PercentageofAlphabeticCharacters;
    }

    public void setPercentageofAlphabeticCharacters(double d) {
        this.PercentageofAlphabeticCharacters = d;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public double getPercentageofSpecialCharacters() {
        return this.PercentageofSpecialCharacters;
    }

    public void setPercentageofSpecialCharacters(double d) {
        this.PercentageofSpecialCharacters = d;
    }

    /** @return fraction of punctuation characters in the column content, or {@code -1} if none. */
    public double getPercentageofPunctuationCharacters() {
        return this.PercentageofPunctuationCharacters;
    }

    public void setPercentageofPunctuationCharacters(double d) {
        this.PercentageofPunctuationCharacters = d;
    }

    /** @return whether the first two usable cells have different abstracted patterns. */
    public boolean isHasHeaderCell() {
        return this.HasHeaderCell;
    }

    public void setHasHeaderCell(boolean z) {
        this.HasHeaderCell = z;
    }

    /** @return the stored flag; not computed here, {@code false} unless set. */
    public boolean isContainSpecialCharactersinHeaderCell() {
        return this.ContainSpecialCharactersinHeaderCell;
    }

    public void setContainSpecialCharactersinHeaderCell(boolean z) {
        this.ContainSpecialCharactersinHeaderCell = z;
    }

    /** @return whether the header cell was found to contain punctuation characters. */
    public boolean isContainPunctuationCharactersinHeaderCell() {
        return this.ContainPunctuationCharactersinHeaderCell;
    }

    public void setContainPunctuationCharactersinHeaderCell(boolean z) {
        this.ContainPunctuationCharactersinHeaderCell = z;
    }

    /** @return the stored value; not computed here, {@code null} unless set. */
    public String getPOSPatternofColumn() {
        return this.POSPatternofColumn;
    }

    public void setPOSPatternofColumn(String str) {
        this.POSPatternofColumn = str;
    }

    /** @return the dash-joined tag sequence of the header cell, or {@code null}. */
    public String getPOSPatternofHeaderCell() {
        return this.POSPatternofHeaderCell;
    }

    public void setPOSPatternofHeaderCell(String str) {
        this.POSPatternofHeaderCell = str;
    }

    /** @return the stored value; not computed here, {@code -1} unless set. */
    public int getNumberofDistinctValuesinColumn() {
        return this.NumberofDistinctValuesinColumn;
    }

    public void setNumberofDistinctValuesinColumn(int i) {
        this.NumberofDistinctValuesinColumn = i;
    }

    /** @return summed trimmed length of usable cells divided (integer division) by the cell count. */
    public int getAverageCharacterLenghth() {
        return this.AverageCharacterLenghth;
    }

    public void setAverageCharacterLenghth(int i) {
        this.AverageCharacterLenghth = i;
    }

    /** @return whether more than half of the cells yielded a temporal annotation. */
    public boolean isIsDateorTime() {
        return this.IsDateorTime;
    }

    public void setIsDateorTime(boolean z) {
        this.IsDateorTime = z;
    }

    /** @return whether more than half of the cells are one of the recognised boolean literals. */
    public boolean isIsBooleanValue() {
        return this.IsBooleanValue;
    }

    public void setIsBooleanValue(boolean z) {
        this.IsBooleanValue = z;
    }

    /**
     * Computes the column features this class knows how to derive from the raw cells:
     * header detection, dominant content pattern, average character length, date/time
     * and boolean detection, header punctuation, header POS pattern, and the fractions
     * of alphabetic and punctuation characters.
     *
     * <p>{@code null} cells are skipped, but all ratios are taken over the full array
     * length. The first array element is treated as the potential header cell.
     *
     * <p>A {@code null} or empty array resets those features to their defaults instead
     * of failing with an exception.
     *
     * @param strArr             the cells of one column, first element being the candidate header
     * @param annotationPipeline pipeline used to look for temporal expressions in each cell
     */
    public void createFeatures(String[] strArr, AnnotationPipeline annotationPipeline) {
        if (strArr == null || strArr.length == 0) {
            // Nothing to analyse; also avoids division by zero and strArr[0] below.
            resetComputedFeatures();
            return;
        }

        int nonNullCells = 0;
        String leadingPatterns = "";
        Map<String, Integer> patternCounts = new TreeMap<String, Integer>();
        int totalLength = 0;
        int temporalCells = 0;
        int booleanCells = 0;

        for (String cell : strArr) {
            if (cell == null) {
                continue;
            }
            // Header detection only looks at the first two non-null cells.
            if (nonNullCells < 2) {
                leadingPatterns = prepareHasHeaderCell(cell, leadingPatterns);
            }
            patternCounts = prepareCellContentPattern(cell, patternCounts);
            totalLength = prepareAvgCharLength(cell, totalLength);
            temporalCells = prepareSUTParser(cell, annotationPipeline, temporalCells);
            booleanCells = prepareBooleanValue(cell, booleanCells);
            nonNullCells++;
        }

        validateHasHeaderCell(leadingPatterns);
        validateCellContentPattern(patternCounts);
        // Integer division is intentional here: the feature itself is an int.
        setAverageCharacterLenghth(totalLength / strArr.length);
        validateSUTParser(temporalCells, strArr.length);
        validateBooleanValue(booleanCells, strArr.length);

        String headerCell = strArr[0];
        if (isHasHeaderCell() && headerCell != null) {
            containPunctuationCharactersinHeaderCell(headerCell);
        } else {
            // Do not let a value from an earlier column survive on a reused instance.
            setContainPunctuationCharactersinHeaderCell(false);
        }
        if (headerCell != null) {
            posPatternofHeaderCell(headerCell);
        } else {
            setPOSPatternofHeaderCell(null);
        }

        String columnContentWithoutSpaces = this.otherOperations.getColumnContentWithoutSpaces(strArr);
        validatePercentageofAlphabeticCharacters(columnContentWithoutSpaces);
        validatePercentageofPunctuationCharacters(columnContentWithoutSpaces);
    }

    /** Restores every feature that {@link #createFeatures} computes to its default value. */
    private void resetComputedFeatures() {
        setHasHeaderCell(false);
        setCellContentPattern(null);
        setAverageCharacterLenghth(-1);
        setIsDateorTime(false);
        setIsBooleanValue(false);
        setContainPunctuationCharactersinHeaderCell(false);
        setPOSPatternofHeaderCell(null);
        setPercentageofAlphabeticCharacters(-1.0d);
        setPercentageofPunctuationCharacters(-1.0d);
    }

    /** Returns whether {@code value} equals one of {@code candidates}. */
    private static boolean isOneOf(String value, String[] candidates) {
        for (String candidate : candidates) {
            if (candidate.equals(value)) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether the trimmed cell is empty or one of the basic "no value" markers. */
    private static boolean isMissingValue(String trimmed) {
        return trimmed.isEmpty() || isOneOf(trimmed, MISSING_VALUE_MARKERS);
    }

    /**
     * Returns whether the trimmed cell is a basic "no value" marker, a {@code null}/{@code Null}
     * literal, or equal to {@code StringNormalizer.nullValue}.
     */
    private static boolean isMissingOrNullValue(String trimmed) {
        return isMissingValue(trimmed)
                || isOneOf(trimmed, NULL_LITERALS)
                || trimmed.equals(StringNormalizer.nullValue);
    }

    /**
     * Abstracts a cell into a pattern string: letter runs become {@code a}, digit runs
     * {@code d}, runs of other characters {@code s}, and runs of whitespace/punctuation
     * {@code p}.
     */
    private static String toContentPattern(String cell) {
        // NOTE(review): String.replace is literal, so this removes the two-character
        // sequence backslash + 's', not whitespace; whitespace ends up as 'p' below.
        // NOTE(review): the range A-z also covers [ \ ] ^ _ and the backtick, which are
        // therefore left untouched. Both are kept as-is so the emitted patterns do not change.
        return cell.replace("\\s", "")
                .replaceAll("[a-zA-Z]+", "a")
                .replaceAll("[0-9]+", "d")
                .replaceAll("[^a-zA-z\\d\\s.!;():?,\\-'\"]+", "s")
                .replaceAll("[\\s.!;():?,\\-'\"]+", "p");
    }

    /**
     * Appends the abstracted pattern of {@code str} plus a trailing space to {@code str2},
     * unless the cell is a basic "no value" marker.
     *
     * @return the (possibly extended) accumulator
     */
    private String prepareHasHeaderCell(String str, String str2) {
        if (isMissingValue(str.trim())) {
            return str2;
        }
        return str2 + toContentPattern(str) + " ";
    }

    /**
     * Decides the header feature from the space-separated patterns collected by
     * {@link #prepareHasHeaderCell}: a header is assumed only when two patterns are
     * present and they differ.
     */
    private void validateHasHeaderCell(String str) {
        String[] patterns = str.trim().split("\\s");
        setHasHeaderCell(patterns.length >= 2 && !patterns[0].equals(patterns[1]));
    }

    /**
     * Stores the fraction of ASCII letters in {@code str}, or {@code -1} when the string
     * is empty or contains no letters.
     */
    private void validatePercentageofAlphabeticCharacters(String str) {
        int letters = str.replaceAll("[^a-zA-Z]", "").length();
        if (str.length() == 0 || letters == 0) {
            setPercentageofAlphabeticCharacters(-1.0d);
            return;
        }
        // Divide as doubles: int / int would truncate every proper fraction to 0.
        setPercentageofAlphabeticCharacters((double) letters / str.length());
    }

    /**
     * Stores the fraction of whitespace/punctuation characters in {@code str}, or
     * {@code -1} when the string is empty or contains none.
     */
    private void validatePercentageofPunctuationCharacters(String str) {
        int punctuation = str.replaceAll("[^\\s.!;():?,\\-'\"]+", "").length();
        if (str.length() == 0 || punctuation == 0) {
            setPercentageofPunctuationCharacters(-1.0d);
            return;
        }
        // Divide as doubles: int / int would truncate every proper fraction to 0.
        setPercentageofPunctuationCharacters((double) punctuation / str.length());
    }

    /**
     * Counts the abstracted pattern of {@code str} in {@code map}, skipping "no value" cells.
     *
     * @return the same map instance that was passed in
     */
    private Map<String, Integer> prepareCellContentPattern(String str, Map<String, Integer> map) {
        if (!isMissingOrNullValue(str.trim())) {
            String pattern = toContentPattern(str);
            Integer seen = map.get(pattern);
            map.put(pattern, seen == null ? 1 : seen + 1);
        }
        return map;
    }

    /**
     * Stores the key of the last entry returned by
     * {@code otherOperations.entriesSortedByValues(map)} as the cell content pattern
     * ({@code null} for an empty map) and then clears the map.
     */
    private void validateCellContentPattern(Map<String, Integer> map) {
        if (map.isEmpty()) {
            setCellContentPattern(null);
        } else {
            setCellContentPattern((String) ((Map.Entry) this.otherOperations.entriesSortedByValues(map).last()).getKey());
        }
        map.clear();
    }

    /**
     * Sets the header-punctuation feature: {@code true} when the cell is a real value and,
     * after removing everything except whitespace/punctuation and trimming, something remains.
     *
     * @param str the header cell; must not be {@code null}
     */
    public void containPunctuationCharactersinHeaderCell(String str) {
        if (isMissingOrNullValue(str.trim())) {
            setContainPunctuationCharactersinHeaderCell(false);
            return;
        }
        String punctuationOnly = str.replaceAll("[^\\s.!;():?,\\-'\"]+", "").trim();
        setContainPunctuationCharactersinHeaderCell(punctuationOnly.length() >= 1);
    }

    /**
     * Tags the header cell and stores the tags joined by {@code -} as the header POS
     * pattern; stores {@code null} for "no value" cells.
     *
     * @param str the header cell; must not be {@code null}
     */
    public void posPatternofHeaderCell(String str) {
        if (isMissingOrNullValue(str.trim())) {
            setPOSPatternofHeaderCell(null);
            return;
        }
        StringBuilder tags = new StringBuilder();
        for (String taggedToken : this.maxentTagger.tagString(str).split("\\s")) {
            // NOTE(review): takes everything after the FIRST underscore; a token that
            // itself contains '_' would keep part of the word - confirm whether
            // lastIndexOf is wanted.
            tags.append(taggedToken.substring(taggedToken.indexOf("_") + 1)).append("-");
        }
        // Drop the trailing separator; empty tokens leave "--" behind, collapse those.
        String pattern = tags.substring(0, tags.length() - 1).replaceAll("--", "-");
        // A single pattern is produced per call, so it is stored directly (the previous
        // one-entry frequency map could only ever yield this same value).
        setPOSPatternofHeaderCell(pattern);
    }

    /** Adds the trimmed length of {@code str} to {@code i} unless the cell is "no value". */
    private int prepareAvgCharLength(String str, int i) {
        String trimmed = str.trim();
        return isMissingOrNullValue(trimmed) ? i : i + trimmed.length();
    }

    /**
     * Runs the annotation pipeline on the cell and increments {@code i} when at least one
     * {@code TimexAnnotations} entry is produced. "No value" cells are not annotated.
     */
    private int prepareSUTParser(String str, AnnotationPipeline annotationPipeline, int i) {
        if (isMissingOrNullValue(str.trim())) {
            return i;
        }
        Annotation annotation = new Annotation(str);
        // Fixed document date handed to the pipeline for every cell.
        annotation.set(CoreAnnotations.DocDateAnnotation.class, "2013-07-14");
        annotationPipeline.annotate(annotation);
        List<?> timexes = (List<?>) annotation.get(TimeAnnotations.TimexAnnotations.class);
        if (timexes != null && !timexes.isEmpty()) {
            i++;
        }
        return i;
    }

    /** Flags the column as date/time when strictly more than {@code i2 / 2} cells matched. */
    private void validateSUTParser(int i, int i2) {
        setIsDateorTime(i > i2 / 2);
    }

    /** Increments {@code i} when the trimmed cell is one of the recognised boolean literals. */
    private int prepareBooleanValue(String str, int i) {
        String trimmed = str.trim();
        if (!isMissingOrNullValue(trimmed) && isOneOf(trimmed, BOOLEAN_LITERALS)) {
            i++;
        }
        return i;
    }

    /** Flags the column as boolean when strictly more than {@code i2 / 2} cells matched. */
    private void validateBooleanValue(int i, int i2) {
        setIsBooleanValue(i > i2 / 2);
    }
}
