package de.uni_mannheim.informatik.dws.winter.webtables.detectors;

import de.uni_mannheim.informatik.dws.winter.preprocessing.datatypes.DataType;
import de.uni_mannheim.informatik.dws.winter.utils.WinterLogManager;
import de.uni_mannheim.informatik.dws.winter.utils.query.Q;
import de.uni_mannheim.informatik.dws.winter.webtables.Table;
import de.uni_mannheim.informatik.dws.winter.webtables.TableColumn;
import de.uni_mannheim.informatik.dws.winter.webtables.TableRow;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.slf4j.Logger;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/winter/webtables/detectors/TableKeyIdentification.class */
/**
 * Chooses the subject column of a {@link Table} and stores its index via
 * {@link Table#setSubjectColumnIndex(int)}.
 *
 * <p>Two strategies are tried in order:
 * <ol>
 *   <li>a string column whose header matches one of the label/name/title patterns below;</li>
 *   <li>otherwise, the string column with the highest combined score
 *       (uniqueness minus nullness).</li>
 * </ol>
 * In both cases the column is only accepted if its combined score reaches
 * {@link #getKeyUniquenessThreshold()} and its average value length lies in
 * ({@value #MIN_AVERAGE_VALUE_LENGTH}, {@value #MAX_AVERAGE_VALUE_LENGTH}].
 * If no column qualifies, the table is left unchanged.
 */
public class TableKeyIdentification {
    /** Average value length a candidate column must exceed (exclusive bound). */
    private static final double MIN_AVERAGE_VALUE_LENGTH = 3.5d;
    /** Average value length a candidate column must not exceed (inclusive bound). */
    private static final double MAX_AVERAGE_VALUE_LENGTH = 200.0d;

    private double keyUniquenessThreshold;
    private static final Logger logger = WinterLogManager.getLogger();
    private static final Pattern prefLabelPattern = Pattern.compile("([^#]*#)?([a-z]{1,9})?prefLabel$");
    private static final Pattern namePattern = Pattern.compile("([^#]*#)?name$");
    private static final Pattern labelPattern = Pattern.compile("([^#]*#)?([a-z]{1,9})?label$");
    private static final Pattern titlePattern = Pattern.compile("([^#]*#)?([a-z]{1,9})?title$");
    private static final Pattern labelPattern2 = Pattern.compile("([^#]*#)?.*Label$");
    private static final Pattern namePattern2 = Pattern.compile("([^#]*#)?.*Name$");
    private static final Pattern titlePattern2 = Pattern.compile("([^#]*#)?.*Title$");
    private static final Pattern alternateNamePattern = Pattern.compile("([^#]*#)?([a-z]{1,9})?alternateName$");

    /**
     * @return the minimum combined score (uniqueness minus nullness) a column
     *         needs in order to be accepted as subject column
     */
    public double getKeyUniquenessThreshold() {
        return this.keyUniquenessThreshold;
    }

    /**
     * @param d the minimum combined score (uniqueness minus nullness) a column
     *          needs in order to be accepted as subject column
     */
    public void setKeyUniquenessThreshold(double d) {
        this.keyUniquenessThreshold = d;
    }

    /**
     * Determines the subject column of {@code table} and, if one qualifies,
     * records it with {@link Table#setSubjectColumnIndex(int)}.
     *
     * @param table the table to analyse; only its subject column index is modified
     */
    public void identifyKeys(Table table) {
        int columnCount = table.getSchema().getSize();
        double[] combinedScores = new double[columnCount];
        double[] averageLengths = new double[columnCount];

        for (int col = 0; col < columnCount; col++) {
            int nullCount = 0;
            int rowCount = 0;
            ArrayList<Integer> valueLengths = new ArrayList<>(table.getSize());
            HashSet<Object> distinctValues = new HashSet<>();
            for (TableRow row : table.getRows()) {
                Object value = row.get(col);
                if (value != null) {
                    distinctValues.add(value);
                    valueLengths.add(value.toString().length());
                } else {
                    nullCount++;
                }
                rowCount++;
            }

            // Ratios must be computed in floating point: int / int would truncate
            // them to 0 or 1 and would throw ArithmeticException for a table
            // without rows.
            double uniqueness = rowCount == 0 ? 0.0d : (double) distinctValues.size() / rowCount;
            double nullness = rowCount == 0 ? 0.0d : (double) nullCount / rowCount;
            combinedScores[col] = uniqueness - nullness;
            // A column without any non-null value gets length 0 so that it can
            // never satisfy the length criteria; this also avoids depending on
            // how Q.average treats an empty collection.
            averageLengths[col] = valueLengths.isEmpty() ? 0.0d : Q.average(valueLengths);

            if (logger.isTraceEnabled()) {
                TableColumn column = table.getSchema().get(col);
                logger.trace(String.format("[%d]%s (%s) Uniqueness=%.4f; Nullness=%.4f; Combined=%.4f; Length=%.4f", col, column.getHeader(), column.getDataType(), uniqueness, nullness, combinedScores[col], averageLengths[col]));
            }
        }

        // Strategy 1: a column whose header looks like a label/name/title.
        TableColumn headerMatch = findHeaderCandidate(table);
        if (headerMatch != null) {
            int matchIndex = table.getSchema().indexOf(headerMatch);
            if (combinedScores[matchIndex] >= getKeyUniquenessThreshold() && hasAcceptableLength(averageLengths[matchIndex])) {
                table.setSubjectColumnIndex(matchIndex);
                logger.trace(String.format("RegEx Header Match: '%s'", table.getSchema().get(matchIndex).getHeader()));
                return;
            }
            logger.trace(String.format("RegEx Header Match: '%s' - insufficient", table.getSchema().get(matchIndex).getHeader()));
        }

        if (columnCount == 0) {
            logger.trace("no columns");
            return;
        }

        // Strategy 2: the string column with the highest combined score among
        // those with an acceptable average value length.
        double bestScore = -1.0d;
        int bestIndex = -1;
        for (int col = 0; col < columnCount; col++) {
            if (combinedScores[col] > bestScore && table.getSchema().get(col).getDataType() == DataType.string && hasAcceptableLength(averageLengths[col])) {
                bestScore = combinedScores[col];
                bestIndex = col;
            }
        }
        if (bestIndex == -1) {
            logger.trace("no columns that match criteria (data type, min length, max length)");
            return;
        }

        int keyIndex = table.getSchema().indexOf(table.getSchema().get(bestIndex));
        if (combinedScores[keyIndex] < getKeyUniquenessThreshold()) {
            logger.trace(String.format("Most unique column: '%s' - insufficient (%.4f)", table.getSchema().get(keyIndex).getHeader(), combinedScores[keyIndex]));
        } else {
            logger.trace(String.format("[TableKeyIdentification] Most unique column: '%s'", table.getSchema().get(keyIndex).getHeader()));
            table.setSubjectColumnIndex(keyIndex);
        }
    }

    /**
     * Scans the string columns from the last to the first and returns the
     * column whose header suggests it is a label. A {@code prefLabel} or
     * {@code name} match ends the scan immediately; for the remaining patterns
     * the scan continues, so the match found last (i.e. at the lowest index
     * visited) is returned.
     *
     * @return the matching column, or {@code null} if no header matches
     */
    private static TableColumn findHeaderCandidate(Table table) {
        TableColumn candidate = null;
        for (int col = table.getColumns().size() - 1; col >= 0; col--) {
            TableColumn column = table.getSchema().get(col);
            if (column.getDataType() != DataType.string) {
                continue;
            }
            String header = column.getHeader();
            if (prefLabelPattern.matcher(header).matches() || namePattern.matcher(header).matches()) {
                return column;
            }
            if (matchesSecondaryPattern(header)) {
                candidate = column;
            }
        }
        return candidate;
    }

    /** Tells whether {@code header} matches one of the patterns that do not end the header scan. */
    private static boolean matchesSecondaryPattern(String header) {
        return labelPattern.matcher(header).matches()
                || titlePattern.matcher(header).matches()
                || labelPattern2.matcher(header).matches()
                || namePattern2.matcher(header).matches()
                || titlePattern2.matcher(header).matches()
                || alternateNamePattern.matcher(header).matches();
    }

    /** Tells whether an average value length lies within the accepted range. */
    private static boolean hasAcceptableLength(double averageLength) {
        return averageLength > MIN_AVERAGE_VALUE_LENGTH && averageLength <= MAX_AVERAGE_VALUE_LENGTH;
    }
}
