package de.uni_mannheim.informatik.dws.winter.datafusion;

import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import de.uni_mannheim.informatik.dws.winter.clustering.ConnectedComponentClusterer;
import de.uni_mannheim.informatik.dws.winter.model.Correspondence;
import de.uni_mannheim.informatik.dws.winter.model.Fusible;
import de.uni_mannheim.informatik.dws.winter.model.FusibleDataSet;
import de.uni_mannheim.informatik.dws.winter.model.Matchable;
import de.uni_mannheim.informatik.dws.winter.model.RecordGroup;
import de.uni_mannheim.informatik.dws.winter.model.RecordGroupFactory;
import de.uni_mannheim.informatik.dws.winter.model.Triple;
import de.uni_mannheim.informatik.dws.winter.processing.Processable;
import de.uni_mannheim.informatik.dws.winter.utils.query.Q;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/winter/datafusion/CorrespondenceSet.class */
/**
 * Collects records into {@link RecordGroup}s based on correspondences, either read from a CSV
 * file or derived from a set of {@link Correspondence} objects.
 *
 * <p>The class keeps two structures in sync: the collection of all groups and an index from
 * identifier to the group that identifier has been assigned to.
 *
 * @param <RecordType> the type of the records that are grouped
 * @param <SchemaElementType> the type of the schema elements of the records
 */
public class CorrespondenceSet<RecordType extends Matchable & Fusible<SchemaElementType>, SchemaElementType extends Matchable> {
    /** All record groups created so far. */
    private final Collection<RecordGroup<RecordType, SchemaElementType>> groups = new LinkedList<>();
    /** Maps an identifier to the group it was assigned to. */
    private final Map<String, RecordGroup<RecordType, SchemaElementType>> recordIndex = new HashMap<>();
    /** Factory used whenever a new, empty group is needed. */
    private RecordGroupFactory<RecordType, SchemaElementType> groupFactory = new RecordGroupFactory<>();

    /**
     * Replaces the factory that is used to create new record groups.
     *
     * @param recordGroupFactory the factory to use from now on
     */
    public void setGroupFactory(RecordGroupFactory<RecordType, SchemaElementType> recordGroupFactory) {
        this.groupFactory = recordGroupFactory;
    }

    /**
     * Reads correspondences from a CSV file whose first column holds an id from the first dataset
     * and whose second column holds an id from the second dataset, and groups the records
     * accordingly.
     *
     * <p>Rows that reference an id which is missing from its dataset are reported on
     * {@code System.err} and skipped.
     *
     * @param file the CSV file to read
     * @param fusibleDataSet the dataset that contains the ids of the first column
     * @param fusibleDataSet2 the dataset that contains the ids of the second column
     * @throws IOException if the file cannot be read
     */
    public void loadCorrespondences(File file, FusibleDataSet<RecordType, SchemaElementType> fusibleDataSet, FusibleDataSet<RecordType, SchemaElementType> fusibleDataSet2) throws IOException {
        // try-with-resources closes the reader even if a row cannot be processed
        try (CSVReader reader = new CSVReader(new FileReader(file))) {
            String[] row;
            while ((row = reader.readNext()) != null) {
                String firstId = row[0];
                if (fusibleDataSet.getRecord(firstId) == null) {
                    System.err.println(String.format("Record %s not found in first dataset", firstId));
                    continue;
                }
                String secondId = row[1];
                if (fusibleDataSet2.getRecord(secondId) == null) {
                    // report the id that was actually looked up in the second dataset
                    System.err.println(String.format("Record %s not found in second dataset", secondId));
                    continue;
                }

                RecordGroup<RecordType, SchemaElementType> firstGroup = this.recordIndex.get(firstId);
                RecordGroup<RecordType, SchemaElementType> secondGroup = this.recordIndex.get(secondId);

                if (firstGroup == null && secondGroup == null) {
                    // neither record is grouped yet: start a new group holding both
                    RecordGroup<RecordType, SchemaElementType> newGroup = this.groupFactory.createRecordGroup();
                    newGroup.addRecord(firstId, fusibleDataSet);
                    newGroup.addRecord(secondId, fusibleDataSet2);
                    this.recordIndex.put(firstId, newGroup);
                    this.recordIndex.put(secondId, newGroup);
                    this.groups.add(newGroup);
                } else if (secondGroup == null) {
                    // only the first record is grouped: the second one joins that group
                    firstGroup.addRecord(secondId, fusibleDataSet2);
                    this.recordIndex.put(secondId, firstGroup);
                } else if (firstGroup == null) {
                    // only the second record is grouped: the first one joins that group
                    secondGroup.addRecord(firstId, fusibleDataSet);
                    this.recordIndex.put(firstId, secondGroup);
                } else if (firstGroup != secondGroup) {
                    // both records are grouped separately: fold the second group into the first.
                    // NOTE(review): assumes mergeWith adds the other group's records to the
                    // receiver - confirm against RecordGroup.
                    firstGroup.mergeWith(secondGroup);
                    for (String id : secondGroup.getRecordIds()) {
                        this.recordIndex.put(id, firstGroup);
                    }
                    // the absorbed group must no longer be reported as a group of its own
                    this.groups.remove(secondGroup);
                }
                // otherwise both records already share one group and nothing has to change
            }
        }
    }

    /**
     * Reads correspondences from a CSV file when only the dataset of the first column is
     * available; records are grouped by the id found in the second column.
     *
     * <p>Rows whose first id is missing from the dataset are skipped; the number of skipped rows
     * is reported on {@code System.err} once the file has been read.
     *
     * @param file the CSV file to read
     * @param fusibleDataSet the dataset that contains the ids of the first column
     * @throws IOException if the file cannot be read
     */
    public void loadCorrespondences(File file, FusibleDataSet<RecordType, SchemaElementType> fusibleDataSet) throws IOException {
        int skipped = 0;
        // try-with-resources closes the reader even if a row cannot be processed
        try (CSVReader reader = new CSVReader(new FileReader(file))) {
            String[] row;
            while ((row = reader.readNext()) != null) {
                String sourceId = row[0];
                if (fusibleDataSet.getRecord(sourceId) == null) {
                    skipped++;
                    continue;
                }
                String targetId = row[1];
                RecordGroup<RecordType, SchemaElementType> existing = this.recordIndex.get(targetId);
                if (existing == null) {
                    // first time this target id is seen: open a group keyed by it.
                    // NOTE(review): sourceId is not indexed here although it is in the branch
                    // below - kept as in the original; confirm whether that is intended.
                    RecordGroup<RecordType, SchemaElementType> newGroup = this.groupFactory.createRecordGroup();
                    newGroup.addRecord(sourceId, fusibleDataSet);
                    this.recordIndex.put(targetId, newGroup);
                    this.groups.add(newGroup);
                } else {
                    existing.addRecord(sourceId, fusibleDataSet);
                    this.recordIndex.put(sourceId, existing);
                }
            }
        }
        if (skipped > 0) {
            System.err.println(String.format("Skipped %,d records (not found in provided dataset)", skipped));
        }
    }

    /**
     * Builds record groups from correspondences: the correspondences are added as edges to a
     * {@link ConnectedComponentClusterer} and every resulting cluster becomes one group.
     *
     * @param processable the correspondences to cluster
     * @param fusibleDataSet the dataset registered for the first record of each correspondence
     * @param fusibleDataSet2 the dataset registered for the second record of each correspondence
     */
    public void createFromCorrespondences(Processable<Correspondence<RecordType, Matchable>> processable, FusibleDataSet<RecordType, SchemaElementType> fusibleDataSet, FusibleDataSet<RecordType, SchemaElementType> fusibleDataSet2) {
        // remembers which dataset each identifier was registered with
        Map<String, FusibleDataSet<RecordType, SchemaElementType>> datasetById = new HashMap<>();
        ConnectedComponentClusterer<RecordType> clusterer = new ConnectedComponentClusterer<>();

        for (Correspondence<RecordType, Matchable> correspondence : processable.get()) {
            RecordType first = correspondence.getFirstRecord();
            RecordType second = correspondence.getSecondRecord();
            clusterer.addEdge(new Triple<RecordType, RecordType, Double>(first, second, correspondence.getSimilarityScore()));
            datasetById.put(first.getIdentifier(), fusibleDataSet);
            datasetById.put(second.getIdentifier(), fusibleDataSet2);
        }

        for (Collection<RecordType> cluster : clusterer.createResult().keySet()) {
            RecordGroup<RecordType, SchemaElementType> group = this.groupFactory.createRecordGroup();
            for (RecordType member : cluster) {
                String id = member.getIdentifier();
                group.addRecord(id, datasetById.get(id));
                this.recordIndex.put(id, group);
            }
            this.groups.add(group);
        }
    }

    /**
     * Returns the record groups created so far.
     *
     * @return the internal (live) collection of groups
     */
    public Collection<RecordGroup<RecordType, SchemaElementType>> getRecordGroups() {
        return this.groups;
    }

    /**
     * Writes the group size distribution to a CSV file with the columns "Group Size" and
     * "Frequency".
     *
     * @param file the file to write
     * @throws IOException if the file cannot be written
     */
    public void writeGroupSizeDistribution(File file) throws IOException {
        Map<Integer, Integer> frequencies = countGroupSizes();
        // try-with-resources closes (and thereby flushes) the writer even if a write fails
        try (CSVWriter writer = new CSVWriter(new FileWriter(file))) {
            writer.writeNext(new String[] {"Group Size", "Frequency"});
            // same ordering helper as printGroupSizeDistribution, so the rows are deterministic
            for (Integer size : Q.sort(frequencies.keySet())) {
                writer.writeNext(new String[] {size.toString(), frequencies.get(size).toString()});
            }
        }
    }

    /**
     * Prints the group size distribution to {@code System.out} as a two-column table, ordered
     * by {@code Q.sort} on the group sizes.
     *
     * @throws IOException declared for compatibility with existing callers
     */
    public void printGroupSizeDistribution() throws IOException {
        Map<Integer, Integer> frequencies = countGroupSizes();
        System.out.println("Group Size Distribtion of " + this.groups.size() + " groups:");
        System.out.println("\tGroup Size | Frequency ");
        System.out.println("\t———————————————————————");
        for (Integer size : Q.sort(frequencies.keySet())) {
            // both columns are right-aligned in a field of width 10
            System.out.println(String.format("\t%10s |%10s", size, frequencies.get(size)));
        }
    }

    /** Counts how many groups exist for each group size. */
    private Map<Integer, Integer> countGroupSizes() {
        Map<Integer, Integer> frequencies = new HashMap<>();
        for (RecordGroup<RecordType, SchemaElementType> group : this.groups) {
            Integer size = group.getSize();
            Integer count = frequencies.get(size);
            frequencies.put(size, count == null ? 1 : count + 1);
        }
        return frequencies;
    }
}
