package de.uni_mannheim.informatik.dws.winter.usecase.movies;

import de.uni_mannheim.informatik.dws.winter.matching.MatchingEngine;
import de.uni_mannheim.informatik.dws.winter.matching.MatchingEvaluator;
import de.uni_mannheim.informatik.dws.winter.matching.algorithms.RuleLearner;
import de.uni_mannheim.informatik.dws.winter.matching.blockers.StandardRecordBlocker;
import de.uni_mannheim.informatik.dws.winter.matching.blockers.generators.StaticBlockingKeyGenerator;
import de.uni_mannheim.informatik.dws.winter.matching.rules.LinearCombinationMatchingRule;
import de.uni_mannheim.informatik.dws.winter.model.Correspondence;
import de.uni_mannheim.informatik.dws.winter.model.HashedDataSet;
import de.uni_mannheim.informatik.dws.winter.model.MatchingGoldStandard;
import de.uni_mannheim.informatik.dws.winter.model.Performance;
import de.uni_mannheim.informatik.dws.winter.model.defaultmodel.Attribute;
import de.uni_mannheim.informatik.dws.winter.model.defaultmodel.RecordCSVFormatter;
import de.uni_mannheim.informatik.dws.winter.model.io.CSVCorrespondenceFormatter;
import de.uni_mannheim.informatik.dws.winter.processing.Processable;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieBlockingKeyByDecadeGenerator;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieBlockingKeyByYearGenerator;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieDateComparator10Years;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieTitleComparatorEqual;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieTitleComparatorJaccard;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.identityresolution.MovieTitleComparatorLevenshtein;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.model.Movie;
import de.uni_mannheim.informatik.dws.winter.usecase.movies.model.MovieXMLReader;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

/* loaded from: input_file:de/uni_mannheim/informatik/dws/winter/usecase/movies/Movies_IdentityResolution_Main.class */
/**
 * Command-line demo that runs identity resolution on the movie use case.
 *
 * <p>Each public entry point loads movie data sets from XML, configures a
 * {@link LinearCombinationMatchingRule}, and then either runs the matching and
 * evaluates it against a gold standard, or generates a feature file for rule
 * learning. All file locations are relative to the working directory.
 */
public class Movies_IdentityResolution_Main {

    /** XPath handed to {@link MovieXMLReader} for every input file. */
    private static final String MOVIE_XPATH = "/movies/movie";

    private static final String ACADEMY_AWARDS_XML = "usecase/movie/input/academy_awards.xml";
    private static final String ACTORS_XML = "usecase/movie/input/actors.xml";
    private static final String GOLDEN_GLOBES_XML = "usecase/movie/input/golden_globes.xml";

    private static final String ACADEMY_AWARDS_2_ACTORS_CSV =
            "usecase/movie/output/academy_awards_2_actors_correspondences.csv";
    private static final String ACTORS_2_GOLDEN_GLOBES_CSV =
            "usecase/movie/output/actors_2_golden_globes_correspondences.csv";
    private static final String FEATURES_CSV =
            "usecase/movie/output/optimisation/academy_awards_2_actors_features.csv";

    private static final String GS_TRAINING = "usecase/movie/goldstandard/gs_academy_awards_2_actors.csv";
    private static final String GS_TEST = "usecase/movie/goldstandard/gs_academy_awards_2_actors_test.csv";
    private static final String GS_V2 = "usecase/movie/goldstandard/gs_academy_awards_2_actors_v2.csv";
    private static final String GS_ACTORS_2_GOLDEN_GLOBES = "usecase/movie/goldstandard/gs_actors_2_golden_globes.csv";

    private static final String HEADING_ACADEMY_AWARDS_ACTORS = "Academy Awards <-> Actors";
    private static final String HEADING_ACTORS_GOLDEN_GLOBES = "Actors <-> Golden Globes";

    /**
     * Pattern used to print movie dates as year-month-day.
     *
     * <p>The previous pattern was {@code "YYYY-MM-DD"}. The date object's
     * {@code toString(String)} is presumably Joda-Time's pattern formatter
     * (NOTE(review): confirm against {@code Movie#getDate()}), where upper-case
     * {@code D} is day-of-year and {@code Y} is year-of-era, so the old output
     * showed the day of the year instead of the day of the month.
     */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /**
     * Matches Academy Awards against Actors with a Jaccard title comparator
     * (weight 0.8) and a date comparator (weight 0.2), blocking by decade,
     * writes the correspondences to CSV and prints precision/recall/F1 against
     * the v2 gold standard.
     *
     * @param args ignored
     * @throws Exception if loading, matching, or writing fails
     */
    public static void main(String[] args) throws Exception {
        HashedDataSet<Movie, Attribute> academyAwards = loadMovies(ACADEMY_AWARDS_XML);
        HashedDataSet<Movie, Attribute> actors = loadMovies(ACTORS_XML);

        LinearCombinationMatchingRule<Movie, Attribute> rule =
                new LinearCombinationMatchingRule<Movie, Attribute>(0.7);
        rule.addComparator(new MovieTitleComparatorJaccard(), 0.8);
        rule.addComparator(new MovieDateComparator10Years(), 0.2);

        MatchingEngine<Movie, Attribute> engine = new MatchingEngine<Movie, Attribute>();
        StandardRecordBlocker<Movie, Attribute> blocker =
                new StandardRecordBlocker<Movie, Attribute>(new MovieBlockingKeyByDecadeGenerator());
        Processable<Correspondence<Movie, Attribute>> correspondences =
                engine.runIdentityResolution(academyAwards, actors, null, rule, blocker);

        new CSVCorrespondenceFormatter().writeCSV(new File(ACADEMY_AWARDS_2_ACTORS_CSV), correspondences);

        MatchingGoldStandard goldStandard = loadGoldStandard(GS_V2);
        Performance performance =
                new MatchingEvaluator<Movie, Attribute>(true).evaluateMatching(correspondences.get(), goldStandard);
        printPerformance(HEADING_ACADEMY_AWARDS_ACTORS, performance);
    }

    /**
     * Generates the feature data set used for learning a matching rule from the
     * Academy Awards / Actors training gold standard and writes it to
     * {@code academy_awards_2_actors_features.csv}.
     *
     * @throws Exception if loading, feature generation, or writing fails
     */
    public static void createDatasetToTrain() throws Exception {
        HashedDataSet<Movie, Attribute> academyAwards = loadMovies(ACADEMY_AWARDS_XML);
        HashedDataSet<Movie, Attribute> actors = loadMovies(ACTORS_XML);
        MatchingGoldStandard trainingGoldStandard = loadGoldStandard(GS_TRAINING);

        LinearCombinationMatchingRule<Movie, Attribute> rule =
                new LinearCombinationMatchingRule<Movie, Attribute>(0.0);
        rule.addComparator(new MovieTitleComparatorLevenshtein(), 0.5);
        rule.addComparator(new MovieDateComparator10Years(), 0.5);

        new RecordCSVFormatter().writeCSV(
                new File(FEATURES_CSV),
                new RuleLearner<Movie, Attribute>()
                        .generateTrainingDataForLearning(academyAwards, actors, trainingGoldStandard, rule, null));
    }

    /**
     * Baseline run: exact-title and date comparators with equal, normalised
     * weights and a static blocking key, evaluated against the test gold
     * standard.
     *
     * @throws Exception if loading, matching, or writing fails
     */
    public static void firstMatching() throws Exception {
        HashedDataSet<Movie, Attribute> academyAwards = loadMovies(ACADEMY_AWARDS_XML);
        HashedDataSet<Movie, Attribute> actors = loadMovies(ACTORS_XML);

        LinearCombinationMatchingRule<Movie, Attribute> rule =
                new LinearCombinationMatchingRule<Movie, Attribute>(0.0);
        rule.addComparator(new MovieTitleComparatorEqual(), 1.0);
        rule.addComparator(new MovieDateComparator10Years(), 1.0);
        rule.normalizeWeights();

        MatchingEngine<Movie, Attribute> engine = new MatchingEngine<Movie, Attribute>();
        StandardRecordBlocker<Movie, Attribute> blocker =
                new StandardRecordBlocker<Movie, Attribute>(new StaticBlockingKeyGenerator<Movie, Attribute>());
        Processable<Correspondence<Movie, Attribute>> correspondences =
                engine.runIdentityResolution(academyAwards, actors, null, rule, blocker);

        new CSVCorrespondenceFormatter().writeCSV(new File(ACADEMY_AWARDS_2_ACTORS_CSV), correspondences);

        MatchingGoldStandard goldStandard = loadGoldStandard(GS_TEST);
        Performance performance =
                new MatchingEvaluator<Movie, Attribute>(true).evaluateMatching(correspondences.get(), goldStandard);
        printPerformance(HEADING_ACADEMY_AWARDS_ACTORS, performance);
    }

    /**
     * Full pipeline over all three data sets: matches Academy Awards with
     * Actors and Actors with Golden Globes using one shared rule and a
     * by-year blocker, writes both correspondence files, prints the
     * Actors/Golden Globes correspondences, regenerates the training feature
     * file, and prints the evaluation of both matchings.
     *
     * @throws Exception if loading, matching, or writing fails
     */
    public static void runWhole() throws Exception {
        // Two-argument constructor; the values are kept exactly as in the
        // original (presumably learned offset and threshold - TODO confirm).
        LinearCombinationMatchingRule<Movie, Attribute> rule =
                new LinearCombinationMatchingRule<Movie, Attribute>(-1.497, 0.5);
        rule.addComparator(new MovieTitleComparatorLevenshtein(), 1.849);
        rule.addComparator(new MovieDateComparator10Years(), 0.822);

        StandardRecordBlocker<Movie, Attribute> blocker =
                new StandardRecordBlocker<Movie, Attribute>(new MovieBlockingKeyByYearGenerator());
        MatchingEngine<Movie, Attribute> engine = new MatchingEngine<Movie, Attribute>();

        HashedDataSet<Movie, Attribute> academyAwards = loadMovies(ACADEMY_AWARDS_XML);
        HashedDataSet<Movie, Attribute> actors = loadMovies(ACTORS_XML);
        HashedDataSet<Movie, Attribute> goldenGlobes = loadMovies(GOLDEN_GLOBES_XML);

        Processable<Correspondence<Movie, Attribute>> academyAwardsToActors =
                engine.runIdentityResolution(academyAwards, actors, null, rule, blocker);
        Processable<Correspondence<Movie, Attribute>> actorsToGoldenGlobes =
                engine.runIdentityResolution(actors, goldenGlobes, null, rule, blocker);

        new CSVCorrespondenceFormatter().writeCSV(new File(ACADEMY_AWARDS_2_ACTORS_CSV), academyAwardsToActors);
        new CSVCorrespondenceFormatter().writeCSV(new File(ACTORS_2_GOLDEN_GLOBES_CSV), actorsToGoldenGlobes);

        // Copy first: printCorrespondences sorts its argument in place.
        printCorrespondences(new ArrayList<Correspondence<Movie, Attribute>>(actorsToGoldenGlobes.get()));

        MatchingGoldStandard trainingGoldStandard = loadGoldStandard(GS_TRAINING);
        new RecordCSVFormatter().writeCSV(
                new File(FEATURES_CSV),
                new RuleLearner<Movie, Attribute>()
                        .generateTrainingDataForLearning(academyAwards, actors, trainingGoldStandard, rule, null));

        MatchingGoldStandard academyAwardsToActorsGold = loadGoldStandard(GS_TEST);
        MatchingGoldStandard actorsToGoldenGlobesGold = loadGoldStandard(GS_ACTORS_2_GOLDEN_GLOBES);

        MatchingEvaluator<Movie, Attribute> evaluator = new MatchingEvaluator<Movie, Attribute>(true);
        Performance academyAwardsToActorsPerformance =
                evaluator.evaluateMatching(academyAwardsToActors.get(), academyAwardsToActorsGold);
        Performance actorsToGoldenGlobesPerformance =
                evaluator.evaluateMatching(actorsToGoldenGlobes.get(), actorsToGoldenGlobesGold);

        printPerformance(HEADING_ACADEMY_AWARDS_ACTORS, academyAwardsToActorsPerformance);
        printPerformance(HEADING_ACTORS_GOLDEN_GLOBES, actorsToGoldenGlobesPerformance);
    }

    /** Loads every {@code /movies/movie} element of the given XML file into a new data set. */
    private static HashedDataSet<Movie, Attribute> loadMovies(String xmlPath) throws Exception {
        HashedDataSet<Movie, Attribute> movies = new HashedDataSet<Movie, Attribute>();
        new MovieXMLReader().loadFromXML(new File(xmlPath), MOVIE_XPATH, movies);
        return movies;
    }

    /** Reads a matching gold standard from the given CSV file. */
    private static MatchingGoldStandard loadGoldStandard(String csvPath) throws Exception {
        MatchingGoldStandard goldStandard = new MatchingGoldStandard();
        goldStandard.loadFromCSVFile(new File(csvPath));
        return goldStandard;
    }

    /** Prints a heading line followed by precision, recall and F1 with four decimals. */
    private static void printPerformance(String heading, Performance performance) {
        System.out.println(heading);
        System.out.println(String.format("Precision: %.4f\nRecall: %.4f\nF1: %.4f",
                performance.getPrecision(), performance.getRecall(), performance.getF1()));
    }

    /**
     * Sorts the correspondences in place by descending similarity score, ties
     * broken by the first record's title in ascending order, and prints one
     * line per correspondence to standard output.
     *
     * @param list correspondences to print; reordered by this call
     */
    private static void printCorrespondences(List<Correspondence<Movie, Attribute>> list) {
        Collections.sort(list, new Comparator<Correspondence<Movie, Attribute>>() {
            @Override
            public int compare(Correspondence<Movie, Attribute> left, Correspondence<Movie, Attribute> right) {
                int byScore = Double.compare(left.getSimilarityScore(), right.getSimilarityScore());
                if (byScore != 0) {
                    // Negated so that higher scores come first.
                    return -byScore;
                }
                return compareTitles(left.getFirstRecord().getTitle(), right.getFirstRecord().getTitle());
            }
        });

        for (Correspondence<Movie, Attribute> correspondence : list) {
            Movie first = correspondence.getFirstRecord();
            Movie second = correspondence.getSecondRecord();
            System.out.println(String.format("%s,%s,|\t\t%.2f\t[%s] %s (%s) <--> [%s] %s (%s)",
                    first.getIdentifier(),
                    second.getIdentifier(),
                    correspondence.getSimilarityScore(),
                    first.getIdentifier(), first.getTitle(), formatDate(first),
                    second.getIdentifier(), second.getTitle(), formatDate(second)));
        }
    }

    /** Compares two titles in natural order, placing missing ({@code null}) titles first. */
    private static int compareTitles(String left, String right) {
        if (left == null) {
            return right == null ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }

    /**
     * Formats the movie's date with {@link #DATE_PATTERN}; returns {@code null}
     * (printed as "null" by {@code %s}) when the movie has no date, instead of
     * failing with a {@link NullPointerException}.
     */
    private static String formatDate(Movie movie) {
        return movie.getDate() == null ? null : movie.getDate().toString(DATE_PATTERN);
    }
}
