package de.tudarmstadt.ukp.wikipedia.parser.statistics;

import de.tudarmstadt.ukp.wikipedia.api.DatabaseConfiguration;
import de.tudarmstadt.ukp.wikipedia.api.Page;
import de.tudarmstadt.ukp.wikipedia.api.WikiConstants;
import de.tudarmstadt.ukp.wikipedia.api.Wikipedia;
import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage;
import de.tudarmstadt.ukp.wikipedia.parser.SectionContainer;
import de.tudarmstadt.ukp.wikipedia.parser.Table;
import de.tudarmstadt.ukp.wikipedia.parser.TableElement;
import de.tudarmstadt.ukp.wikipedia.parser.Template;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.ShowTemplateNamesAndParameters;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.hibernate.hql.internal.classic.ParserHelper;

/* loaded from: input_file:de/tudarmstadt/ukp/wikipedia/parser/statistics/Statistics.class */
public class Statistics {
    public static final String path = "./data/parsedpage/statistics/";
    static long nrOfPages;
    static int nrOfTables;
    static int nrOfTemplates;
    static int nrOfAnalyzedPages;
    static List<Integer> templateNrOfOccurence;
    static List<String> templateNameOfFirstOccurence;
    static List<String> templateNames;
    static List<String> pagesWithTableSections;
    static final int skipPages = 0;
    static final long offsetTime = 0;
    static final boolean debug = false;

    public static void main(String[] strArr) throws Exception {
        DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration();
        databaseConfiguration.setDatabase("wikiapi_en");
        databaseConfiguration.setHost("bender.ukp.informatik.tu-darmstadt.de");
        databaseConfiguration.setUser("student");
        databaseConfiguration.setPassword("student");
        databaseConfiguration.setLanguage(WikiConstants.Language.english);
        Wikipedia wikipedia = new Wikipedia(databaseConfiguration);
        MediaWikiParserFactory mediaWikiParserFactory = new MediaWikiParserFactory();
        mediaWikiParserFactory.setTemplateParserClass(ShowTemplateNamesAndParameters.class);
        mediaWikiParserFactory.setShowImageText(true);
        mediaWikiParserFactory.setShowMathTagContent(true);
        mediaWikiParserFactory.setDeleteTags(false);
        mediaWikiParserFactory.getImageIdentifers().add("IMAGE");
        mediaWikiParserFactory.setCalculateSrcSpans(false);
        MediaWikiParser createParser = mediaWikiParserFactory.createParser();
        nrOfPages = wikipedia.getMetaData().getNumberOfPages();
        nrOfTables = 0;
        nrOfTemplates = 0;
        templateNames = new ArrayList();
        templateNameOfFirstOccurence = new ArrayList();
        templateNrOfOccurence = new ArrayList();
        pagesWithTableSections = new ArrayList();
        long time = new Date().getTime();
        nrOfAnalyzedPages = 0;
        System.out.println("ANALYSING ...");
        for (Page page : wikipedia.getArticles()) {
            nrOfAnalyzedPages++;
            if (nrOfAnalyzedPages < 1) {
                System.out.println("Skipped: " + page.getPageId());
            } else {
                if (nrOfAnalyzedPages % 1024 == 0) {
                    long time2 = (new Date().getTime() - time) + 0;
                    System.out.println(percentString(nrOfAnalyzedPages, nrOfPages) + " -> " + nrOfAnalyzedPages + " of " + nrOfPages + " pages in " + (time2 / 1000) + "sec -> " + ((((time2 * nrOfPages) / nrOfAnalyzedPages) - time2) / 60000) + "min left");
                }
                String plainTitle = page.getTitle().getPlainTitle();
                ParsedPage parse = createParser.parse(page.getText());
                if (parse != null) {
                    parse.setName(plainTitle);
                    for (Template template : parse.getTemplates()) {
                        nrOfTemplates++;
                        String lowerCase = template.getName().toLowerCase();
                        if (lowerCase.startsWith("vorlage:")) {
                            lowerCase = lowerCase.substring(8);
                        } else if (lowerCase.startsWith("template:")) {
                            lowerCase = lowerCase.substring(9);
                        }
                        int indexOf = templateNames.indexOf(lowerCase);
                        if (indexOf != -1) {
                            templateNrOfOccurence.set(indexOf, Integer.valueOf(templateNrOfOccurence.get(indexOf).intValue() + 1));
                        } else {
                            templateNrOfOccurence.add(1);
                            templateNames.add(lowerCase);
                            templateNameOfFirstOccurence.add(parse.getName());
                            new ArrayList().add(parse.getName());
                        }
                    }
                    if (parse.nrOfTables() != 0) {
                        nrOfTables++;
                    }
                    boolean z = true;
                    for (Table table : parse.getTables()) {
                        if (z) {
                            for (int i = 0; i < table.nrOfTableElements(); i++) {
                                TableElement tableElement = table.getTableElement(i);
                                if (tableElement.nrOfSections() > 1 || tableElement.getSection(0).getClass() == SectionContainer.class) {
                                    pagesWithTableSections.add(parse.getName());
                                    z = false;
                                    break;
                                }
                            }
                        }
                    }
                }
            }
        }
        System.out.println("Finished.");
        sortTemplates();
        writeFiles("statistics");
        restructureTemplateNames();
        sortTemplates();
        writeTemplates("statistics.restructured");
        System.out.println("check the Results ;-)\nnow...");
    }

    private static void sortTemplates() {
        System.out.println("Sort Template List");
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        for (int i = 0; i < templateNrOfOccurence.size(); i++) {
            int intValue = templateNrOfOccurence.get(i).intValue();
            int i2 = 0;
            while (i2 < arrayList2.size() && intValue < ((Integer) arrayList2.get(i2)).intValue()) {
                i2++;
            }
            arrayList2.add(i2, Integer.valueOf(intValue));
            arrayList.add(i2, templateNames.get(i));
            arrayList3.add(i2, templateNameOfFirstOccurence.get(i));
        }
        templateNames = arrayList;
        templateNrOfOccurence = arrayList2;
        templateNameOfFirstOccurence = arrayList3;
    }

    private static void writeFiles(String str) throws IOException {
        System.out.print("writeFiles() " + str);
        writeTemplates(str);
        wirteTables(str);
    }

    private static void writeTemplates(String str) throws IOException {
        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(path + str + ".template"));
        bufferedWriter.write("Analyzed Pages: " + nrOfAnalyzedPages + "\n\n");
        bufferedWriter.write("Found " + nrOfTemplates + " Templates\n");
        bufferedWriter.write("Found " + templateNames.size() + " different Templates\n\n");
        int i = 0;
        for (int i2 = 0; i2 < templateNames.size(); i2++) {
            int intValue = templateNrOfOccurence.get(i2).intValue();
            i += intValue;
            bufferedWriter.write(intValue + " x {{" + templateNames.get(i2) + "}}");
            bufferedWriter.write(" @" + templateNameOfFirstOccurence.get(i2));
            bufferedWriter.write(" sum=" + i);
            bufferedWriter.write("\n");
        }
        bufferedWriter.close();
    }

    private static void wirteTables(String str) throws IOException {
        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(path + str + ".table"));
        int size = pagesWithTableSections.size();
        bufferedWriter.write("Analyzed Pages: " + nrOfAnalyzedPages + "\n\nFound " + nrOfTables + " Tables\n-> " + percentString(nrOfTables, nrOfAnalyzedPages) + " @Pages\n\nFound " + size + " Sections in Tables\n-> " + percentString(size, nrOfTables) + " @Tables\n-> " + percentString(size, nrOfAnalyzedPages) + " @Pages\n\n");
        bufferedWriter.write("-=Pages with Tables and Sections---------------------------------------------------\n");
        Iterator<String> it = pagesWithTableSections.iterator();
        while (it.hasNext()) {
            bufferedWriter.write(it.next() + "\n");
        }
        bufferedWriter.close();
        System.out.println(" --> OK");
    }

    private static void restructureTemplateNames() {
        System.out.println("restructure Template Names");
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        for (int i = 0; i < templateNames.size(); i++) {
            String str = templateNames.get(i);
            int indexOf = str.indexOf(32);
            int indexOf2 = str.indexOf(95);
            if (indexOf == -1 || (indexOf2 != -1 && indexOf2 < indexOf)) {
                indexOf = indexOf2;
            }
            if (indexOf != -1) {
                str = str.substring(0, indexOf);
            }
            int indexOf3 = arrayList.indexOf(str);
            if (indexOf3 != -1) {
                arrayList2.set(indexOf3, Integer.valueOf(((Integer) arrayList2.get(indexOf3)).intValue() + templateNrOfOccurence.get(i).intValue()));
            } else {
                arrayList.add(str);
                arrayList2.add(templateNrOfOccurence.get(i));
                arrayList3.add(templateNameOfFirstOccurence.get(i));
            }
        }
        templateNames = arrayList;
        templateNrOfOccurence = arrayList2;
        templateNameOfFirstOccurence = arrayList3;
    }

    private static String percentString(long j, long j2) {
        long j3 = j2 > 0 ? (j * 10000) / j2 : 0L;
        return (j3 / 100) + ParserHelper.PATH_SEPARATORS + ((j3 / 10) % 10) + "" + (j3 % 10) + "%";
    }
}
