package edu.berkeley.cs.nlp.ocular.eval;

import edu.berkeley.cs.nlp.ocular.data.Document;
import edu.berkeley.cs.nlp.ocular.eval.Evaluator;
import edu.berkeley.cs.nlp.ocular.font.Font;
import edu.berkeley.cs.nlp.ocular.gsm.GlyphSubstitutionModel;
import edu.berkeley.cs.nlp.ocular.lm.CodeSwitchLanguageModel;
import edu.berkeley.cs.nlp.ocular.main.FonttrainTranscribeShared;
import edu.berkeley.cs.nlp.ocular.model.CharacterTemplate;
import edu.berkeley.cs.nlp.ocular.model.DecodeState;
import edu.berkeley.cs.nlp.ocular.model.DecoderEM;
import edu.berkeley.cs.nlp.ocular.model.em.DenseBigramTransitionModel;
import edu.berkeley.cs.nlp.ocular.train.FontTrainer;
import edu.berkeley.cs.nlp.ocular.util.Tuple2;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Map;
import java.util.Set;
import tberg.murphy.indexer.Indexer;

/* loaded from: input_file:main/ocular_2.12-0.3-SNAPSHOT.jar:edu/berkeley/cs/nlp/ocular/eval/BasicMultiDocumentTranscriber.class */
/**
 * Transcribes a fixed list of evaluation documents one at a time with a
 * {@link DecoderEM}, hands each decoded result to a
 * {@link SingleDocumentEvaluatorAndOutputPrinter}, and finally prints the
 * average joint log probability plus (when the input path is a directory)
 * aggregate evaluation files.
 */
public class BasicMultiDocumentTranscriber implements MultiDocumentTranscriber {
    private final List<Document> documents;
    private final String inputDocPath;
    private final String outputPath;
    private final Set<FonttrainTranscribeShared.OutputFormat> outputFormats;
    private final DecoderEM decoderEM;
    private final SingleDocumentEvaluatorAndOutputPrinter docOutputPrinterAndEvaluator;
    private final Indexer<String> charIndexer;
    private final boolean skipFailedDocs;

    /**
     * @param documents                    documents to transcribe, processed in list order
     * @param inputDocPath                 input path; forwarded to the per-document printer and,
     *                                     if it is a directory, its name is used in the aggregate
     *                                     evaluation output path
     * @param outputPath                   root under which output is written
     * @param outputFormats                forwarded unchanged to the per-document printer
     * @param decoderEM                    decoder used to run the E-step on each document
     * @param docOutputPrinterAndEvaluator evaluates and prints each decoded document
     * @param charIndexer                  character indexer used when loading font templates
     * @param skipFailedDocs               if {@code true}, a {@link RuntimeException} raised while
     *                                     processing a document is reported on stderr and that
     *                                     document is skipped; if {@code false}, it is rethrown
     */
    public BasicMultiDocumentTranscriber(List<Document> documents, String inputDocPath, String outputPath, Set<FonttrainTranscribeShared.OutputFormat> outputFormats, DecoderEM decoderEM, SingleDocumentEvaluatorAndOutputPrinter docOutputPrinterAndEvaluator, Indexer<String> charIndexer, boolean skipFailedDocs) {
        this.documents = documents;
        this.inputDocPath = inputDocPath;
        this.outputPath = outputPath;
        this.outputFormats = outputFormats;
        this.decoderEM = decoderEM;
        this.docOutputPrinterAndEvaluator = docOutputPrinterAndEvaluator;
        this.charIndexer = charIndexer;
        this.skipFailedDocs = skipFailedDocs;
    }

    /**
     * Transcribes all documents with iteration and batch both set to 0, which
     * suppresses the iteration/batch parts of progress messages and output names.
     */
    @Override // edu.berkeley.cs.nlp.ocular.eval.MultiDocumentTranscriber
    public void transcribe(Font font, CodeSwitchLanguageModel lm, GlyphSubstitutionModel gsm) {
        transcribe(0, 0, font, lm, gsm);
    }

    /**
     * Transcribes every document, evaluates/prints each one, and reports totals.
     *
     * @param iter    training iteration; only included in progress messages and
     *                output file names when greater than 0
     * @param batchId batch number; only included in progress messages and output
     *                file names when greater than 0
     * @param font    font whose character templates are loaded for decoding
     * @param lm      language model passed to the decoder and the evaluator
     * @param gsm     glyph substitution model passed to the decoder
     * @throws RuntimeException if processing a document fails and
     *                          {@code skipFailedDocs} is {@code false}
     */
    @Override // edu.berkeley.cs.nlp.ocular.eval.MultiDocumentTranscriber
    public void transcribe(int iter, int batchId, Font font, CodeSwitchLanguageModel lm, GlyphSubstitutionModel gsm) {
        int numDocs = this.documents.size();
        CharacterTemplate[] templates = FontTrainer.loadTemplates(font, this.charIndexer);
        DenseBigramTransitionModel transitionModel = new DenseBigramTransitionModel(lm);

        // Loop-invariant pieces of the progress line. The formatter is local to
        // this call, so SimpleDateFormat's lack of thread-safety is not an issue.
        String progressPrefix = (iter > 0 ? "Training iteration " + iter + ", " : "")
                + (batchId > 0 ? "batch " + batchId + ", " : "");
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");

        double totalJointLogProb = 0.0d;
        int numDecoded = 0;
        List<Tuple2<String, Map<String, Evaluator.EvalSuffStats>>> diplomaticEvals = new ArrayList<>();
        List<Tuple2<String, Map<String, Evaluator.EvalSuffStats>>> normalizedEvals = new ArrayList<>();

        for (int docNum = 0; docNum < numDocs; docNum++) {
            Document doc = this.documents.get(docNum);
            System.out.println(progressPrefix + "Transcribing eval document " + (docNum + 1) + " of " + numDocs + ":  " + doc.baseName() + "    " + timestampFormat.format(Calendar.getInstance().getTime()));
            try {
                Tuple2<DecodeState[][], Double> decoded = this.decoderEM.computeEStep(doc, false, lm, gsm, templates, transitionModel);
                DecodeState[][] decodeStates = decoded._1;
                totalJointLogProb += decoded._2.doubleValue();
                // Count only documents that contributed to the sum so that
                // skipped failures do not dilute the average below.
                numDecoded++;

                Tuple2<Map<String, Evaluator.EvalSuffStats>, Map<String, Evaluator.EvalSuffStats>> evals = this.docOutputPrinterAndEvaluator.evaluateAndPrintTranscription(iter, batchId, doc, decodeStates, this.inputDocPath, this.outputPath, this.outputFormats, lm);
                // First component feeds the "_diplomatic" report, second the
                // "_normalized" report; either may be absent (null).
                if (evals._1 != null) {
                    diplomaticEvals.add(Tuple2.Tuple2(doc.baseName(), evals._1));
                }
                if (evals._2 != null) {
                    normalizedEvals.add(Tuple2.Tuple2(doc.baseName(), evals._2));
                }
            } catch (RuntimeException e) {
                if (!this.skipFailedDocs) {
                    throw e;
                }
                System.err.println("DOCUMENT FAILED! Skipping " + doc.baseName());
                e.printStackTrace();
            }
        }

        // Average over documents that were actually decoded; NaN when there were none.
        double avgJointLogProb = numDecoded > 0 ? totalJointLogProb / numDecoded : Double.NaN;
        System.out.println("Iteration " + iter + ", batch " + batchId + ": eval avg joint log prob: " + avgJointLogProb);

        // Aggregate evaluation files are only written when the input is a directory.
        File inputDoc = new File(this.inputDocPath);
        if (inputDoc.isDirectory()) {
            String evalPathPrefix = this.outputPath + "/all_transcriptions/" + inputDoc.getName() + "/eval";
            if (iter > 0) {
                evalPathPrefix += "_iter-" + iter;
            }
            if (batchId > 0) {
                evalPathPrefix += "_batch-" + batchId;
            }
            if (!diplomaticEvals.isEmpty()) {
                EvalPrinter.printEvaluation(diplomaticEvals, evalPathPrefix + "_diplomatic.txt");
            }
            if (!normalizedEvals.isEmpty()) {
                EvalPrinter.printEvaluation(normalizedEvals, evalPathPrefix + "_normalized.txt");
            }
        }
    }
}
