package edu.berkeley.cs.nlp.ocular.main;

import edu.berkeley.cs.nlp.ocular.data.Document;
import edu.berkeley.cs.nlp.ocular.data.LazyRawImageLoader;
import edu.berkeley.cs.nlp.ocular.eval.BasicMultiDocumentTranscriber;
import edu.berkeley.cs.nlp.ocular.eval.BasicSingleDocumentEvaluatorAndOutputPrinter;
import edu.berkeley.cs.nlp.ocular.font.Font;
import edu.berkeley.cs.nlp.ocular.gsm.BasicGlyphSubstitutionModel;
import edu.berkeley.cs.nlp.ocular.gsm.GlyphSubstitutionModel;
import edu.berkeley.cs.nlp.ocular.lm.CodeSwitchLanguageModel;
import edu.berkeley.cs.nlp.ocular.main.FonttrainTranscribeShared;
import edu.berkeley.cs.nlp.ocular.model.DecoderEM;
import edu.berkeley.cs.nlp.ocular.train.FontTrainer;
import edu.berkeley.cs.nlp.ocular.util.FileUtil;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.jocl.CL;
import tberg.murphy.fig.Option;
import tberg.murphy.indexer.Indexer;

/* loaded from: input_file:main/ocular_2.12-0.3-SNAPSHOT.jar:edu/berkeley/cs/nlp/ocular/main/Transcribe.class */
/**
 * Command-line runnable that transcribes a set of input documents using a
 * previously trained font and language model.
 *
 * <p>When {@link #updateFont} is set (and therefore {@code outputFontPath} is
 * given), the documents are passed through a single
 * {@link FontTrainer#doFontTrainPass} pass instead of the plain
 * {@link BasicMultiDocumentTranscriber}.
 */
public class Transcribe extends FonttrainTranscribeShared {

    /** Skip any document whose diplomatic transcription output file already exists. */
    @Option(gloss = "If true, for each doc the outputPath will be checked for an existing transcription and if one is found then the document will be skipped.")
    public static boolean skipAlreadyTranscribedDocs = false;

    /** Turn the "everything was already transcribed" warning into an exception. */
    @Option(gloss = "If true, an exception will be thrown if all of the input documents have already been transcribed (and thus the job has nothing to do).  Ignored unless -skipAlreadyTranscribedDocs=true.")
    public static boolean failIfAllDocsAlreadyTranscribed = false;

    /** Selects the font-updating code path; requires {@code -outputFontPath}. */
    @Option(gloss = "Update the font during transcription based on the new input documents?")
    public static boolean updateFont = false;

    /**
     * Program entry point: announces the tool name and hands the command-line
     * arguments to {@code doMain}.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String[] strArr) {
        System.out.println("Transcribe");
        OcularRunnable transcribe = new Transcribe();
        transcribe.doMain(transcribe, strArr);
    }

    /**
     * Validates the inherited options and then the combinations specific to
     * transcription.
     *
     * @throws IllegalArgumentException if {@code -updateFont} and
     *         {@code -outputFontPath} disagree, or if {@code -evalBatches} is
     *         requested without {@code -updateFont}
     */
    @Override
    public void validateOptions() {
        super.validateOptions();
        if (updateFont && outputFontPath == null) {
            throw new IllegalArgumentException("-outputFontPath is required when -updateFont is true.");
        }
        if (!updateFont && outputFontPath != null) {
            throw new IllegalArgumentException("-outputFontPath not permitted when -updateFont is false.");
        }
        if (evalBatches && !updateFont) {
            throw new IllegalArgumentException("-evalBatches doesn't make sense when -updateFont is false.");
        }
        // No further updateFont/outputFontPath consistency check is needed here:
        // the first two guards already reject every combination in which
        // updateFont differs from (outputFontPath != null).
    }

    /**
     * Loads the models and input documents, optionally drops documents that
     * were already transcribed, and then either runs a font-updating pass or a
     * plain transcription.
     *
     * @param list the command-line arguments, forwarded to the output printer
     * @throws NoDocumentsFoundException if no input documents were loaded
     * @throws NoDocumentsToProcessException if every document was skipped and
     *         {@link #failIfAllDocsAlreadyTranscribed} is set
     */
    @Override
    public void run(List<String> list) {
        Set<FonttrainTranscribeShared.OutputFormat> outputFormats = parseOutputFormats();
        CodeSwitchLanguageModel lm = loadInputLM();
        Font font = loadInputFont();
        BasicGlyphSubstitutionModel.BasicGlyphSubstitutionModelFactory gsmFactory = makeGsmFactory(lm);
        GlyphSubstitutionModel gsm = loadInitialGSM(gsmFactory);
        Indexer<String> charIndexer = lm.getCharacterIndexer();
        Indexer<String> langIndexer = lm.getLanguageIndexer();
        DecoderEM decoder = makeDecoder(charIndexer);
        BasicSingleDocumentEvaluatorAndOutputPrinter outputPrinter =
                new BasicSingleDocumentEvaluatorAndOutputPrinter(charIndexer, langIndexer, allowGlyphSubstitution, true, list);

        List<String> inputDocPaths = getInputDocPathList();
        List<Document> documents = LazyRawImageLoader.loadDocuments(
                inputDocPaths, extractedLinesPath, numDocs, numDocsToSkip, uniformLineHeight, binarizeThreshold, crop);
        if (documents.isEmpty()) {
            throw new NoDocumentsFoundException();
        }
        String commonInputPath = FileUtil.lowestCommonPath(inputDocPaths);

        if (skipAlreadyTranscribedDocs) {
            removeAlreadyTranscribedDocs(documents, commonInputPath);
            // NOTE(review): if every document was skipped and
            // failIfAllDocsAlreadyTranscribed is false, execution continues
            // below with an empty list - confirm the downstream calls handle that.
        }

        if (outputFontPath != null) {
            // validateOptions() guarantees outputFontPath is set exactly when updateFont is true.
            FontTrainer trainer = new FontTrainer();
            // A non-positive updateDocBatchSize falls back to one batch holding every document.
            int batchSize = updateDocBatchSize > 0 ? updateDocBatchSize : documents.size();
            // NOTE(review): CL.CL_INT_MAX looks like a decompiler substitution for
            // Integer.MAX_VALUE - confirm before replacing.
            trainer.doFontTrainPass(
                    0, documents, font, lm, gsm,
                    outputFontPath, outputLmPath, outputGsmPath,
                    decoder, gsmFactory, outputPrinter,
                    0, batchSize, true, false, numMstepThreads,
                    commonInputPath, outputPath, outputFormats,
                    makeEvalSetEvaluator(charIndexer, decoder, outputPrinter),
                    CL.CL_INT_MAX, evalBatches, skipFailedDocs);
        } else {
            String timestamp = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").format(Calendar.getInstance().getTime());
            System.out.println("Transcribing input data      " + timestamp);
            BasicMultiDocumentTranscriber transcriber = new BasicMultiDocumentTranscriber(
                    documents, commonInputPath, outputPath, outputFormats, decoder, outputPrinter, charIndexer, skipFailedDocs);
            transcriber.transcribe(font, lm, gsm);
        }
    }

    /**
     * Removes, in place, every document whose diplomatic transcription output
     * file already exists, and reports when nothing is left to process.
     *
     * @param documents mutable list of loaded documents; already-transcribed
     *        entries are removed from it
     * @param commonInputPath lowest common path of the input documents, used to
     *        derive each document's output file name
     * @throws NoDocumentsToProcessException if all documents were removed and
     *         {@link #failIfAllDocsAlreadyTranscribed} is set
     */
    private void removeAlreadyTranscribedDocs(List<Document> documents, String commonInputPath) {
        int originalCount = documents.size();
        // Explicit iterator so entries can be removed safely while iterating.
        for (Iterator<Document> it = documents.iterator(); it.hasNext();) {
            Document doc = it.next();
            String outputFilenameBase =
                    BasicSingleDocumentEvaluatorAndOutputPrinter.makeOutputFilenameBase(doc, commonInputPath, outputPath);
            String transcriptionFile =
                    BasicSingleDocumentEvaluatorAndOutputPrinter.diplomaticTranscriptionOutputFile(outputFilenameBase);
            if (new File(transcriptionFile).exists()) {
                System.out.println("  Skipping " + doc.baseName() + " since it was already transcribed: [" + transcriptionFile + "]");
                it.remove();
            }
        }
        if (!documents.isEmpty()) {
            return;
        }
        String message = "The input path contains " + originalCount + " documents, but all have already been transcribed, so there is nothing remaining for this job to do.  (This is due to setting -skipAlreadyTranscribedDocs=true.)";
        if (failIfAllDocsAlreadyTranscribed) {
            throw new NoDocumentsToProcessException(message);
        }
        System.out.println("WARNING: " + message);
    }
}
