package edu.berkeley.cs.nlp.ocular.data;

import edu.berkeley.cs.nlp.ocular.data.textreader.Charset;
import edu.berkeley.cs.nlp.ocular.image.ImageUtils;
import edu.berkeley.cs.nlp.ocular.image.Visualizer;
import edu.berkeley.cs.nlp.ocular.preprocessing.Binarizer;
import edu.berkeley.cs.nlp.ocular.preprocessing.Cropper;
import edu.berkeley.cs.nlp.ocular.preprocessing.LineExtractor;
import edu.berkeley.cs.nlp.ocular.preprocessing.Straightener;
import edu.berkeley.cs.nlp.ocular.util.CollectionHelper;
import edu.berkeley.cs.nlp.ocular.util.FileUtil;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import tberg.murphy.fileio.f;

/* loaded from: input_file:main/ocular_2.12-0.3-SNAPSHOT.jar:edu/berkeley/cs/nlp/ocular/data/LazyRawImageDocument.class */
public abstract class LazyRawImageDocument implements Document {
    private final String inputPath;
    private final int lineHeight;
    private final double binarizeThreshold;
    private final boolean crop;
    private String extractedLinesPath;
    private ImageUtils.PixelType[][][] observations = (ImageUtils.PixelType[][][]) null;
    private String[][] diplomaticTextLines = (String[][]) null;
    private boolean diplomaticTextLinesLoaded = false;
    private String[][] normalizedTextLines = (String[][]) null;
    private boolean normalizedTextLinesLoaded = false;
    private List<String> normalizedText = null;
    private boolean normalizedTextLoaded = false;

    public LazyRawImageDocument(String str, int i, double d, boolean z, String str2) {
        this.extractedLinesPath = null;
        this.inputPath = str;
        this.lineHeight = i;
        this.binarizeThreshold = d;
        this.crop = z;
        this.extractedLinesPath = str2;
    }

    @Override // edu.berkeley.cs.nlp.ocular.data.Document
    public final ImageUtils.PixelType[][][] loadLineImages() {
        if (this.observations == null) {
            if (this.extractedLinesPath == null) {
                this.observations = doLoadObservationsFromFile();
            } else if (extractionFilesPresent()) {
                this.observations = doLoadObservationsFromLineExtractionFiles();
            } else {
                this.observations = doLoadObservationsFromFile();
                writeExtractedLineImagesAggregateFile();
                writeIndividualExtractedLineImageFiles();
            }
        }
        return this.observations;
    }

    /* JADX WARN: Type inference failed for: r0v16, types: [edu.berkeley.cs.nlp.ocular.image.ImageUtils$PixelType[][], edu.berkeley.cs.nlp.ocular.image.ImageUtils$PixelType[][][]] */
    private ImageUtils.PixelType[][][] doLoadObservationsFromFile() {
        double[][] straighten = Straightener.straighten(ImageUtils.getLevels(doLoadBufferedImage()));
        double[][] crop = this.crop ? Cropper.crop(straighten, this.binarizeThreshold) : straighten;
        Binarizer.binarizeGlobal(this.binarizeThreshold, crop);
        List<double[][]> extractLines = LineExtractor.extractLines(crop);
        ?? r0 = new ImageUtils.PixelType[extractLines.size()];
        for (int i = 0; i < extractLines.size(); i++) {
            r0[i] = imageToObservation(ImageUtils.makeImage(extractLines.get(i)));
        }
        return r0;
    }

    /* JADX WARN: Type inference failed for: r0v14, types: [edu.berkeley.cs.nlp.ocular.image.ImageUtils$PixelType[][], edu.berkeley.cs.nlp.ocular.image.ImageUtils$PixelType[][][]] */
    private ImageUtils.PixelType[][][] doLoadObservationsFromLineExtractionFiles() {
        System.out.println("Loading pre-extracted line images from " + leLineDir());
        final Pattern compile = Pattern.compile("line(\\d+)\\." + ext());
        File[] listFiles = new File(leLineDir()).listFiles(new FilenameFilter() { // from class: edu.berkeley.cs.nlp.ocular.data.LazyRawImageDocument.1
            @Override // java.io.FilenameFilter
            public boolean accept(File file, String str) {
                return compile.matcher(str).matches();
            }
        });
        if (listFiles == null) {
            throw new RuntimeException("lineImageFiles is null");
        }
        if (listFiles.length == 0) {
            throw new RuntimeException("lineImageFiles.length == 0");
        }
        Arrays.sort(listFiles);
        ?? r0 = new ImageUtils.PixelType[listFiles.length];
        for (int i = 0; i < listFiles.length; i++) {
            Matcher matcher = compile.matcher(listFiles[i].getName());
            if (matcher.find() && Integer.valueOf(matcher.group(1)).intValue() != i) {
                throw new RuntimeException("Trying to load lines from " + leLineDir() + " but the file for line " + i + " is missing (found " + matcher.group(1) + " instead).");
            }
            String fullLeLinePath = fullLeLinePath(i);
            System.out.println("    Loading pre-extracted line from " + fullLeLinePath);
            try {
                r0[i] = imageToObservation(f.readImage(fullLeLinePath));
            } catch (Exception e) {
                throw new RuntimeException("Couldn't read line image from: " + fullLeLinePath, e);
            }
        }
        return r0;
    }

    private ImageUtils.PixelType[][] imageToObservation(BufferedImage bufferedImage) {
        return this.lineHeight >= 0 ? ImageUtils.getPixelTypes(ImageUtils.resampleImage(bufferedImage, this.lineHeight)) : ImageUtils.getPixelTypes(bufferedImage);
    }

    public void writeExtractedLineImagesAggregateFile(String str) {
        System.out.println("Writing file line-extraction image to: " + str);
        new File(str).getAbsoluteFile().getParentFile().mkdirs();
        f.writeImage(str, Visualizer.renderLineExtraction(this.observations));
    }

    public void writeExtractedLineImagesAggregateFile() {
        writeExtractedLineImagesAggregateFile(multilineExtractionImagePath());
    }

    public void writeIndividualExtractedLineImageFiles() {
        new File(leLineDir()).mkdirs();
        for (int i = 0; i < this.observations.length; i++) {
            ImageUtils.PixelType[][] pixelTypeArr = this.observations[i];
            String fullLeLinePath = fullLeLinePath(i);
            System.out.println("  Writing individual line-extraction image to: " + fullLeLinePath);
            f.writeImage(fullLeLinePath, Visualizer.renderLineExtraction(pixelTypeArr));
        }
    }

    private boolean extractionFilesPresent() {
        File file = new File(fullLeLinePath(0));
        System.out.println("Looking for extractions in [" + file + "]. " + (file.exists() ? "Found" : "Not found") + ".");
        return file.exists();
    }

    /* JADX WARN: Type inference failed for: r0v14, types: [java.lang.String[], java.lang.String[][]] */
    private String[][] loadTextFile(File file, String str) {
        if (!file.exists()) {
            System.out.println("No evaluation " + str + " text found at " + file + "  (This is only a problem if you were trying to provide a gold " + str + " transcription to check accuracy.)");
            return (String[][]) null;
        }
        System.out.println("Evaluation " + str + " text found at " + file);
        ArrayList arrayList = new ArrayList();
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
            while (bufferedReader.ready()) {
                arrayList.add(Charset.readNormalizeCharacters(bufferedReader.readLine()));
            }
            bufferedReader.close();
            ?? r0 = new String[arrayList.size()];
            for (int i = 0; i < r0.length; i++) {
                List list = (List) arrayList.get(i);
                r0[i] = (String[]) list.toArray(new String[list.size()]);
            }
            return r0;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Override // edu.berkeley.cs.nlp.ocular.data.Document
    public String[][] loadDiplomaticTextLines() {
        if (!this.diplomaticTextLinesLoaded) {
            this.diplomaticTextLines = loadTextFile(new File(baseName().replaceAll("\\.[^.]*$", "") + ".txt"), "diplomatic");
        }
        this.diplomaticTextLinesLoaded = true;
        return this.diplomaticTextLines;
    }

    @Override // edu.berkeley.cs.nlp.ocular.data.Document
    public String[][] loadNormalizedTextLines() {
        if (!this.normalizedTextLinesLoaded) {
            this.normalizedTextLines = loadTextFile(new File(baseName().replaceAll("\\.[^.]*$", "") + "_normalized.txt"), "normalized");
        }
        this.normalizedTextLinesLoaded = true;
        return this.normalizedTextLines;
    }

    @Override // edu.berkeley.cs.nlp.ocular.data.Document
    public List<String> loadNormalizedText() {
        if (!this.normalizedTextLoaded && loadNormalizedTextLines() != null) {
            this.normalizedText = new ArrayList();
            for (String[] strArr : loadNormalizedTextLines()) {
                for (String str : strArr) {
                    if (!Charset.SPACE.equals(str) || (!this.normalizedText.isEmpty() && !Charset.SPACE.equals(CollectionHelper.last(this.normalizedText)))) {
                        this.normalizedText.add(str);
                    }
                }
                if (!this.normalizedText.isEmpty() && !Charset.SPACE.equals(CollectionHelper.last(this.normalizedText))) {
                    this.normalizedText.add(Charset.SPACE);
                }
            }
            if (Charset.SPACE.equals(CollectionHelper.last(this.normalizedText))) {
                this.normalizedText.remove(this.normalizedText.size() - 1);
            }
        }
        this.normalizedTextLoaded = true;
        return this.normalizedText;
    }

    private String multilineExtractionImagePath() {
        return fullLePreExt() + "." + ext();
    }

    private String leLineDir() {
        return fullLePreExt() + "_" + ext();
    }

    private String fileParent() {
        return FileUtil.removeCommonPathPrefixOfParents(new File(this.inputPath), file())._2;
    }

    private String fullLePreExt() {
        return this.extractedLinesPath + "/" + fileParent() + "/" + preext() + "-line_extract";
    }

    private String fullLeLinePath(int i) {
        return String.format(leLineDir() + "/line%02d." + ext(), Integer.valueOf(i));
    }

    protected abstract File file();

    protected abstract BufferedImage doLoadBufferedImage();

    protected abstract String preext();

    protected abstract String ext();
}
