package edu.berkeley.cs.nlp.ocular.output;

import edu.berkeley.cs.nlp.ocular.data.Document;
import edu.berkeley.cs.nlp.ocular.data.textreader.Charset;
import edu.berkeley.cs.nlp.ocular.gsm.GlyphChar;
import edu.berkeley.cs.nlp.ocular.model.DecodeState;
import edu.berkeley.cs.nlp.ocular.model.transition.SparseTransitionModel;
import edu.berkeley.cs.nlp.ocular.util.StringHelper;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import tberg.murphy.fileio.f;
import tberg.murphy.indexer.Indexer;
import tberg.murphy.util.Iterators;

/* loaded from: input_file:main/ocular_2.12-0.3-SNAPSHOT.jar:edu/berkeley/cs/nlp/ocular/output/AltoOutputWriter.class */
/**
 * Serialises decoded OCR output for a single document as an ALTO v3 XML file.
 *
 * <p>Each decoded line becomes a {@code <TextLine>}; runs of non-space decode states become
 * {@code <String>} elements and the runs of space between them become {@code <SP>} elements.
 * Every word carries two surface forms: the "diplomatic" form built from glyph template
 * characters and the "normalized" form built from language-model characters. One is written
 * as {@code CONTENT}, the other (when different) as an {@code <ALTERNATIVE>} child.
 */
public class AltoOutputWriter {
    /** Captures the page identifier (group 1) at the first {@code pl_<letters>_<digits>_<digits>} match. */
    private static final Pattern PAGE_ID_PATTERN = Pattern.compile("(pl_[a-z]+_\\d+_\\d+).*");
    /** Captures the trailing digit group (group 1) of the page identifier. */
    private static final Pattern PAGE_NUMBER_PATTERN = Pattern.compile("pl_[a-z]+_\\d+_(\\d+).*");
    /** Captures everything from the last page identifier onwards (group 1), dropping any leading path. */
    private static final Pattern IMAGE_FILENAME_PATTERN = Pattern.compile(".*(pl_[a-z]+_\\d+_\\d+.*)");

    private final Indexer<String> charIndexer;
    private final Indexer<String> langIndexer;
    private final int spaceCharIndex;
    private final int hyphenCharIndex;

    /**
     * @param indexer  character indexer; also used to look up the space and hyphen indices
     * @param indexer2 language indexer used to resolve the {@code LANG} attribute of each word
     */
    public AltoOutputWriter(Indexer<String> indexer, Indexer<String> indexer2) {
        this.charIndexer = indexer;
        this.langIndexer = indexer2;
        this.spaceCharIndex = indexer.getIndex(Charset.SPACE);
        this.hyphenCharIndex = indexer.getIndex(Charset.HYPHEN);
    }

    /**
     * Builds the ALTO document and writes it to {@code str + "_norm.alto.xml"} (when {@code z}
     * is true) or {@code str + "_dipl.alto.xml"} (otherwise).
     *
     * @param i        number of lines to emit; {@code listArr[0..i-1]} are read
     * @param listArr  decode states, one list per line
     * @param document source document; only its base name is used
     * @param str      output path prefix
     * @param str2     unused by this method
     * @param list     settings strings, space-joined into {@code <processingStepSettings>}
     * @param z        true to make the normalized form the primary {@code CONTENT}
     * @param d        unused by this method
     */
    public void write(int i, List<DecodeState>[] listArr, Document document, String str, String str2, List<String> list, boolean z, double d) {
        String outputFilename = str + (z ? "_norm" : "_dipl") + ".alto.xml";
        String baseName = document.baseName();

        StringBuilder xml = new StringBuilder();
        appendHeader(xml, baseName, list);

        // Word IDs are numbered consecutively across the whole page, not per line.
        int nextWordIndex = 0;
        for (int line = 0; line < i; line++) {
            StringBuilder lineXml = new StringBuilder();
            nextWordIndex = appendLineContent(lineXml, listArr[line], z, nextWordIndex);
            // Lines that produced no words are omitted entirely.
            if (lineXml.length() > 0) {
                xml.append("    <TextLine ID=\"line_" + (line + 1) + "\">\n");
                xml.append(lineXml);
                xml.append("    </TextLine>\n");
            }
        }

        appendFooter(xml);

        System.out.println("Writing alto output to " + outputFilename);
        f.writeString(outputFilename, xml.toString());
    }

    /** Appends the XML prolog, the {@code <Description>} section and the opening layout tags. */
    private void appendHeader(StringBuilder xml, String baseName, List<String> settings) {
        // HH (0-23): the timestamp carries no AM/PM marker, so a 12-hour clock would be ambiguous.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        xml.append("<alto xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns=\"http://www.loc.gov/standards/alto/ns-v3#\" xsi:schemaLocation=\"http://www.loc.gov/standards/alto/ns-v3# http://www.loc.gov/standards/alto/v3/alto.xsd\" xmlns:emop=\"http://emop.tamu.edu\">\n");
        xml.append("  <Description>\n");
        xml.append("    <MeasurementUnit>pixel</MeasurementUnit>\n");
        xml.append("    <sourceImageInformation>\n");
        // File names and settings are free text, so they must be escaped to keep the XML well-formed.
        xml.append("      <fileName>" + escapeCharactersForValidation(imagePathToFilename(baseName)) + "</fileName>\n");
        xml.append("    </sourceImageInformation>\n");
        xml.append("    <OCRProcessing ID=\"Ocular0.0.3\">\n");
        xml.append("      <preProcessingStep></preProcessingStep>\n");
        xml.append("      <ocrProcessingStep>\n");
        xml.append("\t\t <processingDateTime>" + timestampFormat.format(new Date()) + "</processingDateTime>\n");
        xml.append("      \t <processingStepSettings>" + escapeCharactersForValidation(StringHelper.join(settings, Charset.SPACE)) + "</processingStepSettings>\n");
        xml.append("        <processingSoftware>\n");
        xml.append("          <softwareCreator>Taylor Berg-Kirkpatrick, Greg Durrett, Dan Klein, Dan Garrette, Hannah Alpert-Abrams</softwareCreator>\n");
        xml.append("          <softwareName>Ocular</softwareName>\n");
        xml.append("          <softwareVersion>0.0.3</softwareVersion>\n");
        xml.append("        </processingSoftware>\n");
        xml.append("       </ocrProcessingStep>\n");
        xml.append("    </OCRProcessing>\n");
        xml.append("  </Description>\n");
        xml.append("  <Layout>\n");
        xml.append("    <Page ID=\"" + imageFilenameToId(baseName) + "\"  PHYSICAL_IMG_NR=\"" + imageFilenameToIdNumber(baseName) + "\">\n");
        xml.append("      <PrintSpace>\n");
        xml.append("        <TextBlock ID=\"par_1\">\n");
    }

    /** Appends the closing tags matching those opened by {@link #appendHeader}. */
    private void appendFooter(StringBuilder xml) {
        xml.append("</TextBlock>\n");
        xml.append("</PrintSpace>\n");
        xml.append("</Page>\n");
        xml.append("</Layout>\n");
        xml.append("</alto>\n");
    }

    /**
     * Splits one line of decode states into alternating word / space spans and appends the
     * corresponding {@code <String>} and {@code <SP>} elements.
     *
     * @return the word index to use for the next word (advanced once per word emitted)
     */
    private int appendLineContent(StringBuilder lineXml, List<DecodeState> states, boolean normalizedPrimary, int nextWordIndex) {
        boolean beginningOfLine = true; // suppresses <SP> before the first word of the line
        boolean inWord = false;
        int spanWidth = 0;
        List<DecodeState> span = new ArrayList<DecodeState>();

        // A trailing null sentinel forces the final span of the line to be flushed.
        @SuppressWarnings("unchecked")
        Iterator<DecodeState> iterator = Iterators.concat(states.iterator(), Iterators.<DecodeState>oneItemIterator(null));
        while (iterator.hasNext()) {
            DecodeState state = iterator.next();
            boolean isSpace = isSpace(state);

            // A span ends when we cross a word/space boundary, hit non-hyphen punctuation,
            // or reach the sentinel.
            if (isSpace == inWord || isNonHyphenPunctuation(state) || !iterator.hasNext()) {
                if (inWord) {
                    if (!span.isEmpty() && appendWord(lineXml, span, spanWidth, normalizedPrimary, nextWordIndex)) {
                        beginningOfLine = false;
                        nextWordIndex++;
                    }
                } else if (!beginningOfLine && spanWidth > 0) {
                    lineXml.append("      <SP WIDTH=\"" + spanWidth + "\"/>\n");
                }
                span.clear();
                spanWidth = 0;
                inWord = !isSpace;
            }

            span.add(state);
            spanWidth += (state != null ? state.charAndPadWidth : 0);
        }
        return nextWordIndex;
    }

    /** The sentinel counts as space; otherwise both the LM char and the glyph template char must be space. */
    private boolean isSpace(DecodeState state) {
        return state == null
                || (state.ts.getLmCharIndex() == this.spaceCharIndex
                        && state.ts.getGlyphChar().templateCharIndex == this.spaceCharIndex);
    }

    /** True when the state's LM char is punctuation other than the hyphen; false for the sentinel. */
    private boolean isNonHyphenPunctuation(DecodeState state) {
        return state != null
                && state.ts.getLmCharIndex() != this.hyphenCharIndex
                && Charset.isPunctuationChar(this.charIndexer.getObject(state.ts.getLmCharIndex()));
    }

    /**
     * Appends a {@code <String>} element for one word span.
     *
     * @param word non-empty list of decode states forming the word
     * @return true if an element was written; false if the diplomatic form trimmed to empty
     */
    private boolean appendWord(StringBuilder lineXml, List<DecodeState> word, int width, boolean normalizedPrimary, int wordIndex) {
        // The word's language is taken from its first state.
        int languageIndex = word.get(0).ts.getLanguageIndex();
        String language = languageIndex >= 0 ? this.langIndexer.getObject(languageIndex) : "None";

        StringBuilder diplomaticChars = new StringBuilder();
        StringBuilder normalizedChars = new StringBuilder();
        for (DecodeState state : word) {
            SparseTransitionModel.TransitionState ts = state.ts;
            if (!ts.getGlyphChar().isElided()) {
                diplomaticChars.append(Charset.unescapeChar(this.charIndexer.getObject(ts.getGlyphChar().templateCharIndex)));
            }
            if (ts.getGlyphChar().glyphType != GlyphChar.GlyphType.DOUBLED) {
                switch (ts.getType()) {
                    case RMRGN_HPHN_INIT:
                        normalizedChars.append(Charset.HYPHEN);
                        break;
                    case LMRGN:
                    case RMRGN:
                        normalizedChars.append(Charset.SPACE);
                        break;
                    case TMPL:
                        normalizedChars.append(Charset.unescapeChar(this.charIndexer.getObject(ts.getLmCharIndex())));
                        break;
                    default:
                        // Other transition types contribute nothing to the normalized form.
                        break;
                }
            }
        }

        String diplomatic = diplomaticChars.toString().trim();
        String normalized = normalizedChars.toString().trim();
        if (diplomatic.isEmpty()) {
            return false;
        }

        lineXml.append("      <String ID=\"word_" + wordIndex + "\" WIDTH=\"" + width + "\" CONTENT=\"" + escapeCharactersForValidation(normalizedPrimary ? normalized : diplomatic) + "\" LANG=\"" + language + "\"");
        if (normalized.equals(diplomatic)) {
            lineXml.append("/> \n");
        } else {
            // The two forms differ: record the non-primary one as an alternative.
            lineXml.append("> \n");
            if (normalizedPrimary) {
                lineXml.append("          <ALTERNATIVE PURPOSE=\"Diplomatic\">" + escapeCharactersForValidation(diplomatic) + "</ALTERNATIVE>\n");
            } else {
                lineXml.append("          <ALTERNATIVE PURPOSE=\"Normalization\">" + escapeCharactersForValidation(normalized) + "</ALTERNATIVE>\n");
            }
            lineXml.append("      </String>\n");
        }
        return true;
    }

    /** Returns the page identifier found in {@code str}, or an error placeholder if none matches. */
    private String imageFilenameToId(String str) {
        Matcher matcher = PAGE_ID_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(1) : "Error: page ID unknown";
    }

    /** Returns the final digit group of the page identifier, or an error placeholder if none matches. */
    private String imageFilenameToIdNumber(String str) {
        Matcher matcher = PAGE_NUMBER_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(1) : "Error: ID Number unknown";
    }

    /** Returns {@code str} from its last page identifier onwards, or an error placeholder if none matches. */
    private String imagePathToFilename(String str) {
        Matcher matcher = IMAGE_FILENAME_PATTERN.matcher(str);
        return matcher.find() ? matcher.group(1) : "Error: filename unknown";
    }

    /**
     * Escapes the five XML special characters and rewrites P/p/Q/q followed by a combining
     * tilde (U+0303) so the tilde is emitted as a numeric character reference.
     *
     * <p>The reference is hexadecimal ({@code &#x0303;}); a decimal {@code &#0303;} would
     * denote U+012F instead of the combining tilde.
     */
    private String escapeCharactersForValidation(String str) {
        return str
                .replace("&", "&amp;") // must run first so later entities are not double-escaped
                .replace(">", "&gt;")
                .replace("<", "&lt;")
                .replace("'", "&apos;")
                .replace("\"", "&quot;")
                .replace("P\u0303", "P&#x0303;")
                .replace("p\u0303", "p&#x0303;")
                .replace("Q\u0303", "Q&#x0303;")
                .replace("q\u0303", "q&#x0303;");
    }
}
