package edu.berkeley.cs.nlp.ocular.model.em;

import tberg.murphy.gpu.CudaUtil;

/* loaded from: input_file:main/ocular_2.12-0.3-SNAPSHOT.jar:edu/berkeley/cs/nlp/ocular/model/em/DefaultInnerLoop.class */
/**
 * CPU implementation of {@link EmissionCacheInnerLoop}.
 *
 * <p>For every template width between {@code minTemplateWidth} and
 * {@code maxTemplateWidth} (inclusive), {@link #compute} slides each template of
 * that width across the observation arrays, takes the dot product of the
 * template with the observation window, and adds the result into the caller's
 * score array. White and black observation/template pairs are processed
 * separately and both contribute to the same score cell.
 *
 * <p>State is installed by {@link #startup}; calling {@link #compute} before
 * {@code startup} dereferences null arrays.
 */
public class DefaultInnerLoop implements EmissionCacheInnerLoop {
    /**
     * Number of consecutive floats that one column position occupies in the
     * flattened observation and template arrays.
     * NOTE(review): presumably the pixel height of a line image - confirm against
     * the project's template constants.
     */
    private static final int VALUES_PER_COLUMN = 30;

    /** Value reported by {@link #numOuterThreads()}. */
    int numThreads;
    /** Flattened white templates, indexed by {@code width - minTemplateWidth}. */
    float[][] whiteTemplates;
    /** Flattened black templates, indexed by {@code width - minTemplateWidth}. */
    float[][] blackTemplates;
    /** Number of templates for each width, indexed by {@code width - minTemplateWidth}. */
    int[] templateNumIndices;
    /** Per-width offset into the score array (scaled by the sequence length in {@link #compute}). */
    int[] templateIndicesOffsets;
    /** Largest template width visited by {@link #compute} (inclusive). */
    int maxTemplateWidth;
    /** Smallest template width visited by {@link #compute} (inclusive). */
    int minTemplateWidth;

    /**
     * @param i the thread count later returned by {@link #numOuterThreads()}
     */
    public DefaultInnerLoop(int i) {
        this.numThreads = i;
    }

    /**
     * Stores the template data used by subsequent {@link #compute} calls. The
     * arrays are kept by reference, not copied.
     *
     * @param fArr  white templates, one flattened array per template width
     * @param fArr2 black templates, one flattened array per template width
     * @param iArr  number of templates for each width
     * @param iArr2 per-width offsets into the score array
     * @param i     minimum template width
     * @param i2    maximum template width
     * @param i3    ignored by this implementation
     * @param i4    ignored by this implementation
     */
    @Override
    public void startup(float[][] fArr, float[][] fArr2, int[] iArr, int[] iArr2, int i, int i2, int i3, int i4) {
        this.whiteTemplates = fArr;
        this.blackTemplates = fArr2;
        this.templateNumIndices = iArr;
        this.templateIndicesOffsets = iArr2;
        // Note the argument order: the minimum width comes before the maximum.
        this.minTemplateWidth = i;
        this.maxTemplateWidth = i2;
    }

    /** No resources are held, so there is nothing to release. */
    @Override
    public void shutdown() {
    }

    /**
     * Adds template/observation dot products into {@code fArr} for every template
     * width, window position and template index.
     *
     * @param fArr  score array that is accumulated into (existing values are kept)
     * @param fArr2 flattened observations matched against the white templates
     * @param fArr3 flattened observations matched against the black templates
     * @param i     number of column positions in the observation arrays; a template
     *              of width {@code w} is evaluated at {@code i - w + 1} positions
     */
    @Override
    public void compute(float[] fArr, float[] fArr2, float[] fArr3, int i) {
        for (int width = this.minTemplateWidth; width <= this.maxTemplateWidth; width++) {
            int slot = width - this.minTemplateWidth;
            float[] whiteForWidth = this.whiteTemplates[slot];
            float[] blackForWidth = this.blackTemplates[slot];
            // The full white pass runs before the black pass so that each score cell
            // receives its two float additions in the same order as before.
            accumulateScores(fArr, fArr2, whiteForWidth, i, width, slot);
            accumulateScores(fArr, fArr3, blackForWidth, i, width, slot);
        }
    }

    /**
     * Slides every template of one width over one observation array and adds each
     * dot product into the matching score cell.
     *
     * @param scores         score array that is accumulated into
     * @param observations   flattened observations, {@code VALUES_PER_COLUMN} floats per position
     * @param templates      flattened templates of this width, stored back to back
     * @param sequenceLength number of column positions in {@code observations}
     * @param width          template width in column positions
     * @param slot           {@code width - minTemplateWidth}, the index into the per-width arrays
     */
    private void accumulateScores(float[] scores, float[] observations, float[] templates,
            int sequenceLength, int width, int slot) {
        int numPositions = (sequenceLength - width) + 1;
        if (numPositions <= 0) {
            // Template is wider than the sequence: nothing to score.
            return;
        }
        int numTemplates = this.templateNumIndices[slot];
        int span = width * VALUES_PER_COLUMN;
        for (int position = 0; position < numPositions; position++) {
            int observationStart = position * VALUES_PER_COLUMN;
            for (int template = 0; template < numTemplates; template++) {
                int templateStart = template * span;
                float dot = 0.0f;
                for (int k = 0; k < span; k++) {
                    dot += observations[observationStart + k] * templates[templateStart + k];
                }
                int target = (this.templateIndicesOffsets[slot] * sequenceLength)
                        + CudaUtil.flatten(sequenceLength, numTemplates, position, template);
                scores[target] += dot;
            }
        }
    }

    /**
     * @return the thread count passed to the constructor
     */
    @Override
    public int numOuterThreads() {
        return this.numThreads;
    }

    /**
     * @return always {@code 1}
     */
    @Override
    public int numPopulateThreads() {
        return 1;
    }
}
