package org.graalvm.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.StubPort;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

/**
 * LIR instruction that emits AMD64 code comparing two memory regions and producing the position
 * of the first mismatch.
 *
 * <p>
 * The emitted code first shifts the length left by the stride (held in {@code rcx}) to obtain a
 * byte length, searches for the first differing byte, and finally shifts the byte index right by
 * the stride again. If no difference is found the result is {@code -1}.
 *
 * <p>
 * The inputs are pinned to fixed registers by {@link #movParamsAndCreate}: array A in
 * {@code rsi}, array B in {@code rdi}, length in {@code rdx} and stride in {@code rcx}.
 */
@Opcode("VECTORIZED_MISMATCH")
@StubPort(path = "src/hotspot/cpu/x86/macroAssembler_x86.cpp", lineStart = 6510, lineEnd = 6728, commit = "77e21c57ce00463db4cc3d87f93729cbfe2c96b4", sha1 = "128d88224b8fc7fa9283072966a28c14fdc1eda5")
public final class AMD64VectorizedMismatchOp extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64VectorizedMismatchOp> TYPE = LIRInstructionClass.create(AMD64VectorizedMismatchOp.class);

    private static final Register REG_ARRAY_A = AMD64.rsi;
    private static final Register REG_ARRAY_B = AMD64.rdi;
    private static final Register REG_LENGTH = AMD64.rdx;
    /** The stride must live in rcx because it is used as the implicit count of the shift ops. */
    private static final Register REG_STRIDE = AMD64.rcx;

    /** "All bytes equal" byte mask produced by {@code pmovmsk} on a 16-byte (XMM) vector. */
    private static final int ONES_16 = 0xFFFF;
    /** "All bytes equal" byte mask produced by {@code pmovmsk} on a 32-byte (YMM) vector. */
    private static final int ONES_32 = 0xFFFFFFFF;

    @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
    private Value resultValue;

    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value arrayAValue;

    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value arrayBValue;

    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    private Value lengthValue;

    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value strideValue;

    // The three inputs below are additionally registered as temps; each temp field holds the
    // same value as its corresponding use field.
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value arrayAValueTemp;

    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value arrayBValueTemp;

    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    private Value lengthValueTemp;

    /** Two general purpose scratch registers. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    Value[] temp;

    /** Three vector scratch registers. */
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    Value[] vectorTemp;

    private AMD64VectorizedMismatchOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, Value result, Value arrayA, Value arrayB, Value length, Value stride) {
        super(TYPE, tool, runtimeCheckedCPUFeatures, AVXKind.AVXSize.YMM);
        this.resultValue = result;
        this.arrayAValueTemp = arrayA;
        this.arrayAValue = arrayA;
        this.arrayBValueTemp = arrayB;
        this.arrayBValue = arrayB;
        this.lengthValueTemp = length;
        this.lengthValue = length;
        this.strideValue = stride;
        this.temp = allocateTempRegisters(tool, AMD64Kind.QWORD, 2);
        this.vectorTemp = allocateVectorRegisters(tool, JavaKind.Byte, 3);
    }

    /**
     * Moves the operands into the fixed registers this instruction works on and creates the
     * instruction.
     *
     * @param lIRGeneratorTool generator used to emit the moves and allocate temporaries
     * @param enumSet CPU features forwarded to the superclass constructor
     * @param value destination of the result
     * @param value2 first array pointer, moved to {@code rsi}
     * @param value3 second array pointer, moved to {@code rdi}
     * @param value4 length, moved to {@code rdx}
     * @param value5 stride (shift amount), moved to {@code rcx}
     * @return the new instruction operating on the fixed registers
     */
    public static AMD64VectorizedMismatchOp movParamsAndCreate(LIRGeneratorTool lIRGeneratorTool, EnumSet<AMD64.CPUFeature> enumSet, Value value, Value value2, Value value3, Value value4, Value value5) {
        RegisterValue regArrayA = REG_ARRAY_A.asValue(value2.getValueKind());
        RegisterValue regArrayB = REG_ARRAY_B.asValue(value3.getValueKind());
        RegisterValue regLength = REG_LENGTH.asValue(value4.getValueKind());
        // Each fixed register value takes the kind of the operand that is moved into it.
        RegisterValue regStride = REG_STRIDE.asValue(value5.getValueKind());
        lIRGeneratorTool.emitMove(regArrayA, value2);
        lIRGeneratorTool.emitMove(regArrayB, value3);
        lIRGeneratorTool.emitMove(regLength, value4);
        lIRGeneratorTool.emitMove(regStride, value5);
        return new AMD64VectorizedMismatchOp(lIRGeneratorTool, enumSet, value, regArrayA, regArrayB, regLength, regStride);
    }

    /**
     * Emits the mismatch search.
     *
     * <p>
     * Layout of the emitted code: a vector loop over all full vectors, followed by one vector
     * compare aligned to the end of the region; for regions shorter than one vector, a cascade
     * of 16-byte (only with AVX2 and YMM), 8-byte and 4-byte compares of both ends of the
     * region, and finally a byte-wise loop for regions shorter than 4 bytes.
     */
    @Override // org.graalvm.compiler.lir.amd64.AMD64LIRInstruction
    public void emitCode(CompilationResultBuilder compilationResultBuilder, AMD64MacroAssembler aMD64MacroAssembler) {
        final AMD64MacroAssembler asm = aMD64MacroAssembler;

        final Register result = ValueUtil.asRegister(this.resultValue);
        final Register arrayA = ValueUtil.asRegister(this.arrayAValue);
        final Register arrayB = ValueUtil.asRegister(this.arrayBValue);
        final Register length = ValueUtil.asRegister(this.lengthValue);
        final Register tmp1 = ValueUtil.asRegister(this.temp[0]);
        final Register tmp2 = ValueUtil.asRegister(this.temp[1]);
        final Register vector1 = ValueUtil.asRegister(this.vectorTemp[0]);
        final Register vector2 = ValueUtil.asRegister(this.vectorTemp[1]);
        final Register vector3 = ValueUtil.asRegister(this.vectorTemp[2]);

        final int bytesPerVector = this.vectorSize.getBytes();
        final int xmmBytes = AVXKind.AVXSize.XMM.getBytes();
        final int ymmBytes = AVXKind.AVXSize.YMM.getBytes();
        final int qwordBytes = AVXKind.AVXSize.QWORD.getBytes();
        final int dwordBytes = AVXKind.AVXSize.DWORD.getBytes();
        final int alignment = compilationResultBuilder.target.wordSize * 2;

        final Label returnLabel = new Label();
        final Label returnEqualLabel = new Label();
        final Label loop = new Label();
        final Label diffFound = new Label();
        final Label tail = new Label();
        final Label lessThan16 = new Label();
        final Label qwordTail = new Label();
        final Label lessThan8 = new Label();
        final Label dwordTail = new Label();
        final Label lessThan4 = new Label();
        final Label scalarLoop = new Label();

        GraalError.guarantee(ValueUtil.asRegister(this.strideValue).equals(AMD64.rcx), "stride must be in rcx for shift op");

        // Convert the length to a byte length (shift count is taken from rcx, see guarantee).
        asm.shlq(length);

        // result is the running byte index; tmp1 = byteLength % bytesPerVector.
        asm.xorq(result, result);
        asm.movq(tmp1, length);
        asm.andq(tmp1, bytesPerVector - 1);
        // length = byteLength rounded down to a multiple of bytesPerVector; if that is zero the
        // region is shorter than one vector.
        asm.andqAndJcc(length, -bytesPerVector, AMD64Assembler.ConditionFlag.Zero, tail, false);

        if (supports(AMD64.CPUFeature.AVX)) {
            // Main loop: xor both vectors and test the result for any set bit.
            asm.align(alignment);
            asm.bind(loop);
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, Stride.S1));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, Stride.S1));
            asm.pxor(this.vectorSize, vector3, vector1, vector2);
            asm.ptest(this.vectorSize, vector3);
            asm.jccb(AMD64Assembler.ConditionFlag.NotZero, diffFound);
            asm.addq(result, bytesPerVector);
            asm.subqAndJcc(length, bytesPerVector, AMD64Assembler.ConditionFlag.NotZero, loop, true);

            // Tail: move the index so that one more vector load ends at the end of the region.
            asm.leaq(result, new AMD64Address(result, tmp1, Stride.S1, -bytesPerVector));
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, Stride.S1));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, Stride.S1));
            asm.pxor(this.vectorSize, vector3, vector1, vector2);
            asm.ptest(this.vectorSize, vector3);
            asm.jcc(AMD64Assembler.ConditionFlag.Zero, returnEqualLabel);

            // A difference exists in the current vector: build the byte-equality mask, invert it
            // and add the position of the lowest set bit to the index.
            asm.align(alignment);
            asm.bind(diffFound);
            AMD64Assembler.VexRVMOp.VPCMPEQB.emit(asm, this.vectorSize, vector3, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp2, vector3);
            asm.notq(tmp2);
            bsfq(asm, tmp2, tmp2);
            asm.addq(result, tmp2);
            asm.jmp(returnLabel);
        } else {
            // Mask value meaning "every byte of the vector compared equal".
            final int allEqualMask = this.vectorSize == AVXKind.AVXSize.XMM ? ONES_16 : ONES_32;

            // Main loop: compare bytes for equality; xor-ing the mask with allEqualMask leaves a
            // set bit for every differing byte.
            asm.align(alignment);
            asm.bind(loop);
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, Stride.S1));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, Stride.S1));
            asm.pcmpeq(this.vectorSize, Stride.S1, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp2, vector1);
            asm.xorlAndJcc(tmp2, allEqualMask, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
            asm.addq(result, bytesPerVector);
            asm.subqAndJcc(length, bytesPerVector, AMD64Assembler.ConditionFlag.NotZero, loop, true);

            // Tail: move the index so that one more vector load ends at the end of the region.
            asm.leaq(result, new AMD64Address(result, tmp1, Stride.S1, -bytesPerVector));
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, Stride.S1));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, Stride.S1));
            asm.pcmpeq(this.vectorSize, Stride.S1, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp2, vector1);
            asm.xorlAndJcc(tmp2, allEqualMask, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, false);

            // tmp2 holds the inverted mask; its lowest set bit is the first differing byte.
            asm.align(alignment);
            asm.bind(diffFound);
            bsfq(asm, tmp2, tmp2);
            asm.addq(result, tmp2);
            asm.jmp(returnLabel);
        }

        // The region is shorter than one vector; tmp1 holds its byte length, result is still 0.
        asm.align(alignment);
        asm.bind(tail);
        if (supportsAVX2AndYMM()) {
            asm.cmpqAndJcc(tmp1, xmmBytes, AMD64Assembler.ConditionFlag.Less, lessThan16, false);
            // Compare the first and the last 16 bytes of the region.
            asm.movdqu(AVXKind.AVXSize.XMM, vector1, new AMD64Address(arrayA));
            asm.pcmpeq(AVXKind.AVXSize.XMM, Stride.S1, vector1, new AMD64Address(arrayB));
            asm.movdqu(AVXKind.AVXSize.XMM, vector2, new AMD64Address(arrayA, tmp1, Stride.S1, -xmmBytes));
            asm.pcmpeq(AVXKind.AVXSize.XMM, Stride.S1, vector2, new AMD64Address(arrayB, tmp1, Stride.S1, -xmmBytes));
            // Combine both compare results into one YMM vector: vector1 in the lower half,
            // vector2 in the upper half.
            AMD64Assembler.VexRVMIOp.VPERM2I128.emit(asm, AVXKind.AVXSize.YMM, vector1, vector2, vector1, 2);
            asm.pmovmsk(AVXKind.AVXSize.YMM, result, vector1);
            asm.xorlAndJcc(result, ONES_32, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, false);
            bsfq(asm, result, result);
            // tmp2 = bit index translated from the upper half to an offset from the region start.
            asm.leaq(tmp2, new AMD64Address(result, tmp1, Stride.S1, -ymmBytes));
            asm.cmpq(result, xmmBytes);
            // Bit indices 16..31 belong to the upper half, so index 16 itself must be adjusted
            // as well (GreaterEqual rather than Greater).
            asm.cmovq(AMD64Assembler.ConditionFlag.GreaterEqual, result, tmp2);
            asm.jmp(returnLabel);
        }

        asm.bind(lessThan16);
        asm.cmpqAndJcc(tmp1, qwordBytes, AMD64Assembler.ConditionFlag.Less, lessThan8, true);
        // Compare the first 8 bytes; bsf yields a bit index, >> 3 converts it to a byte index.
        asm.movq(result, new AMD64Address(arrayA));
        asm.xorqAndJcc(result, new AMD64Address(arrayB), AMD64Assembler.ConditionFlag.Zero, qwordTail, true);
        bsfq(asm, result, result);
        asm.shrq(result, 3);
        asm.jmp(returnLabel);

        // Compare the last 8 bytes and rebase the byte index to the region start.
        asm.bind(qwordTail);
        asm.movq(result, new AMD64Address(arrayA, tmp1, Stride.S1, -qwordBytes));
        asm.xorqAndJcc(result, new AMD64Address(arrayB, tmp1, Stride.S1, -qwordBytes), AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        bsfq(asm, result, result);
        asm.shrl(result, 3);
        asm.leaq(result, new AMD64Address(result, tmp1, Stride.S1, -qwordBytes));
        asm.jmpb(returnLabel);

        asm.bind(lessThan8);
        asm.cmpqAndJcc(tmp1, dwordBytes, AMD64Assembler.ConditionFlag.Less, lessThan4, true);
        // Compare the first 4 bytes.
        asm.movl(result, new AMD64Address(arrayA));
        asm.xorlAndJcc(result, new AMD64Address(arrayB), AMD64Assembler.ConditionFlag.Zero, dwordTail, true);
        bsfq(asm, result, result);
        asm.shrl(result, 3);
        asm.jmpb(returnLabel);

        // Compare the last 4 bytes and rebase the byte index to the region start.
        asm.bind(dwordTail);
        asm.movl(result, new AMD64Address(arrayA, tmp1, Stride.S1, -dwordBytes));
        asm.xorlAndJcc(result, new AMD64Address(arrayB, tmp1, Stride.S1, -dwordBytes), AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        bsfq(asm, result, result);
        asm.shrl(result, 3);
        asm.leaq(result, new AMD64Address(result, tmp1, Stride.S1, -dwordBytes));
        asm.jmpb(returnLabel);

        // Fewer than 4 bytes: nothing to do for an empty region, otherwise compare byte by byte.
        // The length register is no longer needed here and is reused as a scratch register.
        asm.bind(lessThan4);
        asm.testqAndJcc(tmp1, tmp1, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        asm.bind(scalarLoop);
        asm.movzbl(tmp2, new AMD64Address(arrayA, result, Stride.S1));
        asm.movzbl(length, new AMD64Address(arrayB, result, Stride.S1));
        asm.cmplAndJcc(tmp2, length, AMD64Assembler.ConditionFlag.NotEqual, returnLabel, true);
        asm.incl(result);
        asm.decqAndJcc(tmp1, AMD64Assembler.ConditionFlag.NotZero, scalarLoop, true);

        // No difference found.
        asm.align(alignment);
        asm.bind(returnEqualLabel);
        asm.movq(result, -1L);

        // Scale the byte index back by the stride (shift count in rcx); the arithmetic shift
        // leaves -1 unchanged.
        asm.align(alignment);
        asm.bind(returnLabel);
        asm.sarq(result);
    }
}
