Implement VMULL, VMLSL, VRSHR, VQRSHRN, VQRSHRUN AArch32 instructions + other fixes (#977)
* Implement VMULL, VMLSL, VQRSHRN, VQRSHRUN AArch32 instructions plus other fixes
* Re-align opcode table
* Re-enable undefined, use subclasses to fix checks
* Add test and fix VRSHR instruction
* PR feedback
This commit is contained in:
parent
89ccec197e
commit
c26f3774bd
14 changed files with 873 additions and 288 deletions
|
@ -6,13 +6,20 @@
|
|||
{
|
||||
Q = ((opCode >> 24) & 0x1) != 0;
|
||||
F = ((opCode >> 8) & 0x1) != 0;
|
||||
Size = ((opCode >> 20) & 0x3);
|
||||
Size = (opCode >> 20) & 0x3;
|
||||
|
||||
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
|
||||
|
||||
if (Size == 1)
|
||||
{
|
||||
Vm = ((opCode >> 3) & 0x1) | ((opCode >> 4) & 0x2) | ((opCode << 2) & 0x1c);
|
||||
}
|
||||
else /* if (Size == 2) */
|
||||
{
|
||||
Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
|
||||
}
|
||||
|
||||
if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F))
|
||||
if (GetType() == typeof(OpCode32SimdRegElem) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F))
|
||||
{
|
||||
Instruction = InstDescriptor.Undefined;
|
||||
}
|
||||
|
|
19
ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
Normal file
19
ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs
Normal file
|
@ -0,0 +1,19 @@
|
|||
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// By-element SIMD opcode whose Q and F fields are forced off and whose
    /// register size is fixed to 64 bits after the base decoder has run.
    /// </summary>
    class OpCode32SimdRegElemLong : OpCode32SimdRegElem
    {
        /// <summary>
        /// Decodes the opcode through the base class, then overrides the
        /// fields the base derived from the encoding.
        /// </summary>
        /// <param name="inst">Instruction descriptor forwarded to the base decoder.</param>
        /// <param name="address">Address of the instruction, forwarded to the base decoder.</param>
        /// <param name="opCode">Raw 32-bit encoding, forwarded to the base decoder.</param>
        public OpCode32SimdRegElemLong(InstDescriptor inst, ulong address, int opCode)
            : base(inst, address, opCode)
        {
            Q = false;
            F = false;

            RegisterSize = RegisterSize.Simd64;

            // (Vd & 1) != 0 || Size == 3 are also invalid, but they are checked on encoding.
            if (Size != 0)
            {
                return;
            }

            // A size field of zero is rejected as undefined.
            Instruction = InstDescriptor.Undefined;
        }
    }
}
|
14
ARMeilleure/Decoders/OpCode32SimdRegLong.cs
Normal file
14
ARMeilleure/Decoders/OpCode32SimdRegLong.cs
Normal file
|
@ -0,0 +1,14 @@
|
|||
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Three-register SIMD opcode with Q forced off and a 64-bit register
    /// size, which additionally exposes bit 9 of the encoding as
    /// <see cref="Polynomial"/>.
    /// </summary>
    class OpCode32SimdRegLong : OpCode32SimdReg
    {
        /// <summary>True when bit 9 of the raw opcode is set.</summary>
        public bool Polynomial { get; private set; }

        /// <summary>
        /// Decodes the opcode through the base class, then fixes the
        /// register size and reads the polynomial bit.
        /// </summary>
        /// <param name="inst">Instruction descriptor forwarded to the base decoder.</param>
        /// <param name="address">Address of the instruction, forwarded to the base decoder.</param>
        /// <param name="opCode">Raw 32-bit encoding.</param>
        public OpCode32SimdRegLong(InstDescriptor inst, ulong address, int opCode)
            : base(inst, address, opCode)
        {
            const int PolynomialBitMask = 1 << 9;

            Q = false;
            RegisterSize = RegisterSize.Simd64;
            Polynomial = (opCode & PolynomialBitMask) != 0;
        }
    }
}
|
|
@ -35,7 +35,7 @@
|
|||
Instruction = InstDescriptor.Undefined;
|
||||
}
|
||||
|
||||
if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
|
||||
if (GetType() == typeof(OpCode32SimdShImm) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
|
||||
{
|
||||
Instruction = InstDescriptor.Undefined;
|
||||
}
|
||||
|
|
7
ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
Normal file
7
ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs
Normal file
|
@ -0,0 +1,7 @@
|
|||
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Subclass of <see cref="OpCode32SimdShImm"/> that adds no fields or
    /// logic of its own; it only provides a distinct runtime type.
    /// </summary>
    class OpCode32SimdShImmNarrow : OpCode32SimdShImm
    {
        /// <summary>Forwards all arguments unchanged to the base decoder.</summary>
        public OpCode32SimdShImmNarrow(InstDescriptor inst, ulong address, int opCode)
            : base(inst, address, opCode)
        {
        }
    }
}
|
|
@ -818,6 +818,7 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, typeof(OpCode32SimdRegS));
|
||||
SetA32("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, typeof(OpCode32SimdReg));
|
||||
SetA32("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x1x<<xxxxxxx01010x0x0xxxx", InstName.Vmlsl, InstEmit32.Vmlsl_I, typeof(OpCode32SimdRegLong));
|
||||
SetA32("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // From gen purpose.
|
||||
SetA32("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, typeof(OpCode32SimdMovGpElem)); // To gen purpose.
|
||||
SetA32("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, typeof(OpCode32SimdMovGpDouble)); // To/from gen purpose x2 and single precision x2.
|
||||
|
@ -836,9 +837,13 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
|
||||
SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
|
||||
SetA32("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, typeof(OpCode32SimdRegElem));
|
||||
SetA32("1111001x0xxxxxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
||||
SetA32("111100100x<<xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
||||
SetA32("111100110x00xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, typeof(OpCode32SimdReg));
|
||||
SetA32("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, typeof(OpCode32SimdRegS));
|
||||
SetA32("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, typeof(OpCode32SimdRegElemLong));
|
||||
SetA32("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong));
|
||||
SetA32("111100101x00xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong)); // Polynomial
|
||||
SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); // D/Q vector I32.
|
||||
SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm));
|
||||
SetA32("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm));
|
||||
|
@ -851,18 +856,21 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm));
|
||||
SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg));
|
||||
SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, typeof(OpCode32SimdShImmNarrow));
|
||||
SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, typeof(OpCode32SimdShImmNarrow));
|
||||
SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
|
||||
SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg));
|
||||
SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, typeof(OpCode32SimdRev));
|
||||
SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI));
|
||||
SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI));
|
||||
SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, typeof(OpCode32SimdShImm));
|
||||
SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
|
||||
SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
|
||||
SetA32("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, typeof(OpCode32SimdSel));
|
||||
SetA32("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm));
|
||||
SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg));
|
||||
SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm));
|
||||
SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImm));
|
||||
SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImmNarrow));
|
||||
SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS));
|
||||
SetA32("111101001x00xxxxxxxx<<00xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemSingle));
|
||||
SetA32("111101000x00xxxxxxxx0111xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, typeof(OpCode32SimdMemPair)); // Regs = 1.
|
||||
|
|
|
@ -128,8 +128,6 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
public static void Cmp(ArmEmitterContext context)
|
||||
{
|
||||
IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
|
||||
|
||||
Operand n = GetAluN(context);
|
||||
Operand m = GetAluM(context, setCarry: false);
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
||||
using static ARMeilleure.Instructions.InstEmitFlowHelper;
|
||||
using static ARMeilleure.Instructions.InstEmitHelper;
|
||||
|
@ -113,20 +114,13 @@ namespace ARMeilleure.Instructions
|
|||
Operand insert = GetIntA32(context, op.Rt);
|
||||
|
||||
// Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
|
||||
switch (op.Size)
|
||||
insert = op.Size switch
|
||||
{
|
||||
case 2:
|
||||
insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u));
|
||||
break;
|
||||
case 1:
|
||||
insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u));
|
||||
break;
|
||||
case 0:
|
||||
insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u));
|
||||
break;
|
||||
default:
|
||||
throw new InvalidOperationException("Unknown Vdup Size.");
|
||||
}
|
||||
2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
|
||||
1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
|
||||
0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
|
||||
_ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
|
||||
};
|
||||
|
||||
InsertScalar(context, op.Vd, insert);
|
||||
if (op.Q)
|
||||
|
@ -142,20 +136,13 @@ namespace ARMeilleure.Instructions
|
|||
Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);
|
||||
|
||||
// Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
|
||||
switch (op.Size)
|
||||
insert = op.Size switch
|
||||
{
|
||||
case 2:
|
||||
insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u));
|
||||
break;
|
||||
case 1:
|
||||
insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u));
|
||||
break;
|
||||
case 0:
|
||||
insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u));
|
||||
break;
|
||||
default:
|
||||
throw new InvalidOperationException("Unknown Vdup Size.");
|
||||
}
|
||||
2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
|
||||
1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
|
||||
0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
|
||||
_ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
|
||||
};
|
||||
|
||||
InsertScalar(context, op.Vd, insert);
|
||||
if (op.Q)
|
||||
|
@ -575,75 +562,6 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
public static void Vmul_S(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
|
||||
}
|
||||
else if (Optimizations.FastFP)
|
||||
{
|
||||
EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarBinaryOpF32(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vmul_V(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
|
||||
}
|
||||
else if (Optimizations.FastFP)
|
||||
{
|
||||
EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorBinaryOpF32(context, (op1, op2) =>
|
||||
{
|
||||
return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vmul_I(ArmEmitterContext context)
|
||||
{
|
||||
if ((context.CurrOp as OpCode32SimdReg).U) throw new NotImplementedException("Polynomial mode not implemented");
|
||||
EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
|
||||
}
|
||||
|
||||
public static void Vmul_1(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
|
||||
|
||||
if (op.F)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
{
|
||||
EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
|
||||
}
|
||||
else if (Optimizations.FastFP)
|
||||
{
|
||||
EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vmla_S(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
|
@ -786,6 +704,111 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
// Emits the long multiply-subtract: each widened destination element has the
// product of the two source elements subtracted from it. Elements are
// treated as signed when the opcode's U bit is clear.
public static void Vmlsl_I(ArmEmitterContext context)
{
    bool isSigned = !((OpCode32SimdReg)context.CurrOp).U;

    EmitVectorTernaryLongOpI32(context, (acc, n, m) =>
    {
        Operand product = context.Multiply(n, m);

        return context.Subtract(acc, product);
    }, isSigned);
}
|
||||
|
||||
// Scalar floating-point multiply. Picks one of three emission strategies:
// soft-float call when FastFP is off, SSE intrinsic when FastFP and SSE2 are
// both on, plain IR multiply when only FastFP is on.
public static void Vmul_S(ArmEmitterContext context)
{
    if (!Optimizations.FastFP)
    {
        EmitScalarBinaryOpF32(context, (lhs, rhs) => EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, lhs, rhs));

        return;
    }

    if (Optimizations.UseSse2)
    {
        EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
    }
    else
    {
        EmitScalarBinaryOpF32(context, (lhs, rhs) => context.Multiply(lhs, rhs));
    }
}
|
||||
|
||||
// Vector floating-point multiply. Picks one of three emission strategies:
// soft-float call (default-FPSCR variant) when FastFP is off, SSE intrinsic
// when FastFP and SSE2 are both on, plain IR multiply when only FastFP is on.
public static void Vmul_V(ArmEmitterContext context)
{
    if (!Optimizations.FastFP)
    {
        EmitVectorBinaryOpF32(context, (lhs, rhs) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, lhs, rhs));

        return;
    }

    if (Optimizations.UseSse2)
    {
        EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
    }
    else
    {
        EmitVectorBinaryOpF32(context, (lhs, rhs) => context.Multiply(lhs, rhs));
    }
}
|
||||
|
||||
// Vector integer multiply. The U bit does not select signedness here; when
// set it selects polynomial multiplication instead of the ordinary product.
public static void Vmul_I(ArmEmitterContext context)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    if (!op.U)
    {
        // Ordinary multiply on sign-extended elements.
        EmitVectorBinaryOpSx32(context, (lhs, rhs) => context.Multiply(lhs, rhs));

        return;
    }

    // This instruction is always signed, U indicates polynomial mode.
    int elementBits = 8 << op.Size;

    EmitVectorBinaryOpZx32(context, (lhs, rhs) => EmitPolynomialMultiply(context, lhs, rhs, elementBits));
}
|
||||
|
||||
// Vector-by-scalar multiply. Integer form when the F bit is clear; otherwise
// floating point, using the soft-float call, the SSE intrinsic or a plain IR
// multiply depending on the FastFP / UseSse2 optimization switches.
public static void Vmul_1(ArmEmitterContext context)
{
    OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;

    if (!op.F)
    {
        EmitVectorByScalarOpI32(context, (lhs, rhs) => context.Multiply(lhs, rhs), false);

        return;
    }

    if (!Optimizations.FastFP)
    {
        EmitVectorByScalarOpF32(context, (lhs, rhs) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, lhs, rhs));
    }
    else if (Optimizations.UseSse2)
    {
        EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
    }
    else
    {
        EmitVectorByScalarOpF32(context, (lhs, rhs) => context.Multiply(lhs, rhs));
    }
}
|
||||
|
||||
// Long vector-by-scalar multiply. Elements are treated as signed when the
// opcode's U bit is clear.
public static void Vmull_1(ArmEmitterContext context)
{
    bool isSigned = !((OpCode32SimdRegElem)context.CurrOp).U;

    EmitVectorByScalarLongOpI32(context, (lhs, rhs) => context.Multiply(lhs, rhs), isSigned);
}
|
||||
|
||||
// Long vector multiply. Uses the ordinary product (signed when U is clear)
// unless the decoded opcode is flagged as polynomial, in which case the
// polynomial product of the unsigned elements is emitted.
public static void Vmull_I(ArmEmitterContext context)
{
    OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;

    if (!op.Polynomial)
    {
        EmitVectorBinaryLongOpI32(context, (lhs, rhs) => context.Multiply(lhs, rhs), !op.U);

        return;
    }

    int elementBits = 8 << op.Size;

    EmitVectorBinaryLongOpI32(context, (lhs, rhs) => EmitPolynomialMultiply(context, lhs, rhs, elementBits), false);
}
|
||||
|
||||
public static void Vpadd_V(ArmEmitterContext context)
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||
|
@ -1157,5 +1180,27 @@ namespace ARMeilleure.Instructions
|
|||
EmitVectorBinaryOpSimd32(context, genericEmit);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a carry-less (XOR-accumulated) multiply of op1 by op2.
// For every bit position below eSize, op1 is masked down to that single bit,
// op2 is multiplied by the masked value, and the partial product is XORed
// into the accumulator. With eSize == 32 both operands are first
// zero-extended to I64 and the accumulator is a 64-bit constant.
private static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
{
    Debug.Assert(eSize <= 32);

    Operand accumulator;

    if (eSize == 32)
    {
        accumulator = Const(0L);

        op1 = context.ZeroExtend32(OperandType.I64, op1);
        op2 = context.ZeroExtend32(OperandType.I64, op2);
    }
    else
    {
        accumulator = Const(0);
    }

    int bit = 0;

    while (bit < eSize)
    {
        Operand selectedBit = context.BitwiseAnd(op1, Const(op1.Type, 1L << bit));
        Operand partial = context.Multiply(op2, selectedBit);

        accumulator = context.BitwiseExclusiveOr(accumulator, partial);

        bit++;
    }

    return accumulator;
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,7 +57,6 @@ namespace ARMeilleure.Instructions
|
|||
// From dreg.
|
||||
vec = GetVecA32(reg >> 1);
|
||||
insert = context.VectorInsert(vec, value, reg & 1);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -69,6 +68,11 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(vec, insert);
|
||||
}
|
||||
|
||||
// Extracts one element addressed by a packed register number: the bits of
// reg above the low (4 - size) bits pick the vector, the low (4 - size) bits
// pick the element inside it.
public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed)
{
    int vectorIndex = reg >> (4 - size);
    int elementMask = (16 >> size) - 1;
    int elementIndex = reg & elementMask;

    return EmitVectorExtract32(context, vectorIndex, elementIndex, size, signed);
}
|
||||
|
||||
public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit)
|
||||
{
|
||||
IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp;
|
||||
|
@ -250,6 +254,57 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
// Emits a widening two-operand integer vector op.
// Each pair of source elements (size op.Size) is extracted with the requested
// signedness, passed to emit, and the result is inserted at the next larger
// element size into a zeroed vector that is finally copied to Qd. For 32-bit
// sources the elements are first extended to I64.
public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand Widen(Operand element)
    {
        return signed
            ? context.SignExtend32(OperandType.I64, element)
            : context.ZeroExtend32(OperandType.I64, element);
    }

    Operand result = context.VectorZero();

    int count = op.GetBytesCount() >> op.Size;
    int wideSize = op.Size + 1;

    for (int i = 0; i < count; i++)
    {
        Operand n = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand m = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);

        if (op.Size == 2)
        {
            n = Widen(n);
            m = Widen(m);
        }

        result = EmitVectorInsert(context, result, emit(n, m), i, wideSize);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||
|
||||
// Emits a widening three-operand integer vector op (destination is also an
// input). The current destination element is read at the widened size
// (op.Size + 1), the two sources at op.Size; for 32-bit sources those are
// extended to I64. The emit result is inserted at the widened size into a
// zeroed vector that is finally copied to Qd.
public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
    OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

    Operand Widen(Operand element)
    {
        return signed
            ? context.SignExtend32(OperandType.I64, element)
            : context.ZeroExtend32(OperandType.I64, element);
    }

    Operand result = context.VectorZero();

    int count = op.GetBytesCount() >> op.Size;
    int wideSize = op.Size + 1;

    for (int i = 0; i < count; i++)
    {
        Operand d = EmitVectorExtract32(context, op.Qd, op.Id + i, wideSize, signed);
        Operand n = EmitVectorExtract32(context, op.Qn, op.In + i, op.Size, signed);
        Operand m = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, signed);

        if (op.Size == 2)
        {
            n = Widen(n);
            m = Widen(m);
        }

        result = EmitVectorInsert(context, result, emit(d, n, m), i, wideSize);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||
|
||||
public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
@ -330,7 +385,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
|
||||
|
||||
Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
|
||||
Operand m = ExtractElement(context, op.Vm, op.Size, signed);
|
||||
|
||||
Operand res = GetVecA32(op.Qd);
|
||||
|
||||
|
@ -340,7 +395,37 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(ne, m), op.In + index, op.Size);
|
||||
res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
|
||||
{
|
||||
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
|
||||
|
||||
Operand m = ExtractElement(context, op.Vm, op.Size, signed);
|
||||
|
||||
if (op.Size == 2)
|
||||
{
|
||||
m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m);
|
||||
}
|
||||
|
||||
Operand res = context.VectorZero();
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
|
||||
|
||||
if (op.Size == 2)
|
||||
{
|
||||
ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
|
||||
}
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1);
|
||||
}
|
||||
|
||||
context.Copy(GetVecA32(op.Qd), res);
|
||||
|
@ -454,7 +539,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
// Narrow
|
||||
|
||||
public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit)
|
||||
public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
|
||||
{
|
||||
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
||||
|
||||
|
@ -465,7 +550,7 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false);
|
||||
Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed);
|
||||
|
||||
res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
using ARMeilleure.Decoders;
|
||||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
|
@ -11,6 +12,78 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
static partial class InstEmit32
|
||||
{
|
||||
// Rounding, saturating, narrowing shift right. Source and destination are
// both unsigned when U is set and both signed otherwise.
public static void Vqrshrn(ArmEmitterContext context)
{
    OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;

    ShrImmSaturatingNarrowFlags flags;

    if (op.U)
    {
        flags = ShrImmSaturatingNarrowFlags.VectorZxZx;
    }
    else
    {
        flags = ShrImmSaturatingNarrowFlags.VectorSxSx;
    }

    EmitRoundShrImmSaturatingNarrowOp(context, flags);
}
|
||||
|
||||
// Rounding, saturating, narrowing shift right with a signed source and an
// unsigned destination.
public static void Vqrshrun(ArmEmitterContext context)
{
    const ShrImmSaturatingNarrowFlags SignedToUnsigned = ShrImmSaturatingNarrowFlags.VectorSxZx;

    EmitRoundShrImmSaturatingNarrowOp(context, SignedToUnsigned);
}
|
||||
|
||||
// Rounding shift right by immediate: (element + (1 << (shift - 1))) >> shift.
// The emission strategy depends on the element size:
//   Size > 2  : the whole operation is delegated to EmitShrImm64.
//   Size == 2 : the element is extended to I64, rounded and shifted there,
//               then converted back to I32.
//   Size < 2  : rounding and shifting happen directly on the extracted value.
// U selects unsigned (logical shift, zero extension) versus signed
// (arithmetic shift, sign extension) handling.
public static void Vrshr(ArmEmitterContext context)
{
    OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;

    int shift = GetImmShr(op);
    long roundConst = 1L << (shift - 1);

    bool isUnsigned = op.U;

    if (op.Size > 2)
    {
        // Both signednesses go through the zero-extending unary emitter here;
        // only the flag handed to EmitShrImm64 differs.
        EmitVectorUnaryOpZx32(context, (value) => EmitShrImm64(context, value, signed: !isUnsigned, roundConst, shift));

        return;
    }

    if (op.Size == 2)
    {
        if (isUnsigned)
        {
            EmitVectorUnaryOpZx32(context, (value) =>
            {
                Operand wide = context.ZeroExtend32(OperandType.I64, value);
                Operand rounded = context.Add(wide, Const(wide.Type, roundConst));

                return context.ConvertI64ToI32(context.ShiftRightUI(rounded, Const(shift)));
            });
        }
        else
        {
            EmitVectorUnaryOpSx32(context, (value) =>
            {
                Operand wide = context.SignExtend32(OperandType.I64, value);
                Operand rounded = context.Add(wide, Const(wide.Type, roundConst));

                return context.ConvertI64ToI32(context.ShiftRightSI(rounded, Const(shift)));
            });
        }

        return;
    }

    if (isUnsigned)
    {
        EmitVectorUnaryOpZx32(context, (value) =>
        {
            Operand rounded = context.Add(value, Const(value.Type, roundConst));

            return context.ShiftRightUI(rounded, Const(shift));
        });
    }
    else
    {
        EmitVectorUnaryOpSx32(context, (value) =>
        {
            Operand rounded = context.Add(value, Const(value.Type, roundConst));

            return context.ShiftRightSI(rounded, Const(shift));
        });
    }
}
|
||||
|
||||
public static void Vshl(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
|
||||
|
@ -35,7 +108,7 @@ namespace ARMeilleure.Instructions
|
|||
public static void Vshr(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
|
||||
int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
|
||||
int shift = GetImmShr(op);
|
||||
int maxShift = (8 << op.Size) - 1;
|
||||
|
||||
if (op.U)
|
||||
|
@ -51,7 +124,7 @@ namespace ARMeilleure.Instructions
|
|||
public static void Vshrn(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
|
||||
int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
|
||||
int shift = GetImmShr(op);
|
||||
|
||||
EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
|
||||
}
|
||||
|
@ -96,5 +169,110 @@ namespace ARMeilleure.Instructions
|
|||
return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res));
|
||||
}
|
||||
}
|
||||
|
||||
// Option bits for the saturating narrowing shift-right emitter, plus named
// combinations. "Sx"/"Zx" in the combination names give the signedness of
// the source and destination respectively (Sx = signed bit set).
[Flags]
private enum ShrImmSaturatingNarrowFlags
{
    // Individual option bits.
    Scalar = 0x1,
    SignedSrc = 0x2,
    SignedDst = 0x4,

    Round = 0x8,

    // Scalar combinations.
    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,

    // Vector combinations (Scalar bit clear).
    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
|
||||
|
||||
// Convenience wrapper: same as EmitShrImmSaturatingNarrowOp with the Round
// bit added to the caller's flags.
private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
{
    ShrImmSaturatingNarrowFlags roundingFlags = flags | ShrImmSaturatingNarrowFlags.Round;

    EmitShrImmSaturatingNarrowOp(context, roundingFlags);
}
|
||||
|
||||
// Emits a shift-right-by-immediate followed by saturation to the narrowed
// element width (8 << op.Size bits).
// flags selects source/destination signedness and whether the rounding
// constant 1 << (shift - 1) is added before shifting. The scalar form is not
// implemented and throws NotImplementedException.
private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
{
    OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;

    bool isScalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) == ShrImmSaturatingNarrowFlags.Scalar;
    bool srcSigned = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) == ShrImmSaturatingNarrowFlags.SignedSrc;
    bool dstSigned = (flags & ShrImmSaturatingNarrowFlags.SignedDst) == ShrImmSaturatingNarrowFlags.SignedDst;
    bool rounding = (flags & ShrImmSaturatingNarrowFlags.Round) == ShrImmSaturatingNarrowFlags.Round;

    if (isScalar)
    {
        // TODO: Support scalar operation.
        throw new NotImplementedException();
    }

    int shift = GetImmShr(op);
    long roundConst = 1L << (shift - 1);

    EmitVectorUnaryNarrowOp32(context, (element) =>
    {
        Operand shifted;

        if (rounding && op.Size > 1)
        {
            // op.Size == 2 with rounding: delegate to the 64-bit helper.
            shifted = EmitShrImm64(context, element, srcSigned, roundConst, shift); // shift <= 32
        }
        else
        {
            Operand biased = element;

            if (rounding)
            {
                biased = context.Add(element, Const(element.Type, roundConst));
            }

            if (srcSigned)
            {
                shifted = context.ShiftRightSI(biased, Const(shift));
            }
            else
            {
                shifted = context.ShiftRightUI(biased, Const(shift));
            }
        }

        return EmitSatQ(context, shifted, 8 << op.Size, dstSigned);
    }, srcSigned);
}
|
||||
|
||||
// Converts the decoded Shift field into the actual right-shift amount, which
// is the element width in bits (8 << Size) minus Shift.
private static int GetImmShr(OpCode32SimdShImm op)
{
    int elementBits = 8 << op.Size;

    // Shr amount is flipped.
    return elementBits - op.Shift;
}
|
||||
|
||||
// dst64 = (Int(src64, signed) + roundConst) >> shift;
// Emitted as a call into SoftFallback: SignedShrImm64 when signed is true,
// UnsignedShrImm64 otherwise. roundConst and shift are passed as constants.
private static Operand EmitShrImm64(
    ArmEmitterContext context,
    Operand value,
    bool signed,
    long roundConst,
    int shift)
{
    Delegate fallback;

    if (signed)
    {
        fallback = new _S64_S64_S64_S32(SoftFallback.SignedShrImm64);
    }
    else
    {
        fallback = new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);
    }

    return context.Call(fallback, value, Const(roundConst), Const(shift));
}
|
||||
|
||||
// Clamps value to the range of an eSize-bit integer (signed or unsigned) and
// returns the clamped operand. Both range checks are made against the
// incoming value. A branch skipping the (not yet implemented) QC flag update
// is emitted when neither check fired.
private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signed)
{
    Debug.Assert(eSize <= 32);

    long lowerBound;
    long upperBound;

    if (signed)
    {
        lowerBound = -(1L << (eSize - 1));
        upperBound = (1L << (eSize - 1)) - 1;
    }
    else
    {
        lowerBound = 0;
        upperBound = (1L << eSize) - 1;
    }

    Operand aboveMax = context.ICompareGreater(value, Const(value.Type, upperBound));
    Operand belowMin = context.ICompareLess(value, Const(value.Type, lowerBound));

    Operand clamped = context.ConditionalSelect(aboveMax, Const(value.Type, upperBound), value);

    clamped = context.ConditionalSelect(belowMin, Const(clamped.Type, lowerBound), clamped);

    Operand lblNoSat = Label();

    Operand saturated = context.BitwiseOr(aboveMax, belowMin);

    context.BranchIfFalse(lblNoSat, saturated);

    // TODO: Set QC (to 1) on FPSCR here.

    context.MarkLabel(lblNoSat);

    return clamped;
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -560,12 +560,14 @@ namespace ARMeilleure.Instructions
|
|||
Vminnm,
|
||||
Vmla,
|
||||
Vmls,
|
||||
Vmlsl,
|
||||
Vmov,
|
||||
Vmovl,
|
||||
Vmovn,
|
||||
Vmrs,
|
||||
Vmsr,
|
||||
Vmul,
|
||||
Vmull,
|
||||
Vmvn,
|
||||
Vneg,
|
||||
Vnmul,
|
||||
|
@ -573,8 +575,11 @@ namespace ARMeilleure.Instructions
|
|||
Vnmls,
|
||||
Vorr,
|
||||
Vpadd,
|
||||
Vqrshrn,
|
||||
Vqrshrun,
|
||||
Vrev,
|
||||
Vrint,
|
||||
Vrshr,
|
||||
Vsel,
|
||||
Vshl,
|
||||
Vshr,
|
||||
|
|
|
@ -284,6 +284,78 @@ namespace Ryujinx.Tests.Cpu
|
|||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
// Builds a VMLSL encoding from the register numbers, size and U bit, runs it
// with three seeded vector registers, and compares the result with Unicorn.
[Test, Pairwise, Description("VMLSL.<type><size> <Vd>, <Vn>, <Vm>")]
public void Vmlsl_I([Values(0u)] uint rd,
                    [Values(1u, 0u)] uint rn,
                    [Values(2u, 0u)] uint rm,
                    [Values(0u, 1u, 2u)] uint size,
                    [Random(RndCnt)] ulong z,
                    [Random(RndCnt)] ulong a,
                    [Random(RndCnt)] ulong b,
                    [Values] bool u)
{
    const uint BaseOpcode = 0xf2800a00u; // VMLSL.S8 Q0, D0, D0

    // Each register number is split into a 4-bit field plus one extra high bit.
    uint vmField = (rm & 0xf) | ((rm & 0x10) << 1);
    uint vdField = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
    uint vnField = ((rn & 0xf) << 16) | ((rn & 0x10) << 3);

    uint sizeField = size << 20;
    uint unsignedBit = u ? 1u << 24 : 0u;

    uint opcode = BaseOpcode | vmField | vdField | vnField | sizeField | unsignedBit;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
|
||||
// Assembles a VMULL opcode from the register numbers, size, op and u flags,
// executes it with SingleOpcode and checks the outcome with CompareAgainstUnicorn.
[Test, Pairwise, Description("VMULL.<size> <Vd>, <Vn>, <Vm>")]
public void Vmull_I([Values(0u)] uint rd,
                    [Values(1u, 0u)] uint rn,
                    [Values(2u, 0u)] uint rm,
                    [Values(0u, 1u, 2u)] uint size,
                    [Random(RndCnt)] ulong z,
                    [Random(RndCnt)] ulong a,
                    [Random(RndCnt)] ulong b,
                    [Values] bool op,
                    [Values] bool u)
{
    const uint baseOpcode = 0xf2800c00u; // VMULL.S8 Q0, D0, D0

    // When op is set (bit 9), the test always encodes size 0 with the u bit clear.
    uint encodedSize = op ? 0u : size;
    bool encodedU    = !op && u;

    uint vmBits = (rm & 0xf) | ((rm & 0x10) << 1);
    uint vdBits = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
    uint vnBits = ((rn & 0xf) << 16) | ((rn & 0x10) << 3);

    uint opBit = op ? 1u << 9 : 0u;
    uint uBit  = encodedU ? 1u << 24 : 0u;

    uint opcode = baseOpcode | vmBits | vdBits | vnBits | opBit | (encodedSize << 20) | uBit;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
|
||||
[Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, <Vn>")]
|
||||
public void Vshl([Values(0u)] uint rd,
|
||||
[Values(1u, 0u)] uint rn,
|
||||
|
|
81
Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs
Normal file
81
Ryujinx.Tests/Cpu/CpuTestSimdRegElem32.cs
Normal file
|
@ -0,0 +1,81 @@
|
|||
#define SimdRegElem32
|
||||
|
||||
using ARMeilleure.State;
|
||||
using NUnit.Framework;
|
||||
using System;
|
||||
|
||||
namespace Ryujinx.Tests.Cpu
{
    // VMUL / VMULL tests whose last operand is an indexed element (<Vm>[<index>]).
    // Each test builds an opcode, runs it and compares the result against Unicorn.
    [Category("SimdRegElem32")]
    public sealed class CpuTestSimdRegElem32 : CpuTest32
    {
#if SimdRegElem32
        private const int RndCnt = 2;

        // Encodes VMUL (by element); when q is set, bit 24 is set and rd/rn are doubled.
        [Test, Pairwise, Description("VMUL.<size> {<Vd>}, <Vn>, <Vm>[<index>]")]
        public void Vmul_1I([Values(1u, 0u)] uint rd,
                            [Values(1u, 0u)] uint rn,
                            [Values(26u, 25u, 10u, 9u, 2u, 0u)] uint rm,
                            [Values(1u, 2u)] uint size,
                            [Random(RndCnt)] ulong z,
                            [Random(RndCnt)] ulong a,
                            [Random(RndCnt)] ulong b,
                            [Values] bool q)
        {
            // Base encoding with the two size bits (21:20) cleared.
            const uint baseOpcode = 0xf2900840u & ~(3u << 20); // VMUL.I16 D0, D0, D0[0]

            uint vd = q ? rd << 1 : rd;
            uint vn = q ? rn << 1 : rn;

            uint qBit   = q ? 1u << 24 : 0u;
            uint vmBits = (rm & 0xf) | ((rm & 0x10) << 1);
            uint vdBits = ((vd & 0xf) << 12) | ((vd & 0x10) << 18);
            uint vnBits = ((vn & 0xf) << 16) | ((vn & 0x10) << 3);

            uint opcode = baseOpcode | qBit | vmBits | vdBits | vnBits | (size << 20);

            V128 v0 = MakeVectorE0E1(z, z);
            V128 v1 = MakeVectorE0E1(a, z);
            V128 v2 = MakeVectorE0E1(b, z);

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

            CompareAgainstUnicorn();
        }

        // Encodes VMULL (by element); the u flag sets bit 24 of the opcode.
        [Test, Pairwise, Description("VMULL.<size> <Vd>, <Vn>, <Vm>[<index>]")]
        public void Vmull_1([Values(2u, 0u)] uint rd,
                            [Values(1u, 0u)] uint rn,
                            [Values(26u, 25u, 10u, 9u, 2u, 0u)] uint rm,
                            [Values(1u, 2u)] uint size,
                            [Random(RndCnt)] ulong z,
                            [Random(RndCnt)] ulong a,
                            [Random(RndCnt)] ulong b,
                            [Values] bool u)
        {
            // Base encoding with the two size bits (21:20) cleared.
            const uint baseOpcode = 0xf2900a40u & ~(3u << 20); // VMULL.S16 Q0, D0, D0[0]

            uint vmBits = (rm & 0xf) | ((rm & 0x10) << 1);
            uint vdBits = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
            uint vnBits = ((rn & 0xf) << 16) | ((rn & 0x10) << 3);
            uint uBit   = u ? 1u << 24 : 0u;

            uint opcode = baseOpcode | vmBits | vdBits | vnBits | (size << 20) | uBit;

            V128 v0 = MakeVectorE0E1(z, z);
            V128 v1 = MakeVectorE0E1(a, z);
            V128 v2 = MakeVectorE0E1(b, z);

            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

            CompareAgainstUnicorn();
        }
#endif
    }
}
|
|
@ -11,6 +11,52 @@ namespace Ryujinx.Tests.Cpu
|
|||
#if SimdShImm32
|
||||
private const int RndCnt = 2;
|
||||
|
||||
// Builds a right-shift-by-immediate opcode (the round flag sets bit 9, turning it into
// VRSHR), executes it with SingleOpcode and checks it with CompareAgainstUnicorn.
[Test, Pairwise]
public void Vrshr_Vshr_Imm([Values(0u)] uint rd,
                           [Values(2u, 0u)] uint rm,
                           [Values(0u, 1u, 2u, 3u)] uint size,
                           [Random(RndCnt), Values(0u)] uint shiftImm,
                           [Random(RndCnt)] ulong z,
                           [Random(RndCnt)] ulong a,
                           [Random(RndCnt)] ulong b,
                           [Values] bool u,
                           [Values] bool q,
                           [Values] bool round)
{
    const uint baseOpcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)

    // With q set, bit 6 is set and both register numbers are doubled.
    uint vm = q ? rm << 1 : rm;
    uint vd = q ? rd << 1 : rd;

    uint flagBits = (q ? 1u << 6 : 0u)
                  | (round ? 1u << 9 : 0u) // Turn into VRSHR
                  | (u ? 1u << 24 : 0u);

    // A single marker bit chosen by size, with the random shift bits placed below it.
    uint marker = 1u << ((int)size + 3);
    uint imm    = marker | (shiftImm & (marker - 1));

    uint vmBits  = (vm & 0xf) | ((vm & 0x10) << 1);
    uint vdBits  = ((vd & 0xf) << 12) | ((vd & 0x10) << 18);
    uint immBits = ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);

    uint opcode = baseOpcode | flagBits | vmBits | vdBits | immBits;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
|
||||
[Test, Pairwise, Description("VSHL.<size> {<Vd>}, <Vm>, #<imm>")]
|
||||
public void Vshl_Imm([Values(0u)] uint rd,
|
||||
[Values(2u, 0u)] uint rm,
|
||||
|
@ -45,47 +91,7 @@ namespace Ryujinx.Tests.Cpu
|
|||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
// Builds a VSHR-by-immediate opcode from the register numbers, size, shift and the
// u/q flags, executes it with SingleOpcode and checks it with CompareAgainstUnicorn.
[Test, Pairwise, Description("VSHR.<size> {<Vd>}, <Vm>, #<imm>")]
public void Vshr_Imm([Values(0u)] uint rd,
                     [Values(2u, 0u)] uint rm,
                     [Values(0u, 1u, 2u, 3u)] uint size,
                     [Random(RndCnt), Values(0u)] uint shiftImm,
                     [Random(RndCnt)] ulong z,
                     [Random(RndCnt)] ulong a,
                     [Random(RndCnt)] ulong b,
                     [Values] bool u,
                     [Values] bool q)
{
    const uint baseOpcode = 0xf2800010u; // VMOV.I32 D0, #0 (immediate value changes it into SHR)

    // With q set, bit 6 is set and both register numbers are doubled.
    uint vm = q ? rm << 1 : rm;
    uint vd = q ? rd << 1 : rd;

    uint flagBits = (q ? 1u << 6 : 0u) | (u ? 1u << 24 : 0u);

    // A single marker bit chosen by size, with the random shift bits placed below it.
    uint marker = 1u << ((int)size + 3);
    uint imm    = marker | (shiftImm & (marker - 1));

    uint vmBits  = (vm & 0xf) | ((vm & 0x10) << 1);
    uint vdBits  = ((vd & 0xf) << 12) | ((vd & 0x10) << 18);
    uint immBits = ((imm & 0x3f) << 16) | ((imm & 0x40) << 1);

    uint opcode = baseOpcode | flagBits | vmBits | vdBits | immBits;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
|
||||
[Test, Pairwise, Description("VSHRN.<size> {<Vd>}, <Vm>, #<imm>")]
|
||||
[Test, Pairwise, Description("VSHRN.<size> <Vd>, <Vm>, #<imm>")]
|
||||
public void Vshrn_Imm([Values(0u, 1u)] uint rd,
|
||||
[Values(2u, 0u)] uint rm,
|
||||
[Values(0u, 1u, 2u)] uint size,
|
||||
|
@ -111,6 +117,66 @@ namespace Ryujinx.Tests.Cpu
|
|||
|
||||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
// Builds a VQRSHRN opcode from the register numbers, size, shift and the u flag,
// executes it with SingleOpcode and checks it with CompareAgainstUnicorn.
[Test, Pairwise, Description("VQRSHRN.<type><size> <Vd>, <Vm>, #<imm>")]
public void Vqrshrn_Imm([Values(0u, 1u)] uint rd,
                        [Values(2u, 0u)] uint rm,
                        [Values(0u, 1u, 2u)] uint size,
                        [Random(RndCnt), Values(0u)] uint shiftImm,
                        [Random(RndCnt)] ulong z,
                        [Random(RndCnt)] ulong a,
                        [Random(RndCnt)] ulong b,
                        [Values] bool u)
{
    const uint baseOpcode = 0xf2800950u; // VORR.I16 Q0, #0 (immediate value changes it into QRSHRN)

    // A single marker bit chosen by size, with the random shift bits placed below it.
    uint marker = 1u << ((int)size + 3);
    uint imm    = marker | (shiftImm & (marker - 1));

    uint vmBits  = (rm & 0xf) | ((rm & 0x10) << 1);
    uint vdBits  = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
    uint immBits = (imm & 0x3f) << 16;
    uint uBit    = u ? 1u << 24 : 0u;

    uint opcode = baseOpcode | vmBits | vdBits | immBits | uBit;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
|
||||
// Builds a VQRSHRUN opcode from the register numbers, size and shift amount,
// executes it with SingleOpcode and checks it with CompareAgainstUnicorn.
[Test, Pairwise, Description("VQRSHRUN.<type><size> <Vd>, <Vm>, #<imm>")]
public void Vqrshrun_Imm([Values(0u, 1u)] uint rd,
                         [Values(2u, 0u)] uint rm,
                         [Values(0u, 1u, 2u)] uint size,
                         [Random(RndCnt), Values(0u)] uint shiftImm,
                         [Random(RndCnt)] ulong z,
                         [Random(RndCnt)] ulong a,
                         [Random(RndCnt)] ulong b)
{
    const uint baseOpcode = 0xf3800850u; // VMOV.I16 Q0, #0x80 (immediate value changes it into QRSHRUN)

    // A single marker bit chosen by size, with the random shift bits placed below it.
    uint marker = 1u << ((int)size + 3);
    uint imm    = marker | (shiftImm & (marker - 1));

    uint vmBits  = (rm & 0xf) | ((rm & 0x10) << 1);
    uint vdBits  = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
    uint immBits = (imm & 0x3f) << 16;

    uint opcode = baseOpcode | vmBits | vdBits | immBits;

    V128 v0 = MakeVectorE0E1(z, z);
    V128 v1 = MakeVectorE0E1(a, z);
    V128 v2 = MakeVectorE0E1(b, z);

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);

    CompareAgainstUnicorn();
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue