From 7e98b0f6b21e81efce619f4d06c7144a74ddd6b5 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 18 Nov 2018 03:41:16 +0100 Subject: [PATCH] Add Sse Opt. for S/Umax_V, S/Umin_V, S/Uaddw_V, S/Usubw_V, Fabs_S/V, Fneg_S/V Inst.; for Fcvtl_V, Fcvtn_V Inst.; and for Fcmp_S Inst.. Add/Improve other Sse Opt.. Add Tests. (#496) * Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update InstEmitSimdCmp.cs * Update SoftFloat.cs * Update InstEmitAluHelper.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdHelper.cs * Update VectorHelper.cs * Update InstEmitSimdCvt.cs * Update InstEmitSimdArithmetic.cs * Update CpuTestSimd.cs * Update InstEmitSimdArithmetic.cs * Update OpCodeTable.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdCmp.cs * Update InstEmitSimdCvt.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Create CpuTestSimdFcond.cs * Update OpCodeTable.cs * Update InstEmitSimdMove.cs * Update CpuTestSimdIns.cs * Create CpuTestSimdExt.cs * Nit. * Update PackageReference. --- Instructions/InstEmitAluHelper.cs | 29 +- Instructions/InstEmitSimdArithmetic.cs | 748 +++++++++++++++++++++++-- Instructions/InstEmitSimdCmp.cs | 261 ++++++--- Instructions/InstEmitSimdCvt.cs | 172 ++++-- Instructions/InstEmitSimdHelper.cs | 84 +-- Instructions/InstEmitSimdMove.cs | 121 ++-- Instructions/SoftFloat.cs | 74 +++ Instructions/VectorHelper.cs | 25 +- OpCodeTable.cs | 12 +- 9 files changed, 1214 insertions(+), 312 deletions(-) diff --git a/Instructions/InstEmitAluHelper.cs b/Instructions/InstEmitAluHelper.cs index 613dd23..97c5056 100644 --- a/Instructions/InstEmitAluHelper.cs +++ b/Instructions/InstEmitAluHelper.cs @@ -190,23 +190,32 @@ namespace ChocolArm64.Instructions } } - public static void EmitSetNzcv(ILEmitterCtx context, int nzcv) + public static void EmitSetNzcv(ILEmitterCtx context) { - context.EmitLdc_I4((nzcv >> 0) & 1); - + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.VBit); - context.EmitLdc_I4((nzcv >> 1) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.CBit); - context.EmitLdc_I4((nzcv >> 2) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Dup); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.ZBit); - context.EmitLdc_I4((nzcv >> 3) & 1); - + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.Shr); + context.Emit(OpCodes.Ldc_I4_1); + context.Emit(OpCodes.And); context.EmitStflg((int)PState.NBit); } } -} \ No newline at end of file +} diff --git a/Instructions/InstEmitSimdArithmetic.cs b/Instructions/InstEmitSimdArithmetic.cs index 5668bb6..c05e9f9 100644 --- a/Instructions/InstEmitSimdArithmetic.cs +++ b/Instructions/InstEmitSimdArithmetic.cs @@ -186,18 +186,101 @@ namespace ChocolArm64.Instructions public static void Fabs_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fabs_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fadd_S(ILEmitterCtx context) @@ -283,7 +366,7 @@ namespace ChocolArm64.Instructions } } - public static void Fmadd_S(ILEmitterCtx context) + public static void Fmadd_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -450,22 +533,118 @@ namespace ChocolArm64.Instructions }); } - public static void Fmla_V(ILEmitterCtx context) + public static void Fmla_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } - public static void Fmla_Ve(ILEmitterCtx context) + public static void Fmla_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } public static void Fmls_Se(ILEmitterCtx context) @@ -477,25 +656,121 @@ namespace ChocolArm64.Instructions }); } - public static void Fmls_V(ILEmitterCtx context) + public static void Fmls_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmls_Ve(ILEmitterCtx context) + public static void Fmls_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmsub_S(ILEmitterCtx context) + public static void Fmsub_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -580,7 +855,59 @@ namespace ChocolArm64.Instructions public static void Fmul_Ve(ILEmitterCtx context) { - EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128), typeof(Vector128), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128), typeof(Vector128) }; + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul)); + }); + } } public static void Fmulx_S(ILEmitterCtx context) @@ -617,12 +944,95 @@ namespace ChocolArm64.Instructions public static void Fneg_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fneg_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128), typeof(Vector128) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fnmadd_S(ILEmitterCtx context) @@ -689,7 +1099,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frecps_S(ILEmitterCtx context) + public static void Frecps_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -743,7 +1153,7 @@ namespace ChocolArm64.Instructions } } - public static void Frecps_V(ILEmitterCtx context) + public static void Frecps_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -986,7 +1396,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frsqrts_S(ILEmitterCtx context) + public static void Frsqrts_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1048,7 +1458,7 @@ namespace ChocolArm64.Instructions } } - public static void Frsqrts_V(ILEmitterCtx context) + public static void Frsqrts_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1310,7 +1720,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1334,7 +1744,38 @@ namespace ChocolArm64.Instructions public static void Saddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + } } public static void Shadd_V(ILEmitterCtx context) @@ -1439,11 +1880,34 @@ namespace ChocolArm64.Instructions public static void Smax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Smaxp_V(ILEmitterCtx context) @@ -1457,11 +1921,34 @@ namespace ChocolArm64.Instructions public static void Smin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Sminp_V(ILEmitterCtx context) @@ -1484,7 +1971,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1508,7 +1995,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -1535,7 +2022,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1559,7 +2046,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -1735,7 +2222,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1754,7 +2241,38 @@ namespace ChocolArm64.Instructions public static void Ssubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } } public static void Sub_S(ILEmitterCtx context) @@ -1901,7 +2419,38 @@ namespace ChocolArm64.Instructions public static void Uaddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } } public static void Uhadd_V(ILEmitterCtx context) @@ -1992,11 +2541,34 @@ namespace ChocolArm64.Instructions public static void Umax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Umaxp_V(ILEmitterCtx context) @@ -2010,11 +2582,34 @@ namespace ChocolArm64.Instructions public static void Umin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Uminp_V(ILEmitterCtx context) @@ -2037,7 +2632,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2061,7 +2656,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -2088,7 +2683,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2112,7 +2707,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -2251,7 +2846,38 @@ namespace ChocolArm64.Instructions public static void Usubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } } private static void EmitAbs(ILEmitterCtx context) diff --git a/Instructions/InstEmitSimdCmp.cs b/Instructions/InstEmitSimdCmp.cs index c473c0a..3ee2548 100644 --- a/Instructions/InstEmitSimdCmp.cs +++ b/Instructions/InstEmitSimdCmp.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitAluHelper; @@ -137,26 +138,43 @@ namespace ChocolArm64.Instructions context.EmitCondBranch(lblTrue, op.Cond); - EmitSetNzcv(context, op.Nzcv); + context.EmitLdc_I4(op.Nzcv); + EmitSetNzcv(context); context.Emit(OpCodes.Br, lblEnd); context.MarkLabel(lblTrue); - Fcmp_S(context); + EmitFcmpE(context, signalNaNs: false); context.MarkLabel(lblEnd); } public static void Fccmpe_S(ILEmitterCtx context) { - Fccmp_S(context); + OpCodeSimdFcond64 op = (OpCodeSimdFcond64)context.CurrOp; + + ILLabel lblTrue = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitCondBranch(lblTrue, op.Cond); + + context.EmitLdc_I4(op.Nzcv); + EmitSetNzcv(context); + + context.Emit(OpCodes.Br, lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpE(context, signalNaNs: true); + + context.MarkLabel(lblEnd); } public static void Fcmeq_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar)); } @@ -169,7 +187,7 @@ namespace ChocolArm64.Instructions public static void Fcmeq_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual)); } @@ -182,7 +200,7 @@ namespace ChocolArm64.Instructions public static void Fcmge_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar)); } @@ -195,7 +213,7 @@ namespace ChocolArm64.Instructions public static void Fcmge_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual)); } @@ -208,7 +226,7 @@ namespace ChocolArm64.Instructions public static void Fcmgt_S(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar)); } @@ -221,7 +239,7 @@ namespace ChocolArm64.Instructions public static void Fcmgt_V(ILEmitterCtx context) { if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse - && Optimizations.UseSse2) + && Optimizations.UseSse2) { EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan)); } @@ -252,31 +270,157 @@ namespace ChocolArm64.Instructions } public static void Fcmp_S(ILEmitterCtx context) + { + EmitFcmpE(context, signalNaNs: false); + } + + public static void Fcmpe_S(ILEmitterCtx context) + { + EmitFcmpE(context, signalNaNs: true); + } + + private static void EmitFcmpE(ILEmitterCtx context, bool signalNaNs) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; bool cmpWithZero = !(op is OpCodeSimdFcond64) ? op.Bit3 : false; - //Handle NaN case. - //If any number is NaN, then NZCV = 0011. - if (cmpWithZero) + if (Optimizations.FastFP && Optimizations.UseSse2) { - EmitNaNCheck(context, op.Rn); + if (op.Size == 0) + { + Type[] typesCmp = new Type[] { typeof(Vector128), typeof(Vector128) }; + + ILLabel lblNaN = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + context.EmitLdvec(op.Rn); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + if (cmpWithZero) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + } + else + { + context.EmitLdvec(op.Rm); + } + + context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareOrderedScalar), typesCmp)); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); + + context.Emit(OpCodes.Brtrue_S, lblNaN); + + context.EmitLdc_I4(0); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.CompareLessThanOrderedScalar), typesCmp)); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblNaN); + + context.EmitLdc_I4(1); + context.Emit(OpCodes.Dup); + context.EmitLdc_I4(0); + context.Emit(OpCodes.Dup); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.MarkLabel(lblEnd); + } + else /* if (op.Size == 1) */ + { + Type[] typesCmp = new Type[] { typeof(Vector128), typeof(Vector128) }; + + ILLabel lblNaN = new ILLabel(); + ILLabel lblEnd = new ILLabel(); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.Emit(OpCodes.Dup); + context.EmitStvectmp(); + + if (cmpWithZero) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + } + else + { + EmitLdvecWithCastToDouble(context, op.Rm); + } + + context.Emit(OpCodes.Dup); + context.EmitStvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareOrderedScalar), typesCmp)); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); + + context.Emit(OpCodes.Brtrue_S, lblNaN); + + context.EmitLdc_I4(0); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThanOrEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareEqualOrderedScalar), typesCmp)); + + context.EmitLdvectmp(); + context.EmitLdvectmp2(); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareLessThanOrderedScalar), typesCmp)); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.Emit(OpCodes.Br_S, lblEnd); + + context.MarkLabel(lblNaN); + + context.EmitLdc_I4(1); + context.Emit(OpCodes.Dup); + context.EmitLdc_I4(0); + context.Emit(OpCodes.Dup); + + context.EmitStflg((int)PState.NBit); + context.EmitStflg((int)PState.ZBit); + context.EmitStflg((int)PState.CBit); + context.EmitStflg((int)PState.VBit); + + context.MarkLabel(lblEnd); + } } else - { - EmitNaNCheck(context, op.Rn); - EmitNaNCheck(context, op.Rm); - - context.Emit(OpCodes.Or); - } - - ILLabel lblNaN = new ILLabel(); - ILLabel lblEnd = new ILLabel(); - - context.Emit(OpCodes.Brtrue_S, lblNaN); - - void EmitLoadOpers() { EmitVectorExtractF(context, op.Rn, 0, op.Size); @@ -286,7 +430,7 @@ namespace ChocolArm64.Instructions { context.EmitLdc_R4(0f); } - else /* if (Op.Size == 1) */ + else // if (op.Size == 1) { context.EmitLdc_R8(0d); } @@ -295,67 +439,12 @@ namespace ChocolArm64.Instructions { EmitVectorExtractF(context, op.Rm, 0, op.Size); } - } - //Z = Rn == Rm - EmitLoadOpers(); + context.EmitLdc_I4(!signalNaNs ? 0 : 1); - context.Emit(OpCodes.Ceq); - context.Emit(OpCodes.Dup); + EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare)); - context.EmitStflg((int)PState.ZBit); - - //C = Rn >= Rm - EmitLoadOpers(); - - context.Emit(OpCodes.Cgt); - context.Emit(OpCodes.Or); - - context.EmitStflg((int)PState.CBit); - - //N = Rn < Rm - EmitLoadOpers(); - - context.Emit(OpCodes.Clt); - - context.EmitStflg((int)PState.NBit); - - //V = 0 - context.EmitLdc_I4(0); - - context.EmitStflg((int)PState.VBit); - - context.Emit(OpCodes.Br_S, lblEnd); - - context.MarkLabel(lblNaN); - - EmitSetNzcv(context, 0b0011); - - context.MarkLabel(lblEnd); - } - - public static void Fcmpe_S(ILEmitterCtx context) - { - Fcmp_S(context); - } - - private static void EmitNaNCheck(ILEmitterCtx context, int reg) - { - IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; - - EmitVectorExtractF(context, reg, 0, op.Size); - - if (op.Size == 0) - { - context.EmitCall(typeof(float), nameof(float.IsNaN)); - } - else if (op.Size == 1) - { - context.EmitCall(typeof(double), nameof(double.IsNaN)); - } - else - { - throw new InvalidOperationException(); + EmitSetNzcv(context); } } @@ -486,7 +575,7 @@ namespace ChocolArm64.Instructions { context.EmitLdc_R4(0f); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { context.EmitLdc_R8(0d); } diff --git a/Instructions/InstEmitSimdCvt.cs b/Instructions/InstEmitSimdCvt.cs index 45f2bef..fe8722a 100644 --- a/Instructions/InstEmitSimdCvt.cs +++ b/Instructions/InstEmitSimdCvt.cs @@ -76,33 +76,54 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - int elems = 4 >> sizeF; - - int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseSse2 && sizeF == 1) { - if (sizeF == 0) - { - EmitVectorExtractZx(context, op.Rn, part + index, 1); - context.Emit(OpCodes.Conv_U2); + Type[] typesMov = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesCvt = new Type[] { typeof(Vector128) }; - context.EmitLdarg(TranslatedSub.StateArgIdx); + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveHighToLow) + : nameof(Sse.MoveLowToHigh); - context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); - } - else /* if (sizeF == 1) */ - { - EmitVectorExtractF(context, op.Rn, part + index, 0); + context.EmitLdvec(op.Rn); + context.Emit(OpCodes.Dup); - context.Emit(OpCodes.Conv_R8); - } + context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); - EmitVectorInsertTmpF(context, index, sizeF); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Double), typesCvt)); + + EmitStvecWithCastFromDouble(context, op.Rd); } + else + { + int elems = 4 >> sizeF; - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + EmitVectorExtractZx(context, op.Rn, part + index, 1); + context.Emit(OpCodes.Conv_U2); + + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat16_32), nameof(SoftFloat16_32.FPConvert)); + } + else /* if (sizeF == 1) */ + { + EmitVectorExtractF(context, op.Rn, part + index, 0); + + context.Emit(OpCodes.Conv_R8); + } + + EmitVectorInsertTmpF(context, index, sizeF); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } } public static void Fcvtms_Gp(ILEmitterCtx context) @@ -121,43 +142,70 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - int elems = 4 >> sizeF; - - int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; - - if (part != 0) + if (Optimizations.UseSse2 && sizeF == 1) { + Type[] typesMov = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesCvt = new Type[] { typeof(Vector128) }; + + string nameMov = op.RegisterSize == RegisterSize.Simd128 + ? nameof(Sse.MoveLowToHigh) + : nameof(Sse.MoveHighToLow); + context.EmitLdvec(op.Rd); - context.EmitStvectmp(); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + + EmitLdvecWithCastToDouble(context, op.Rn); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); + context.Emit(OpCodes.Dup); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + + context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); + + context.EmitStvec(op.Rd); } - - for (int index = 0; index < elems; index++) + else { - EmitVectorExtractF(context, op.Rn, index, sizeF); + int elems = 4 >> sizeF; - if (sizeF == 0) + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + if (part != 0) { - context.EmitLdarg(TranslatedSub.StateArgIdx); - - context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert)); - - context.Emit(OpCodes.Conv_U8); - EmitVectorInsertTmp(context, part + index, 1); + context.EmitLdvec(op.Rd); + context.EmitStvectmp(); } - else /* if (sizeF == 1) */ + + for (int index = 0; index < elems; index++) { - context.Emit(OpCodes.Conv_R4); + EmitVectorExtractF(context, op.Rn, index, sizeF); - EmitVectorInsertTmpF(context, part + index, 0); + if (sizeF == 0) + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + context.EmitCall(typeof(SoftFloat32_16), nameof(SoftFloat32_16.FPConvert)); + + context.Emit(OpCodes.Conv_U8); + EmitVectorInsertTmp(context, part + index, 1); + } + else /* if (sizeF == 1) */ + { + context.Emit(OpCodes.Conv_R4); + + EmitVectorInsertTmpF(context, part + index, 0); + } } - } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); - if (part == 0) - { - EmitVectorZeroUpper(context, op.Rd); + if (part == 0) + { + EmitVectorZeroUpper(context, op.Rd); + } } } @@ -260,7 +308,29 @@ namespace ChocolArm64.Instructions public static void Scvtf_V(ILEmitterCtx context) { - EmitVectorCvtf(context, signed: true); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseSse2 && sizeF == 0) + { + Type[] typesCvt = new Type[] { typeof(Vector128) }; + + EmitLdvecWithSignedCast(context, op.Rn, 2); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertToVector128Single), typesCvt)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitVectorCvtf(context, signed: true); + } } public static void Ucvtf_Gp(ILEmitterCtx context) @@ -441,16 +511,6 @@ namespace ChocolArm64.Instructions context.EmitStintzr(op.Rd); } - private static void EmitVectorScvtf(ILEmitterCtx context) - { - EmitVectorCvtf(context, true); - } - - private static void EmitVectorUcvtf(ILEmitterCtx context) - { - EmitVectorCvtf(context, false); - } - private static void EmitVectorCvtf(ILEmitterCtx context, bool signed) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; diff --git a/Instructions/InstEmitSimdHelper.cs b/Instructions/InstEmitSimdHelper.cs index fad5151..7b597be 100644 --- a/Instructions/InstEmitSimdHelper.cs +++ b/Instructions/InstEmitSimdHelper.cs @@ -219,7 +219,7 @@ namespace ChocolArm64.Instructions type = typeof(Sse); baseType = typeof(Vector128); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { type = typeof(Sse2); baseType = typeof(Vector128); @@ -249,7 +249,7 @@ namespace ChocolArm64.Instructions { EmitVectorZero32_128(context, op.Rd); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { EmitVectorZeroUpper(context, op.Rd); } @@ -272,7 +272,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double) }); } @@ -292,7 +292,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) }); } @@ -312,7 +312,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) }); } @@ -334,7 +334,7 @@ namespace ChocolArm64.Instructions { mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) }); } - else /* if (SizeF == 1) */ + else /* if (sizeF == 1) */ { mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) }); } @@ -961,7 +961,7 @@ namespace ChocolArm64.Instructions { EmitSatQ(context, op.Size, true, true); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { EmitUnarySignedSatQAbsOrNeg(context); } @@ -1022,7 +1022,7 @@ namespace ChocolArm64.Instructions { for (int index = 0; index < elems; index++) { - EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); if (op.Size <= 2) @@ -1031,13 +1031,13 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { if (add) { EmitBinarySatQAdd(context, signed); } - else /* if (Sub) */ + else /* if (sub) */ { EmitBinarySatQSub(context, signed); } @@ -1059,7 +1059,7 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); } - else /* if (Op.Size == 3) */ + else /* if (op.Size == 3) */ { EmitBinarySatQAccumulate(context, signed); } @@ -1071,7 +1071,7 @@ namespace ChocolArm64.Instructions { for (int index = 0; index < elems; index++) { - EmitVectorExtract(context, op.Rn, index, op.Size, signed); + EmitVectorExtract(context, op.Rn, index, op.Size, signed); EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, index, op.Size, signed); emit(); @@ -1304,52 +1304,64 @@ namespace ChocolArm64.Instructions } } - public static void EmitVectorZeroAll(ILEmitterCtx context, int rd) + public static void EmitVectorZeroAll(ILEmitterCtx context, int reg) { - if (Optimizations.UseSse2) + if (Optimizations.UseSse) { VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitStvec(rd); + context.EmitStvec(reg); } else { - EmitVectorZeroLower(context, rd); - EmitVectorZeroUpper(context, rd); + EmitVectorZeroLower(context, reg); + EmitVectorZeroUpper(context, reg); } } - public static void EmitVectorZeroLower(ILEmitterCtx context, int rd) + public static void EmitVectorZeroLower(ILEmitterCtx context, int reg) { - EmitVectorInsert(context, rd, 0, 3, 0); + EmitVectorInsert(context, reg, 0, 3, 0); } public static void EmitVectorZeroLowerTmp(ILEmitterCtx context) { - EmitVectorInsertTmp(context, 0, 3, 0); + if (Optimizations.UseSse) + { + context.EmitLdvectmp(); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow))); + + context.EmitStvectmp(); + } + else + { + EmitVectorInsertTmp(context, 0, 3, 0); + } } public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg) { - if (Optimizations.UseSse2) + if (Optimizations.UseSse) { - //TODO: Use MoveScalar once it is fixed, as of the - //time of writing it just crashes the JIT. + //TODO: Use Sse2.MoveScalar once it is fixed, + //as of the time of writing it just crashes the JIT (SDK 2.1.500). + + /*Type[] typesMov = new Type[] { typeof(Vector128) }; + EmitLdvecWithUnsignedCast(context, reg, 3); - Type[] types = new Type[] { typeof(Vector128), typeof(byte) }; + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov)); - //Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types)); + EmitStvecWithUnsignedCast(context, reg, 3);*/ - context.EmitLdc_I4(8); + context.EmitLdvec(reg); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), types)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); - context.EmitLdc_I4(8); - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), types)); - - EmitStvecWithUnsignedCast(context, reg, 3); + context.EmitStvec(reg); } else { @@ -1359,9 +1371,15 @@ namespace ChocolArm64.Instructions public static void EmitVectorZero32_128(ILEmitterCtx context, int reg) { + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); context.EmitLdvec(reg); - VectorHelper.EmitCall(context, nameof(VectorHelper.VectorZero32_128)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveScalar))); context.EmitStvec(reg); } diff --git a/Instructions/InstEmitSimdMove.cs b/Instructions/InstEmitSimdMove.cs index 3f539b8..0d9aa31 100644 --- a/Instructions/InstEmitSimdMove.cs +++ b/Instructions/InstEmitSimdMove.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instructions.InstEmitSimdHelper; @@ -17,6 +18,8 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse2) { + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + context.EmitLdintzr(op.Rn); switch (op.Size) @@ -26,16 +29,9 @@ namespace ChocolArm64.Instructions case 2: context.Emit(OpCodes.Conv_U4); break; } - Type[] types = new Type[] { UIntTypesPerSizeLog2[op.Size] }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), types)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); EmitStvecWithUnsignedCast(context, op.Rd, op.Size); - - if (op.RegisterSize == RegisterSize.Simd64) - { - EmitVectorZeroUpper(context, op.Rd); - } } else { @@ -48,11 +44,11 @@ namespace ChocolArm64.Instructions EmitVectorInsert(context, op.Rd, index, op.Size); } + } - if (op.RegisterSize == RegisterSize.Simd64) - { - EmitVectorZeroUpper(context, op.Rd); - } + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); } } @@ -69,14 +65,34 @@ namespace ChocolArm64.Instructions { OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; - int bytes = op.GetBitsCount() >> 3; - int elems = bytes >> op.Size; - - for (int index = 0; index < elems; index++) + if (Optimizations.UseSse2) { + Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size] }; + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); - EmitVectorInsert(context, op.Rd, index, op.Size); + switch (op.Size) + { + case 0: context.Emit(OpCodes.Conv_U1); break; + case 1: context.Emit(OpCodes.Conv_U2); break; + case 2: context.Emit(OpCodes.Conv_U4); break; + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + } + else + { + int bytes = op.GetBitsCount() >> 3; + int elems = bytes >> op.Size; + + for (int index = 0; index < elems; index++) + { + EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + EmitVectorInsert(context, op.Rd, index, op.Size); + } } if (op.RegisterSize == RegisterSize.Simd64) @@ -89,32 +105,65 @@ namespace ChocolArm64.Instructions { OpCodeSimdExt64 op = (OpCodeSimdExt64)context.CurrOp; - context.EmitLdvec(op.Rd); - context.EmitStvectmp(); - - int bytes = op.GetBitsCount() >> 3; - - int position = op.Imm4; - - for (int index = 0; index < bytes; index++) + if (Optimizations.UseSse2) { - int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + Type[] typesShs = new Type[] { typeof(Vector128), typeof(byte) }; + Type[] typesOr = new Type[] { typeof(Vector128), typeof(Vector128) }; - if (position == bytes) + EmitLdvecWithUnsignedCast(context, op.Rn, 0); + + if (op.RegisterSize == RegisterSize.Simd64) { - position = 0; + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); } - EmitVectorExtractZx(context, reg, position++, 0); - EmitVectorInsertTmp(context, index, 0); + context.EmitLdc_I4(op.Imm4); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesShs)); + + EmitLdvecWithUnsignedCast(context, op.Rm, 0); + + context.EmitLdc_I4((op.RegisterSize == RegisterSize.Simd64 ? 8 : 16) - op.Imm4); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), typesShs)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); + + EmitStvecWithUnsignedCast(context, op.Rd, 0); } - - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if (op.RegisterSize == RegisterSize.Simd64) + else { - EmitVectorZeroUpper(context, op.Rd); + int bytes = op.GetBitsCount() >> 3; + + int position = op.Imm4; + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + if (position == bytes) + { + position = 0; + } + + EmitVectorExtractZx(context, reg, position++, 0); + EmitVectorInsertTmp(context, index, 0); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } } } diff --git a/Instructions/SoftFloat.cs b/Instructions/SoftFloat.cs index 72b39ef..2af8afb 100644 --- a/Instructions/SoftFloat.cs +++ b/Instructions/SoftFloat.cs @@ -789,6 +789,43 @@ namespace ChocolArm64.Instructions return result; } + public static int FPCompare(float value1, float value2, bool signalNaNs, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); + + int result; + + if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) + { + result = 0b0011; + + if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs) + { + FPProcessException(FpExc.InvalidOp, state); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + public static float FPDiv(float value1, float value2, CpuThreadState state) { Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); @@ -1584,6 +1621,43 @@ namespace ChocolArm64.Instructions return result; } + public static int FPCompare(double value1, double value2, bool signalNaNs, CpuThreadState state) + { + Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompare: state.Fpcr = 0x{state.Fpcr:X8}"); + + value1 = value1.FPUnpack(out FpType type1, out bool sign1, out _, state); + value2 = value2.FPUnpack(out FpType type2, out bool sign2, out _, state); + + int result; + + if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) + { + result = 0b0011; + + if (type1 == FpType.SNaN || type2 == FpType.SNaN || signalNaNs) + { + FPProcessException(FpExc.InvalidOp, state); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + public static double FPDiv(double value1, double value2, CpuThreadState state) { Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}"); diff --git a/Instructions/VectorHelper.cs b/Instructions/VectorHelper.cs index 8ef1581..f02c131 100644 --- a/Instructions/VectorHelper.cs +++ b/Instructions/VectorHelper.cs @@ -9,18 +9,6 @@ namespace ChocolArm64.Instructions { static class VectorHelper { - private static readonly Vector128 Zero32128Mask; - - static VectorHelper() - { - if (!Sse2.IsSupported) - { - throw new PlatformNotSupportedException(); - } - - Zero32128Mask = Sse.StaticCast(Sse2.SetVector128(0, 0, 0, 0xffffffff)); - } - public static void EmitCall(ILEmitterCtx context, string name64, string name128) { bool isSimd64 = context.CurrOp.RegisterSize == RegisterSize.Simd64; @@ -491,7 +479,7 @@ namespace ChocolArm64.Instructions { int intValue = BitConverter.SingleToInt32Bits(value); - ushort low = (ushort)(intValue >> 0); + ushort low = (ushort)(intValue >> 0); ushort high = (ushort)(intValue >> 16); Vector128 shortVector = Sse.StaticCast(vector); @@ -578,17 +566,6 @@ namespace ChocolArm64.Instructions throw new PlatformNotSupportedException(); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector128 VectorZero32_128(Vector128 vector) - { - if (Sse.IsSupported) - { - return Sse.And(vector, Zero32128Mask); - } - - throw new PlatformNotSupportedException(); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 VectorSingleToSByte(Vector128 vector) { diff --git a/OpCodeTable.cs b/OpCodeTable.cs index 6b1a724..8151718 100644 --- a/OpCodeTable.cs +++ b/OpCodeTable.cs @@ -216,9 +216,9 @@ namespace ChocolArm64 SetA64("01011110111xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_S, typeof(OpCodeSimdReg64)); SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstEmit.Cmtst_V, typeof(OpCodeSimdReg64)); SetA64("0x00111000100000010110xxxxxxxxxx", InstEmit.Cnt_V, typeof(OpCodeSimd64)); - SetA64("0x001110000xxxxx000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64)); + SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstEmit.Dup_Gp, typeof(OpCodeSimdIns64)); SetA64("01011110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_S, typeof(OpCodeSimdIns64)); - SetA64("0x001110000xxxxx000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64)); + SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstEmit.Dup_V, typeof(OpCodeSimdIns64)); SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64)); SetA64("0>101110000xxxxx00011100<100001110110xxxxxxxxxx", InstEmit.Scvtf_V, typeof(OpCodeSimd64)); SetA64("01011110000xxxxx000000xxxxxxxxxx", InstEmit.Sha1c_V, typeof(OpCodeSimdReg64)); SetA64("0101111000101000000010xxxxxxxxxx", InstEmit.Sha1h_V, typeof(OpCodeSimd64)); SetA64("01011110000xxxxx001000xxxxxxxxxx", InstEmit.Sha1m_V, typeof(OpCodeSimdReg64)); @@ -486,9 +486,9 @@ namespace ChocolArm64 SetA64("001011100x110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("01101110<<110000001110xxxxxxxxxx", InstEmit.Uaddlv_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstEmit.Uaddw_V, typeof(OpCodeSimdReg64)); - SetA64("x0011110xx100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); + SetA64("x00111100x100011000000xxxxxxxxxx", InstEmit.Ucvtf_Gp, typeof(OpCodeSimdCvt64)); SetA64("011111100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_S, typeof(OpCodeSimd64)); - SetA64("0x1011100x100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); + SetA64("0>1011100<100001110110xxxxxxxxxx", InstEmit.Ucvtf_V, typeof(OpCodeSimd64)); SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstEmit.Uhadd_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstEmit.Uhsub_V, typeof(OpCodeSimdReg64)); SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstEmit.Umax_V, typeof(OpCodeSimdReg64));