From d9561f41ebf52aa3f3fbba097945a7e42b493cdc Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sat, 23 Mar 2019 19:50:19 +0100 Subject: [PATCH] Add Tbl_V Sse opt. with Tests. (#651) * Add v4, v5, v30, v31 required for Tbl_V Tests. * Add Tests for Tbl_V. * Add Tbl_V Sse opt.. * Nit. * Small opt. on comparison constant vector. * Nit. * Add EmitLd/Stvectmp2/3. * Nit. --- Instructions/InstEmitSimdMove.cs | 101 ++++++++++++++++++++++++------- Translation/ILEmitterCtx.cs | 10 ++- 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/Instructions/InstEmitSimdMove.cs b/Instructions/InstEmitSimdMove.cs index 20647ce..cdd3517 100644 --- a/Instructions/InstEmitSimdMove.cs +++ b/Instructions/InstEmitSimdMove.cs @@ -355,35 +355,94 @@ namespace ChocolArm64.Instructions { OpCodeSimdTbl64 op = (OpCodeSimdTbl64)context.CurrOp; - context.EmitLdvec(op.Rm); - - for (int index = 0; index < op.Size; index++) + if (Optimizations.UseSsse3) { - context.EmitLdvec((op.Rn + index) & 0x1f); - } + Type[] typesCmpSflSub = new Type[] { typeof(Vector128), typeof(Vector128) }; + Type[] typesOr = new Type[] { typeof(Vector128 ), typeof(Vector128 ) }; + Type[] typesSav = new Type[] { typeof(long) }; - switch (op.Size) + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitLdc_I8(0x0F0F0F0F0F0F0F0FL); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitStvectmp2(); + context.EmitLdvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub)); + + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub)); + + for (int index = 1; index < op.Size; index++) + { + context.EmitLdvec((op.Rn + index) & 0x1F); + context.EmitLdvec(op.Rm); + + context.EmitLdc_I8(0x1010101010101010L * index); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesCmpSflSub)); + + context.EmitStvectmp(); + context.EmitLdvectmp(); + + context.EmitLdvectmp2(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.CompareGreaterThan), typesCmpSflSub)); + + context.EmitLdvectmp(); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesCmpSflSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), typesOr)); + } + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else { - case 1: VectorHelper.EmitCall(context, - nameof(VectorHelper.Tbl1_V64), - nameof(VectorHelper.Tbl1_V128)); break; + context.EmitLdvec(op.Rm); - case 2: VectorHelper.EmitCall(context, - nameof(VectorHelper.Tbl2_V64), - nameof(VectorHelper.Tbl2_V128)); break; + for (int index = 0; index < op.Size; index++) + { + context.EmitLdvec((op.Rn + index) & 0x1F); + } - case 3: VectorHelper.EmitCall(context, - nameof(VectorHelper.Tbl3_V64), - nameof(VectorHelper.Tbl3_V128)); break; + switch (op.Size) + { + case 1: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl1_V64), + nameof(VectorHelper.Tbl1_V128)); break; - case 4: VectorHelper.EmitCall(context, - nameof(VectorHelper.Tbl4_V64), - nameof(VectorHelper.Tbl4_V128)); break; + case 2: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl2_V64), + nameof(VectorHelper.Tbl2_V128)); break; - default: throw new InvalidOperationException(); + case 3: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl3_V64), + nameof(VectorHelper.Tbl3_V128)); break; + + case 4: VectorHelper.EmitCall(context, + nameof(VectorHelper.Tbl4_V64), + nameof(VectorHelper.Tbl4_V128)); break; + + default: throw new InvalidOperationException(); + } + + context.EmitStvec(op.Rd); } - - context.EmitStvec(op.Rd); } public static void Trn1_V(ILEmitterCtx context) diff --git a/Translation/ILEmitterCtx.cs b/Translation/ILEmitterCtx.cs index f39bd37..8804521 100644 --- a/Translation/ILEmitterCtx.cs +++ b/Translation/ILEmitterCtx.cs @@ -61,7 +61,9 @@ namespace ChocolArm64.Translation //Vectors are part of another "set" of locals. private const int VecGpTmp1Index = ReservedLocalsCount + 0; - private const int UserVecTempStart = ReservedLocalsCount + 1; + private const int VecGpTmp2Index = ReservedLocalsCount + 1; + private const int VecGpTmp3Index = ReservedLocalsCount + 2; + private const int UserVecTempStart = ReservedLocalsCount + 3; private static int _userIntTempCount; private static int _userVecTempCount; @@ -629,6 +631,12 @@ namespace ChocolArm64.Translation public void EmitLdvectmp() => EmitLdvec(VecGpTmp1Index); public void EmitStvectmp() => EmitStvec(VecGpTmp1Index); + public void EmitLdvectmp2() => EmitLdvec(VecGpTmp2Index); + public void EmitStvectmp2() => EmitStvec(VecGpTmp2Index); + + public void EmitLdvectmp3() => EmitLdvec(VecGpTmp3Index); + public void EmitStvectmp3() => EmitStvec(VecGpTmp3Index); + public void EmitLdint(int index) => Ldloc(index, VarType.Int); public void EmitStint(int index) => Stloc(index, VarType.Int);