From 86aae79b9dae85d91c160b724772eb4400e5e2ed Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Tue, 26 Jun 2018 03:32:29 +0200 Subject: [PATCH] Add Sse Opt. for Cmeq_V_2D, Cmgt_V_2D (Reg). Add Sse Opt. for Crc32cb, Crc32ch, Crc32cw, Crc32cx. Add 10 simple tests for Fcmgt, Fcmge, Fcmeq, Fcmle, Fcmlt (S, V) (Reg, Zero). Add 2 Cnt_V tests. (#183) * Add files via upload * Add files via upload * Add files via upload * CPE * Add EmitSse42Crc32() * Update CpuTestSimdCmp.cs * Update Pseudocode.cs * Update Instructions.cs * Update CpuTestSimd.cs * Update Instructions.cs --- AOpCodeTable.cs | 6 +-- AOptimizations.cs | 12 ++++- Instruction/AInstEmitHash.cs | 52 +++++++++++++++++++-- Instruction/AInstEmitSimdArithmetic.cs | 32 ++++++------- Instruction/AInstEmitSimdCmp.cs | 60 +++++++++++++++++------- Instruction/AInstEmitSimdHelper.cs | 64 +++++++++++++++----------- 6 files changed, 156 insertions(+), 70 deletions(-) diff --git a/AOpCodeTable.cs b/AOpCodeTable.cs index fcaee38..e78d0b5 100644 --- a/AOpCodeTable.cs +++ b/AOpCodeTable.cs @@ -225,16 +225,16 @@ namespace ChocolArm64 SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", AInstEmit.Fccmp_S, typeof(AOpCodeSimdFcond)); SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", AInstEmit.Fccmpe_S, typeof(AOpCodeSimdFcond)); SetA64("010111100x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_S, typeof(AOpCodeSimdReg)); - SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimdReg)); SetA64("010111101x100000110110xxxxxxxxxx", AInstEmit.Fcmeq_S, typeof(AOpCodeSimd)); + SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<100000110110xxxxxxxxxx", AInstEmit.Fcmeq_V, typeof(AOpCodeSimd)); SetA64("011111100x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_S, typeof(AOpCodeSimdReg)); - SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimdReg)); SetA64("011111101x100000110010xxxxxxxxxx", AInstEmit.Fcmge_S, typeof(AOpCodeSimd)); + SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimdReg)); SetA64("0>1011101<100000110010xxxxxxxxxx", AInstEmit.Fcmge_V, typeof(AOpCodeSimd)); SetA64("011111101x1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_S, typeof(AOpCodeSimdReg)); - SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimdReg)); SetA64("010111101x100000110010xxxxxxxxxx", AInstEmit.Fcmgt_S, typeof(AOpCodeSimd)); + SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<100000110010xxxxxxxxxx", AInstEmit.Fcmgt_V, typeof(AOpCodeSimd)); SetA64("011111101x100000110110xxxxxxxxxx", AInstEmit.Fcmle_S, typeof(AOpCodeSimd)); SetA64("0>1011101<100000110110xxxxxxxxxx", AInstEmit.Fcmle_V, typeof(AOpCodeSimd)); diff --git a/AOptimizations.cs b/AOptimizations.cs index e8c1f7c..800cf36 100644 --- a/AOptimizations.cs +++ b/AOptimizations.cs @@ -6,7 +6,15 @@ public static class AOptimizations public static bool GenerateCallStack = true; - public static bool UseSse2IfAvailable = true; + private static bool UseAllSseIfAvailable = true; - internal static bool UseSse2 = UseSse2IfAvailable && Sse2.IsSupported; + private static bool UseSseIfAvailable = true; + private static bool UseSse2IfAvailable = true; + private static bool UseSse41IfAvailable = true; + private static bool UseSse42IfAvailable = true; + + internal static bool UseSse = (UseAllSseIfAvailable && UseSseIfAvailable) && Sse.IsSupported; + internal static bool UseSse2 = (UseAllSseIfAvailable && UseSse2IfAvailable) && Sse2.IsSupported; + internal static bool UseSse41 = (UseAllSseIfAvailable && UseSse41IfAvailable) && Sse41.IsSupported; + internal static bool UseSse42 = (UseAllSseIfAvailable && UseSse42IfAvailable) && Sse42.IsSupported; } \ No newline at end of file diff --git a/Instruction/AInstEmitHash.cs b/Instruction/AInstEmitHash.cs index 94e03f6..69bdbc4 100644 --- a/Instruction/AInstEmitHash.cs +++ b/Instruction/AInstEmitHash.cs @@ -1,7 +1,9 @@ using ChocolArm64.Decoder; using ChocolArm64.State; using ChocolArm64.Translation; +using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; namespace ChocolArm64.Instruction { @@ -29,22 +31,62 @@ namespace ChocolArm64.Instruction public static void Crc32cb(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cb)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(byte)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cb)); + } } public static void Crc32ch(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32ch)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(ushort)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32ch)); + } } public static void Crc32cw(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cw)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(uint), typeof(uint)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cw)); + } } public static void Crc32cx(AILEmitterCtx Context) { - EmitCrc32(Context, nameof(ASoftFallback.Crc32cx)); + if (AOptimizations.UseSse42) + { + EmitSse42Crc32(Context, typeof(ulong), typeof(ulong)); + } + else + { + EmitCrc32(Context, nameof(ASoftFallback.Crc32cx)); + } + } + + private static void EmitSse42Crc32(AILEmitterCtx Context, Type TCrc, Type TData) + { + AOpCodeAluRs Op = (AOpCodeAluRs)Context.CurrOp; + + Context.EmitLdintzr(Op.Rn); + Context.EmitLdintzr(Op.Rm); + + Context.EmitCall(typeof(Sse42).GetMethod(nameof(Sse42.Crc32), new Type[] { TCrc, TData })); + + Context.EmitStintzr(Op.Rd); } private static void EmitCrc32(AILEmitterCtx Context, string Name) @@ -70,4 +112,4 @@ namespace ChocolArm64.Instruction Context.EmitStintzr(Op.Rd); } } -} \ No newline at end of file +} diff --git a/Instruction/AInstEmitSimdArithmetic.cs b/Instruction/AInstEmitSimdArithmetic.cs index 8cd4654..8b6e234 100644 --- a/Instruction/AInstEmitSimdArithmetic.cs +++ b/Instruction/AInstEmitSimdArithmetic.cs @@ -319,9 +319,9 @@ namespace ChocolArm64.Instruction public static void Fadd_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.AddScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.AddScalar)); } else { @@ -331,9 +331,9 @@ namespace ChocolArm64.Instruction public static void Fadd_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Add)); + EmitSseOrSse2CallF(Context, nameof(Sse.Add)); } else { @@ -389,9 +389,9 @@ namespace ChocolArm64.Instruction public static void Fdiv_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.DivideScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.DivideScalar)); } else { @@ -401,9 +401,9 @@ namespace ChocolArm64.Instruction public static void Fdiv_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Divide)); + EmitSseOrSse2CallF(Context, nameof(Sse.Divide)); } else { @@ -563,9 +563,9 @@ namespace ChocolArm64.Instruction public static void Fmul_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar)); } else { @@ -580,9 +580,9 @@ namespace ChocolArm64.Instruction public static void Fmul_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Multiply)); + EmitSseOrSse2CallF(Context, nameof(Sse.Multiply)); } else { @@ -1019,9 +1019,9 @@ namespace ChocolArm64.Instruction public static void Fsub_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.SubtractScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.SubtractScalar)); } else { @@ -1031,9 +1031,9 @@ namespace ChocolArm64.Instruction public static void Fsub_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2) + if (AOptimizations.UseSse && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.Subtract)); + EmitSseOrSse2CallF(Context, nameof(Sse.Subtract)); } else { diff --git a/Instruction/AInstEmitSimdCmp.cs b/Instruction/AInstEmitSimdCmp.cs index ba8ac3e..68a7ab8 100644 --- a/Instruction/AInstEmitSimdCmp.cs +++ b/Instruction/AInstEmitSimdCmp.cs @@ -19,9 +19,20 @@ namespace ChocolArm64.Instruction public static void Cmeq_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3) + if (Context.CurrOp is AOpCodeSimdReg Op) { - EmitSse2Call(Context, nameof(Sse2.CompareEqual)); + if (Op.Size < 3 && AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.CompareEqual)); + } + else if (Op.Size == 3 && AOptimizations.UseSse41) + { + EmitSse41Call(Context, nameof(Sse41.CompareEqual)); + } + else + { + EmitCmp(Context, OpCodes.Beq_S, Scalar: false); + } } else { @@ -46,9 +57,20 @@ namespace ChocolArm64.Instruction public static void Cmgt_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3) + if (Context.CurrOp is AOpCodeSimdReg Op) { - EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan)); + if (Op.Size < 3 && AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan)); + } + else if (Op.Size == 3 && AOptimizations.UseSse42) + { + EmitSse42Call(Context, nameof(Sse42.CompareGreaterThan)); + } + else + { + EmitCmp(Context, OpCodes.Bgt_S, Scalar: false); + } } else { @@ -133,9 +155,10 @@ namespace ChocolArm64.Instruction public static void Fcmeq_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar)); } else { @@ -145,9 +168,10 @@ namespace ChocolArm64.Instruction public static void Fcmeq_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareEqual)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqual)); } else { @@ -157,9 +181,10 @@ namespace ChocolArm64.Instruction public static void Fcmge_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar)); } else { @@ -169,9 +194,10 @@ namespace ChocolArm64.Instruction public static void Fcmge_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual)); } else { @@ -181,9 +207,10 @@ namespace ChocolArm64.Instruction public static void Fcmgt_S(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar)); } else { @@ -193,9 +220,10 @@ namespace ChocolArm64.Instruction public static void Fcmgt_V(AILEmitterCtx Context) { - if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse + && AOptimizations.UseSse2) { - EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan)); + EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan)); } else { diff --git a/Instruction/AInstEmitSimdHelper.cs b/Instruction/AInstEmitSimdHelper.cs index 3caf2a3..80c6aeb 100644 --- a/Instruction/AInstEmitSimdHelper.cs +++ b/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -34,11 +35,27 @@ namespace ChocolArm64.Instruction return (8 << (Op.Size + 1)) - Op.Imm; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static void EmitSse2Call(AILEmitterCtx Context, string Name) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + EmitSseCall(Context, Name, typeof(Sse2)); + } - int SizeF = Op.Size & 1; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void EmitSse41Call(AILEmitterCtx Context, string Name) + { + EmitSseCall(Context, Name, typeof(Sse41)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void EmitSse42Call(AILEmitterCtx Context, string Name) + { + EmitSseCall(Context, Name, typeof(Sse42)); + } + + private static void EmitSseCall(AILEmitterCtx Context, string Name, Type Type) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; void Ldvec(int Reg) { @@ -57,8 +74,6 @@ namespace ChocolArm64.Instruction Type BaseType = null; - Type[] Types; - switch (Op.Size) { case 0: BaseType = typeof(Vector128); break; @@ -71,15 +86,13 @@ namespace ChocolArm64.Instruction { Ldvec(BinOp.Rm); - Types = new Type[] { BaseType, BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType })); } else { - Types = new Type[] { BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType })); } - Context.EmitCall(typeof(Sse2).GetMethod(Name, Types)); - switch (Op.Size) { case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break; @@ -96,7 +109,7 @@ namespace ChocolArm64.Instruction } } - public static void EmitSse2CallF(AILEmitterCtx Context, string Name) + public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -114,36 +127,31 @@ namespace ChocolArm64.Instruction Ldvec(Op.Rn); - Type BaseType = SizeF == 0 - ? typeof(Vector128) - : typeof(Vector128); + Type Type; + Type BaseType; - Type[] Types; + if (SizeF == 0) + { + Type = typeof(Sse); + BaseType = typeof(Vector128); + } + else /* if (SizeF == 1) */ + { + Type = typeof(Sse2); + BaseType = typeof(Vector128); + } if (Op is AOpCodeSimdReg BinOp) { Ldvec(BinOp.Rm); - Types = new Type[] { BaseType, BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType })); } else { - Types = new Type[] { BaseType }; + Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType })); } - MethodInfo MthdInfo; - - if (SizeF == 0) - { - MthdInfo = typeof(Sse).GetMethod(Name, Types); - } - else /* if (SizeF == 1) */ - { - MthdInfo = typeof(Sse2).GetMethod(Name, Types); - } - - Context.EmitCall(MthdInfo); - if (SizeF == 1) { AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));