From 041a5e0301d9f14a6ccf913fe4357de56bd8f317 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Fri, 26 Apr 2019 00:58:29 +0200 Subject: [PATCH] Sse optimized the Scalar & Vector fp-to-fp conversion instructions (MNPZ & IX); added the related Tests (AMNPZ & IX). Small refactoring of existing instructions. (#676) * Nit. * Update InstEmitSimdCvt.cs * Update VectorHelper.cs * Update InstEmitSimdArithmetic.cs * Update CpuTestSimd.cs * Superseded. --- Instructions/InstEmitSimdArithmetic.cs | 298 +++++++++++++++++++------ Instructions/InstEmitSimdCvt.cs | 38 +++- Instructions/InstEmitSimdHelper.cs | 4 +- Instructions/VectorHelper.cs | 160 +++++++++++-- 4 files changed, 408 insertions(+), 92 deletions(-) diff --git a/Instructions/InstEmitSimdArithmetic.cs b/Instructions/InstEmitSimdArithmetic.cs index d2d87be..357d88b 100644 --- a/Instructions/InstEmitSimdArithmetic.cs +++ b/Instructions/InstEmitSimdArithmetic.cs @@ -1382,13 +1382,10 @@ namespace ChocolArm64.Instructions public static void Frinta_S(ILEmitterCtx context) { - OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - - EmitVectorExtractF(context, op.Rn, 0, op.Size); - - EmitRoundMathCall(context, MidpointRounding.AwayFromZero); - - EmitScalarSetF(context, op.Rd, op.Size); + EmitScalarUnaryOpF(context, () => + { + EmitRoundMathCall(context, MidpointRounding.AwayFromZero); + }); } public static void Frinta_V(ILEmitterCtx context) @@ -1403,23 +1400,40 @@ namespace ChocolArm64.Instructions { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse41) { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + context.EmitLdvec(op.Rn); + context.EmitLdarg(TranslatedSub.StateArgIdx); if (op.Size == 0) { - VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41ScalarRoundF)); } - else if (op.Size == 1) + else /* if (op.Size == 1) */ { - VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41ScalarRound)); } - else + + context.EmitStvec(op.Rd); + } + else + { + EmitScalarUnaryOpF(context, () => { - throw new InvalidOperationException(); - } - }); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (op.Size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else /* if (op.Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + }); + } } public static void Frinti_V(ILEmitterCtx context) @@ -1428,136 +1442,250 @@ namespace ChocolArm64.Instructions int sizeF = op.Size & 1; - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse41) { + context.EmitLdvec(op.Rn); + context.EmitLdarg(TranslatedSub.StateArgIdx); if (sizeF == 0) { - VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorRoundF)); } - else if (sizeF == 1) + else /* if (sizeF == 1) */ { - VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorRound)); } - else + + context.EmitStvec(op.Rd); + + if (sizeF == 0 && op.RegisterSize == RegisterSize.Simd64) { - throw new InvalidOperationException(); + EmitVectorZeroUpper(context, op.Rd); } - }); + } + else + { + EmitVectorUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else /* if (sizeF == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + }); + } } public static void Frintm_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Floor)); - }); + EmitSse41Frint(context, RoundMode.TowardsMinusInfinity, scalar: true); + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Floor)); + }); + } } public static void Frintm_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Floor)); - }); + EmitSse41Frint(context, RoundMode.TowardsMinusInfinity, scalar: false); + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Floor)); + }); + } } public static void Frintn_S(ILEmitterCtx context) { - OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - - EmitVectorExtractF(context, op.Rn, 0, op.Size); - - EmitRoundMathCall(context, MidpointRounding.ToEven); - - EmitScalarSetF(context, op.Rd, op.Size); + if (Optimizations.UseSse41) + { + EmitSse41Frint(context, RoundMode.ToNearest, scalar: true); + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitRoundMathCall(context, MidpointRounding.ToEven); + }); + } } public static void Frintn_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitRoundMathCall(context, MidpointRounding.ToEven); - }); + EmitSse41Frint(context, RoundMode.ToNearest, scalar: false); + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitRoundMathCall(context, MidpointRounding.ToEven); + }); + } } public static void Frintp_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Ceiling)); - }); + EmitSse41Frint(context, RoundMode.TowardsPlusInfinity, scalar: true); + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Ceiling)); + }); + } } public static void Frintp_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Ceiling)); - }); + EmitSse41Frint(context, RoundMode.TowardsPlusInfinity, scalar: false); + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Ceiling)); + }); + } } public static void Frintx_S(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse41) { + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + context.EmitLdvec(op.Rn); + context.EmitLdarg(TranslatedSub.StateArgIdx); if (op.Size == 0) { - VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41ScalarRoundF)); } - else if (op.Size == 1) + else /* if (op.Size == 1) */ { - VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41ScalarRound)); } - else + + context.EmitStvec(op.Rd); + } + else + { + EmitScalarUnaryOpF(context, () => { - throw new InvalidOperationException(); - } - }); + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (op.Size == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else /* if (op.Size == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + }); + } } public static void Frintx_V(ILEmitterCtx context) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - EmitVectorUnaryOpF(context, () => + int sizeF = op.Size & 1; + + if (Optimizations.UseSse41) { + context.EmitLdvec(op.Rn); + context.EmitLdarg(TranslatedSub.StateArgIdx); - if (op.Size == 0) + if (sizeF == 0) { - VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorRoundF)); } - else if (op.Size == 1) + else /* if (sizeF == 1) */ { - VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorRound)); } - else + + context.EmitStvec(op.Rd); + + if (sizeF == 0 && op.RegisterSize == RegisterSize.Simd64) { - throw new InvalidOperationException(); + EmitVectorZeroUpper(context, op.Rd); } - }); + } + else + { + EmitVectorUnaryOpF(context, () => + { + context.EmitLdarg(TranslatedSub.StateArgIdx); + + if (sizeF == 0) + { + VectorHelper.EmitCall(context, nameof(VectorHelper.RoundF)); + } + else /* if (sizeF == 1) */ + { + VectorHelper.EmitCall(context, nameof(VectorHelper.Round)); + } + }); + } } public static void Frintz_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Truncate)); - }); + EmitSse41Frint(context, RoundMode.TowardsZero, scalar: true); + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Truncate)); + }); + } } public static void Frintz_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse41) { - EmitUnaryMathCall(context, nameof(Math.Truncate)); - }); + EmitSse41Frint(context, RoundMode.TowardsZero, scalar: false); + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Truncate)); + }); + } } public static void Frsqrte_S(ILEmitterCtx context) @@ -3542,6 +3670,44 @@ namespace ChocolArm64.Instructions } } + private static void EmitSse41Frint(ILEmitterCtx context, RoundMode roundMode, bool scalar) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (scalar) + { + Type[] typesRnd = op.Size == 0 + ? new Type[] { typeof(Vector128), typeof(Vector128) } + : new Type[] { typeof(Vector128), typeof(Vector128) }; + + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse41).GetMethod(GetScalarSse41NameRnd(roundMode), typesRnd)); + + context.EmitStvec(op.Rd); + } + else + { + int sizeF = op.Size & 1; + + Type[] typesRnd = sizeF == 0 + ? new Type[] { typeof(Vector128) } + : new Type[] { typeof(Vector128) }; + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRnd)); + + context.EmitStvec(op.Rd); + + if (sizeF == 0 && op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + } + private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null) { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; diff --git a/Instructions/InstEmitSimdCvt.cs b/Instructions/InstEmitSimdCvt.cs index c5f16f8..b34687e 100644 --- a/Instructions/InstEmitSimdCvt.cs +++ b/Instructions/InstEmitSimdCvt.cs @@ -898,7 +898,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types)); } - context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt)); + context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.EmitStvectmp(); context.EmitLdvectmp(); @@ -954,7 +954,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types)); } - context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt)); + context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.EmitStvectmp(); @@ -1032,7 +1032,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), types)); } - context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt)); + context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.Emit(OpCodes.Dup); @@ -1120,7 +1120,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), types)); } - context.EmitCall(typeof(Sse41).GetMethod(GetSse41NameRnd(roundMode), typesRndCvt)); + context.EmitCall(typeof(Sse41).GetMethod(GetVectorSse41NameRnd(roundMode), typesRndCvt)); context.Emit(OpCodes.Dup); @@ -1305,23 +1305,39 @@ namespace ChocolArm64.Instructions } } - private static string GetSse41NameRnd(RoundMode roundMode) + private static string GetScalarSse41NameRnd(RoundMode roundMode) + { + switch (roundMode) + { + case RoundMode.ToNearest: + return nameof(Sse41.RoundToNearestIntegerScalar); // even + + case RoundMode.TowardsPlusInfinity: + return nameof(Sse41.RoundToPositiveInfinityScalar); + + case RoundMode.TowardsMinusInfinity: + return nameof(Sse41.RoundToNegativeInfinityScalar); + + default: /* case RoundMode.TowardsZero: */ + return nameof(Sse41.RoundToZeroScalar); + } + } + + private static string GetVectorSse41NameRnd(RoundMode roundMode) { switch (roundMode) { case RoundMode.ToNearest: return nameof(Sse41.RoundToNearestInteger); // even - case RoundMode.TowardsMinusInfinity: - return nameof(Sse41.RoundToNegativeInfinity); - case RoundMode.TowardsPlusInfinity: return nameof(Sse41.RoundToPositiveInfinity); - case RoundMode.TowardsZero: - return nameof(Sse41.RoundToZero); + case RoundMode.TowardsMinusInfinity: + return nameof(Sse41.RoundToNegativeInfinity); - default: throw new ArgumentException(nameof(roundMode)); + default: /* case RoundMode.TowardsZero: */ + return nameof(Sse41.RoundToZero); } } } diff --git a/Instructions/InstEmitSimdHelper.cs b/Instructions/InstEmitSimdHelper.cs index 6799a3a..2bcda35 100644 --- a/Instructions/InstEmitSimdHelper.cs +++ b/Instructions/InstEmitSimdHelper.cs @@ -237,7 +237,9 @@ namespace ChocolArm64.Instructions { IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp; - Type type = (op.Size & 1) == 0 + int sizeF = op.Size & 1; + + Type type = sizeF == 0 ? typeof(SoftFloat32) : typeof(SoftFloat64); diff --git a/Instructions/VectorHelper.cs b/Instructions/VectorHelper.cs index d1dface..3e2b258 100644 --- a/Instructions/VectorHelper.cs +++ b/Instructions/VectorHelper.cs @@ -93,30 +93,162 @@ namespace ChocolArm64.Instructions value <= ulong.MinValue ? ulong.MinValue : (ulong)value; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double Round(double value, CpuThreadState state) { - switch (state.FPRoundingMode()) - { - case RoundMode.ToNearest: return Math.Round (value); - case RoundMode.TowardsPlusInfinity: return Math.Ceiling (value); - case RoundMode.TowardsMinusInfinity: return Math.Floor (value); - case RoundMode.TowardsZero: return Math.Truncate(value); - } + RoundMode roundMode = state.FPRoundingMode(); - throw new InvalidOperationException(); + if (roundMode == RoundMode.ToNearest) + { + return Math.Round(value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return Math.Ceiling(value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return Math.Floor(value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return Math.Truncate(value); + } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float RoundF(float value, CpuThreadState state) { - switch (state.FPRoundingMode()) + RoundMode roundMode = state.FPRoundingMode(); + + if (roundMode == RoundMode.ToNearest) { - case RoundMode.ToNearest: return MathF.Round (value); - case RoundMode.TowardsPlusInfinity: return MathF.Ceiling (value); - case RoundMode.TowardsMinusInfinity: return MathF.Floor (value); - case RoundMode.TowardsZero: return MathF.Truncate(value); + return MathF.Round(value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return MathF.Ceiling(value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return MathF.Floor(value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return MathF.Truncate(value); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Sse41ScalarRound(Vector128 upper, Vector128 value, CpuThreadState state) + { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); } - throw new InvalidOperationException(); + RoundMode roundMode = state.FPRoundingMode(); + + if (roundMode == RoundMode.ToNearest) + { + return Sse41.RoundToNearestIntegerScalar(upper, value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return Sse41.RoundToPositiveInfinityScalar(upper, value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return Sse41.RoundToNegativeInfinityScalar(upper, value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return Sse41.RoundToZeroScalar(upper, value); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Sse41ScalarRoundF(Vector128 upper, Vector128 value, CpuThreadState state) + { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + RoundMode roundMode = state.FPRoundingMode(); + + if (roundMode == RoundMode.ToNearest) + { + return Sse41.RoundToNearestIntegerScalar(upper, value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return Sse41.RoundToPositiveInfinityScalar(upper, value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return Sse41.RoundToNegativeInfinityScalar(upper, value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return Sse41.RoundToZeroScalar(upper, value); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Sse41VectorRound(Vector128 value, CpuThreadState state) + { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + RoundMode roundMode = state.FPRoundingMode(); + + if (roundMode == RoundMode.ToNearest) + { + return Sse41.RoundToNearestInteger(value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return Sse41.RoundToPositiveInfinity(value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return Sse41.RoundToNegativeInfinity(value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return Sse41.RoundToZero(value); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Sse41VectorRoundF(Vector128 value, CpuThreadState state) + { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + RoundMode roundMode = state.FPRoundingMode(); + + if (roundMode == RoundMode.ToNearest) + { + return Sse41.RoundToNearestInteger(value); // even + } + else if (roundMode == RoundMode.TowardsPlusInfinity) + { + return Sse41.RoundToPositiveInfinity(value); + } + else if (roundMode == RoundMode.TowardsMinusInfinity) + { + return Sse41.RoundToNegativeInfinity(value); + } + else /* if (roundMode == RoundMode.TowardsZero) */ + { + return Sse41.RoundToZero(value); + } } public static Vector128 Tbl1_V64(