From 1f2400ed1805685813974cb83fb811b66a43dcf6 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 15 Jul 2018 05:53:26 +0200 Subject: [PATCH] Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268) * Update CpuTestSimdArithmetic.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update Instructions.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdMove.cs * Delete CpuTestSimdMove.cs --- Instruction/AInstEmitSimdArithmetic.cs | 12 ++++++- Instruction/AInstEmitSimdHelper.cs | 12 ++++++- Instruction/AInstEmitSimdMove.cs | 47 ++++++++++++++------------ 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/Instruction/AInstEmitSimdArithmetic.cs b/Instruction/AInstEmitSimdArithmetic.cs index a39ffc0..36bb1cb 100644 --- a/Instruction/AInstEmitSimdArithmetic.cs +++ b/Instruction/AInstEmitSimdArithmetic.cs @@ -163,12 +163,19 @@ namespace ChocolArm64.Instruction AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; int Elems = 8 >> Op.Size; + int ESize = 8 << Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; long RoundConst = 1L << (ESize - 1); + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); @@ -185,9 +192,12 @@ namespace ChocolArm64.Instruction Context.EmitLsr(ESize); - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); diff --git a/Instruction/AInstEmitSimdHelper.cs b/Instruction/AInstEmitSimdHelper.cs index 1f7a2da..7716e29 100644 --- a/Instruction/AInstEmitSimdHelper.cs +++ b/Instruction/AInstEmitSimdHelper.cs @@ -813,6 +813,7 @@ namespace ChocolArm64.Instruction AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; int Elems = !Scalar ? 8 >> Op.Size : 1; + int ESize = 8 << Op.Size; int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; @@ -823,6 +824,12 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I8(0L); Context.EmitSttmp(); + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { AILLabel LblLe = new AILLabel(); @@ -867,9 +874,12 @@ namespace ChocolArm64.Instruction EmitVectorZeroLower(Context, Op.Rd); } - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); diff --git a/Instruction/AInstEmitSimdMove.cs b/Instruction/AInstEmitSimdMove.cs index 739f01c..592cab7 100644 --- a/Instruction/AInstEmitSimdMove.cs +++ b/Instruction/AInstEmitSimdMove.cs @@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = (Index & ~1) + Part; + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp(); @@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part + ((Index & (Half - 1)) << 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Pairs + Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } Context.EmitLdvectmp(); @@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; + int Base = Part != 0 ? Pairs : 0; - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part * Half + (Index >> 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp();