Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268)
* Update CpuTestSimdArithmetic.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update Instructions.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdMove.cs * Delete CpuTestSimdMove.cs
This commit is contained in:
parent
97e469e315
commit
1f2400ed18
3 changed files with 47 additions and 24 deletions
|
@ -163,12 +163,19 @@ namespace ChocolArm64.Instruction
|
|||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Elems = 8 >> Op.Size;
|
||||
|
||||
int ESize = 8 << Op.Size;
|
||||
|
||||
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
|
||||
|
||||
long RoundConst = 1L << (ESize - 1);
|
||||
|
||||
if (Part != 0)
|
||||
{
|
||||
Context.EmitLdvec(Op.Rd);
|
||||
Context.EmitStvectmp();
|
||||
}
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
{
|
||||
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
|
||||
|
@ -185,9 +192,12 @@ namespace ChocolArm64.Instruction
|
|||
|
||||
Context.EmitLsr(ESize);
|
||||
|
||||
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
Context.EmitStvec(Op.Rd);
|
||||
|
||||
if (Part == 0)
|
||||
{
|
||||
EmitVectorZeroUpper(Context, Op.Rd);
|
||||
|
|
|
@ -813,6 +813,7 @@ namespace ChocolArm64.Instruction
|
|||
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
|
||||
|
||||
int Elems = !Scalar ? 8 >> Op.Size : 1;
|
||||
|
||||
int ESize = 8 << Op.Size;
|
||||
|
||||
int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0;
|
||||
|
@ -823,6 +824,12 @@ namespace ChocolArm64.Instruction
|
|||
Context.EmitLdc_I8(0L);
|
||||
Context.EmitSttmp();
|
||||
|
||||
if (Part != 0)
|
||||
{
|
||||
Context.EmitLdvec(Op.Rd);
|
||||
Context.EmitStvectmp();
|
||||
}
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
{
|
||||
AILLabel LblLe = new AILLabel();
|
||||
|
@ -867,9 +874,12 @@ namespace ChocolArm64.Instruction
|
|||
EmitVectorZeroLower(Context, Op.Rd);
|
||||
}
|
||||
|
||||
EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Part + Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
Context.EmitStvec(Op.Rd);
|
||||
|
||||
if (Part == 0)
|
||||
{
|
||||
EmitVectorZeroUpper(Context, Op.Rd);
|
||||
|
|
|
@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = (Index & ~1) + Part;
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx , Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
int Half = Elems >> 1;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = Part + ((Index & (Half - 1)) << 1);
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Pairs + Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
|
||||
|
||||
int Bytes = Op.GetBitsCount() >> 3;
|
||||
int Words = Op.GetBitsCount() >> 4;
|
||||
int Pairs = Words >> Op.Size;
|
||||
|
||||
int Elems = Bytes >> Op.Size;
|
||||
int Half = Elems >> 1;
|
||||
int Base = Part != 0 ? Pairs : 0;
|
||||
|
||||
for (int Index = 0; Index < Elems; Index++)
|
||||
for (int Index = 0; Index < Pairs; Index++)
|
||||
{
|
||||
int Elem = Part * Half + (Index >> 1);
|
||||
int Idx = Index << 1;
|
||||
|
||||
EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
|
||||
EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
|
||||
|
||||
EmitVectorInsertTmp(Context, Index, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
|
||||
EmitVectorInsertTmp(Context, Idx , Op.Size);
|
||||
}
|
||||
|
||||
Context.EmitLdvectmp();
|
||||
|
|
Loading…
Reference in a new issue