Add Fmls_Se, Fmulx_Se/Ve, Smov_S Inst.; Opt. Clz/Clz_V, Cnt_V, Shl_V, S/Ushr_V, S/Usra_V Inst.; Add 11 Tests. Some fixes. (#449)

* Update AOpCodeTable.cs

* Update AInstEmitSimdMove.cs

* Update AInstEmitSimdArithmetic.cs

* Update AInstEmitSimdShift.cs

* Update ASoftFallback.cs

* Update ASoftFloat.cs

* Update AOpCodeSimdRegElemF.cs

* Update CpuTestSimdIns.cs

* Update CpuTestSimdRegElem.cs

* Create CpuTestSimdRegElemF.cs

* Update CpuTestSimd.cs

* Update CpuTestSimdReg.cs

* Superseded Fmul_Se Test. Nit.

* Address PR feedback.

* Address PR feedback.

* Update AInstEmitSimdArithmetic.cs

* Update ASoftFallback.cs

* Update AInstEmitAlu.cs

* Update AInstEmitSimdShift.cs
This commit is contained in:
LDj3SNuD 2018-10-14 04:35:16 +02:00 committed by gdkchan
parent f1e01ed47b
commit 9533b338ac
8 changed files with 268 additions and 58 deletions

View file

@ -284,11 +284,12 @@ namespace ChocolArm64
SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg));
SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg));
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg)); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg));
SetA64("010111111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Se, typeof(AOpCodeSimdRegElemF)); SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg)); SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg));
SetA64("0x0011111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve, typeof(AOpCodeSimdRegElemF)); SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmls_V, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmls_V, typeof(AOpCodeSimdReg));
SetA64("0x0011111<<xxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Ve, typeof(AOpCodeSimdRegElemF)); SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("000111100x100000010000xxxxxxxxxx", AInstEmit.Fmov_S, typeof(AOpCodeSimd)); SetA64("000111100x100000010000xxxxxxxxxx", AInstEmit.Fmov_S, typeof(AOpCodeSimd));
SetA64("00011110xx1xxxxxxxx100xxxxxxxxxx", AInstEmit.Fmov_Si, typeof(AOpCodeSimdFmov)); SetA64("00011110xx1xxxxxxxx100xxxxxxxxxx", AInstEmit.Fmov_Si, typeof(AOpCodeSimdFmov));
SetA64("0xx0111100000xxx111101xxxxxxxxxx", AInstEmit.Fmov_V, typeof(AOpCodeSimdImm)); SetA64("0xx0111100000xxx111101xxxxxxxxxx", AInstEmit.Fmov_V, typeof(AOpCodeSimdImm));
@ -298,11 +299,13 @@ namespace ChocolArm64
SetA64("1001111010101111000000xxxxxxxxxx", AInstEmit.Fmov_Itof1, typeof(AOpCodeSimdCvt)); SetA64("1001111010101111000000xxxxxxxxxx", AInstEmit.Fmov_Itof1, typeof(AOpCodeSimdCvt));
SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S, typeof(AOpCodeSimdReg)); SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S, typeof(AOpCodeSimdReg)); SetA64("000111100x1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S, typeof(AOpCodeSimdReg));
SetA64("010111111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Se, typeof(AOpCodeSimdRegElemF)); SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg)); SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg));
SetA64("0x0011111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve, typeof(AOpCodeSimdRegElemF)); SetA64("0>0011111<xxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("010111100x1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_S, typeof(AOpCodeSimdReg)); SetA64("010111100x1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_S, typeof(AOpCodeSimdReg));
SetA64("011111111xxxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmulx_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg)); SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg));
SetA64("0>1011111<xxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmulx_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("000111100x100001010000xxxxxxxxxx", AInstEmit.Fneg_S, typeof(AOpCodeSimd)); SetA64("000111100x100001010000xxxxxxxxxx", AInstEmit.Fneg_S, typeof(AOpCodeSimd));
SetA64("0>1011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd)); SetA64("0>1011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd));
SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg)); SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg));
@ -401,6 +404,7 @@ namespace ChocolArm64
SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg));
SetA64("0x001110000xxxxx001011xxxxxxxxxx", AInstEmit.Smov_S, typeof(AOpCodeSimdIns));
SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg));
SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd)); SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd));
SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd)); SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd));

View file

@ -8,15 +8,26 @@ namespace ChocolArm64.Decoder
public AOpCodeSimdRegElemF(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode) public AOpCodeSimdRegElemF(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode)
{ {
if ((Size & 1) != 0) switch ((OpCode >> 21) & 3) // sz:L
{ {
Index = (OpCode >> 11) & 1; case 0: // H:0
} Index = (OpCode >> 10) & 2; // 0, 2
else
{ break;
Index = (OpCode >> 21) & 1 |
(OpCode >> 10) & 2; case 1: // H:1
Index = (OpCode >> 10) & 2;
Index++; // 1, 3
break;
case 2: // H
Index = (OpCode >> 11) & 1; // 0, 1
break;
default: Emitter = AInstEmit.Und; return;
} }
} }
} }
} }

View file

@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System; using System;
using System.Reflection; using System.Reflection;
using System.Reflection.Emit; using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitAluHelper; using static ChocolArm64.Instruction.AInstEmitAluHelper;
@ -117,9 +118,18 @@ namespace ChocolArm64.Instruction
Context.EmitLdintzr(Op.Rn); Context.EmitLdintzr(Op.Rn);
Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); if (Lzcnt.IsSupported)
{
Type TValue = Op.RegisterSize == ARegisterSize.Int32 ? typeof(uint) : typeof(ulong);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { TValue }));
}
else
{
Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
}
Context.EmitStintzr(Op.Rd); Context.EmitStintzr(Op.Rd);
} }

View file

@ -82,20 +82,6 @@ namespace ChocolArm64.Instruction
} }
public static void Cls_V(AILEmitterCtx Context) public static void Cls_V(AILEmitterCtx Context)
{
MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingSigns));
EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo));
}
public static void Clz_V(AILEmitterCtx Context)
{
MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingZeros));
EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo));
}
private static void EmitCountLeadingBits(AILEmitterCtx Context, Action Emit)
{ {
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@ -110,7 +96,44 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(ESize); Context.EmitLdc_I4(ESize);
Emit(); ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns));
EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
}
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
public static void Clz_V(AILEmitterCtx Context)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int Bytes = Op.GetBitsCount() >> 3;
int Elems = Bytes >> Op.Size;
int ESize = 8 << Op.Size;
for (int Index = 0; Index < Elems; Index++)
{
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
if (Lzcnt.IsSupported && ESize == 32)
{
Context.Emit(OpCodes.Conv_U4);
Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) }));
Context.Emit(OpCodes.Conv_U8);
}
else
{
Context.EmitLdc_I4(ESize);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
}
EmitVectorInsert(Context, Op.Rd, Index, Op.Size); EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
} }
@ -131,11 +154,14 @@ namespace ChocolArm64.Instruction
{ {
EmitVectorExtractZx(Context, Op.Rn, Index, 0); EmitVectorExtractZx(Context, Op.Rn, Index, 0);
Context.Emit(OpCodes.Conv_U4); if (Popcnt.IsSupported)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) }));
}
Context.Emit(OpCodes.Conv_U8); else
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
}
EmitVectorInsert(Context, Op.Rd, Index, 0); EmitVectorInsert(Context, Op.Rd, Index, 0);
} }
@ -440,6 +466,15 @@ namespace ChocolArm64.Instruction
}); });
} }
// Emitter for the Fmls_Se instruction (registered in the opcode table with AOpCodeSimdRegElemF).
// It hands EmitScalarTernaryOpByElemF a callback that emits a multiply followed by a subtract.
// NOTE(review): presumably the helper pushes the accumulator first and then the two factors,
// so Mul + Sub yields "acc - (n * m[index])" — confirm against the helper's load order.
public static void Fmls_Se(AILEmitterCtx Context)
{
    // Named local function instead of an inline lambda; the IL emitted is identical.
    void EmitMulThenSub()
    {
        Context.Emit(OpCodes.Mul);
        Context.Emit(OpCodes.Sub);
    }

    EmitScalarTernaryOpByElemF(Context, EmitMulThenSub);
}
public static void Fmls_V(AILEmitterCtx Context) public static void Fmls_V(AILEmitterCtx Context)
{ {
EmitVectorTernaryOpF(Context, () => EmitVectorTernaryOpF(Context, () =>
@ -554,6 +589,14 @@ namespace ChocolArm64.Instruction
}); });
} }
// Emitter for the Fmulx_Se instruction: the per-element operation is a call to the
// soft-float routine named FPMulX, wrapped by EmitScalarBinaryOpByElemF.
public static void Fmulx_Se(AILEmitterCtx Context)
{
    // Only the method *name* is taken from ASoftFloat_32 via nameof; which soft-float
    // class is actually invoked is decided inside EmitSoftFloatCall (not visible here).
    EmitScalarBinaryOpByElemF(
        Context,
        () => EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)));
}
public static void Fmulx_V(AILEmitterCtx Context) public static void Fmulx_V(AILEmitterCtx Context)
{ {
EmitVectorBinaryOpF(Context, () => EmitVectorBinaryOpF(Context, () =>
@ -562,6 +605,14 @@ namespace ChocolArm64.Instruction
}); });
} }
// Emitter for the Fmulx_Ve instruction: same soft-float FPMulX call as the scalar
// by-element variant, but driven through EmitVectorBinaryOpByElemF.
public static void Fmulx_Ve(AILEmitterCtx Context)
{
    // nameof is evaluated at compile time, so hoisting it into a constant does not change
    // the string passed to EmitSoftFloatCall.
    const string SoftFloatMethod = nameof(ASoftFloat_32.FPMulX);

    EmitVectorBinaryOpByElemF(Context, () => EmitSoftFloatCall(Context, SoftFloatMethod));
}
public static void Fneg_S(AILEmitterCtx Context) public static void Fneg_S(AILEmitterCtx Context)
{ {
EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg)); EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg));

View file

@ -249,6 +249,17 @@ namespace ChocolArm64.Instruction
EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not)); EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not));
} }
// Emitter for the Smov_S instruction: reads one element of vector Rn with sign extension
// (EmitVectorExtractSx) and stores the result to Rd through EmitStintzr.
public static void Smov_S(AILEmitterCtx Context)
{
    AOpCodeSimdIns InsOp = (AOpCodeSimdIns)Context.CurrOp;

    // 1) Push the sign-extended element selected by DstIndex / Size.
    EmitVectorExtractSx(Context, InsOp.Rn, InsOp.DstIndex, InsOp.Size);

    // 2) Let the shared helper decide, from the current op's register size, whether the
    //    upper half of the value has to be cleared before it is stored.
    EmitIntZeroUpperIfNeeded(Context);

    // 3) Store the value into the destination register. The emission order of these
    //    three steps must not change.
    Context.EmitStintzr(InsOp.Rd);
}
public static void Tbl_V(AILEmitterCtx Context) public static void Tbl_V(AILEmitterCtx Context)
{ {
AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp; AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp;
@ -421,7 +432,8 @@ namespace ChocolArm64.Instruction
private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context) private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context)
{ {
if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) if (Context.CurrOp.RegisterSize == ARegisterSize.Int32 ||
Context.CurrOp.RegisterSize == ARegisterSize.SIMD64)
{ {
Context.Emit(OpCodes.Conv_U4); Context.Emit(OpCodes.Conv_U4);
Context.Emit(OpCodes.Conv_U8); Context.Emit(OpCodes.Conv_U8);

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation; using ChocolArm64.Translation;
using System; using System;
using System.Reflection.Emit; using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper; using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -31,12 +32,32 @@ namespace ChocolArm64.Instruction
{ {
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
EmitVectorUnaryOpZx(Context, () => if (AOptimizations.UseSse2 && Op.Size > 0)
{ {
Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShl(Op)); Context.EmitLdc_I4(GetImmShl(Op));
Context.Emit(OpCodes.Shl); Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types));
});
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else
{
EmitVectorUnaryOpZx(Context, () =>
{
Context.EmitLdc_I4(GetImmShl(Op));
Context.Emit(OpCodes.Shl);
});
}
} }
public static void Shll_V(AILEmitterCtx Context) public static void Shll_V(AILEmitterCtx Context)
@ -167,7 +188,30 @@ namespace ChocolArm64.Instruction
public static void Sshr_V(AILEmitterCtx Context) public static void Sshr_V(AILEmitterCtx Context)
{ {
EmitShrImmOp(Context, ShrImmFlags.VectorSx); AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
if (AOptimizations.UseSse2 && Op.Size > 0
&& Op.Size < 3)
{
Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types));
EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else
{
EmitShrImmOp(Context, ShrImmFlags.VectorSx);
}
} }
public static void Ssra_S(AILEmitterCtx Context) public static void Ssra_S(AILEmitterCtx Context)
@ -177,7 +221,33 @@ namespace ChocolArm64.Instruction
public static void Ssra_V(AILEmitterCtx Context) public static void Ssra_V(AILEmitterCtx Context)
{ {
EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
if (AOptimizations.UseSse2 && Op.Size > 0
&& Op.Size < 3)
{
Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] };
EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size);
EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else
{
EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate);
}
} }
public static void Uqrshrn_S(AILEmitterCtx Context) public static void Uqrshrn_S(AILEmitterCtx Context)
@ -239,7 +309,29 @@ namespace ChocolArm64.Instruction
public static void Ushr_V(AILEmitterCtx Context) public static void Ushr_V(AILEmitterCtx Context)
{ {
EmitShrImmOp(Context, ShrImmFlags.VectorZx); AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
if (AOptimizations.UseSse2 && Op.Size > 0)
{
Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types));
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else
{
EmitShrImmOp(Context, ShrImmFlags.VectorZx);
}
} }
public static void Usra_S(AILEmitterCtx Context) public static void Usra_S(AILEmitterCtx Context)
@ -249,7 +341,32 @@ namespace ChocolArm64.Instruction
public static void Usra_V(AILEmitterCtx Context) public static void Usra_V(AILEmitterCtx Context)
{ {
EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
if (AOptimizations.UseSse2 && Op.Size > 0)
{
Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] };
EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl));
Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
else
{
EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate);
}
} }
private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) private static void EmitVectorShl(AILEmitterCtx Context, bool Signed)

View file

@ -386,7 +386,7 @@ namespace ChocolArm64.Instruction
#endregion #endregion
#region "Count" #region "Count"
public static ulong CountLeadingSigns(ulong Value, int Size) public static ulong CountLeadingSigns(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
{ {
Value ^= Value >> 1; Value ^= Value >> 1;
@ -405,9 +405,9 @@ namespace ChocolArm64.Instruction
private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
public static ulong CountLeadingZeros(ulong Value, int Size) public static ulong CountLeadingZeros(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
{ {
if (Value == 0) if (Value == 0ul)
{ {
return (ulong)Size; return (ulong)Size;
} }
@ -426,12 +426,17 @@ namespace ChocolArm64.Instruction
return (ulong)Count; return (ulong)Count;
} }
public static uint CountSetBits8(uint Value) public static ulong CountSetBits8(ulong Value) // "Size" is 8 (SIMD&FP Inst.).
{ {
Value = ((Value >> 1) & 0x55) + (Value & 0x55); if (Value == 0xfful)
Value = ((Value >> 2) & 0x33) + (Value & 0x33); {
return 8ul;
}
return (Value >> 4) + (Value & 0x0f); Value = ((Value >> 1) & 0x55ul) + (Value & 0x55ul);
Value = ((Value >> 2) & 0x33ul) + (Value & 0x33ul);
return (Value >> 4) + (Value & 0x0ful);
} }
#endregion #endregion

View file

@ -365,8 +365,8 @@ namespace ChocolArm64.Instruction
{ {
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: ");
Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); Value1.FPUnpack(out FPType Type1, out _, out _);
Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{ {
@ -430,8 +430,8 @@ namespace ChocolArm64.Instruction
{ {
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: ");
Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); Value1.FPUnpack(out FPType Type1, out _, out _);
Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{ {
@ -1091,8 +1091,8 @@ namespace ChocolArm64.Instruction
{ {
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: ");
Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); Value1.FPUnpack(out FPType Type1, out _, out _);
Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{ {
@ -1156,8 +1156,8 @@ namespace ChocolArm64.Instruction
{ {
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: ");
Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); Value1.FPUnpack(out FPType Type1, out _, out _);
Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{ {