Add intrinsics support (#121)

* Initial intrinsics support

* Update tests to work with the new Vector128 type and intrinsics

* Drop SSE4.1 requirement

* Fix copy-paste mistake
This commit is contained in:
gdkchan 2018-05-11 20:10:27 -03:00 committed by GitHub
parent 428360c5ac
commit 1aa96453ef
14 changed files with 1233 additions and 713 deletions

View file

@ -1,6 +1,12 @@
using System.Runtime.Intrinsics.X86;
public static class AOptimizations
{
public static bool DisableMemoryChecks = false;
public static bool GenerateCallStack = true;
public static bool UseSse2IfAvailable = true;
internal static bool UseSse2 = UseSse2IfAvailable && Sse2.IsSupported;
}

View file

@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netcoreapp2.0</TargetFramework>
<TargetFramework>netcoreapp2.1</TargetFramework>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
@ -12,4 +12,8 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
</ItemGroup>
</Project>

View file

@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -40,9 +41,16 @@ namespace ChocolArm64.Instruction
}
public static void Add_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Add));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Addhn_V(AILEmitterCtx Context)
{
@ -158,7 +166,7 @@ namespace ChocolArm64.Instruction
Context.Emit(OpCodes.Conv_U1);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8));
Context.Emit(OpCodes.Conv_U8);
@ -302,14 +310,28 @@ namespace ChocolArm64.Instruction
}
public static void Fadd_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.AddScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Fadd_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Add));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
}
}
public static void Faddp_V(AILEmitterCtx Context)
{
@ -344,14 +366,28 @@ namespace ChocolArm64.Instruction
}
public static void Fdiv_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.DivideScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
}
}
public static void Fdiv_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Divide));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
}
}
public static void Fmadd_S(AILEmitterCtx Context)
{
@ -370,11 +406,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -391,11 +427,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
}
else
{
@ -412,11 +448,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -435,11 +471,11 @@ namespace ChocolArm64.Instruction
{
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
}
else
{
@ -504,9 +540,16 @@ namespace ChocolArm64.Instruction
}
public static void Fmul_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
}
}
public static void Fmul_Se(AILEmitterCtx Context)
{
@ -514,9 +557,16 @@ namespace ChocolArm64.Instruction
}
public static void Fmul_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Multiply));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
}
}
public static void Fmul_Ve(AILEmitterCtx Context)
{
@ -716,11 +766,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -743,11 +793,11 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (SizeF == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -819,11 +869,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -844,11 +894,11 @@ namespace ChocolArm64.Instruction
if (Op.Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
}
else if (Op.Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
}
else
{
@ -946,14 +996,28 @@ namespace ChocolArm64.Instruction
}
public static void Fsub_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.SubtractScalar));
}
else
{
EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Fsub_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2CallF(Context, nameof(Sse2.Subtract));
}
else
{
EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Mla_V(AILEmitterCtx Context)
{
@ -1065,9 +1129,16 @@ namespace ChocolArm64.Instruction
}
public static void Sub_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Subtract));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
}
}
public static void Subhn_V(AILEmitterCtx Context)
{

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitAluHelper;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -12,19 +13,40 @@ namespace ChocolArm64.Instruction
static partial class AInstEmit
{
public static void Cmeq_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareEqual));
}
else
{
EmitVectorCmp(Context, OpCodes.Beq_S);
}
}
public static void Cmge_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareGreaterThanOrEqual));
}
else
{
EmitVectorCmp(Context, OpCodes.Bge_S);
}
}
public static void Cmgt_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan));
}
else
{
EmitVectorCmp(Context, OpCodes.Bgt_S);
}
}
public static void Cmhi_V(AILEmitterCtx Context)
{
@ -111,34 +133,76 @@ namespace ChocolArm64.Instruction
}
public static void Fcmeq_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Beq_S);
}
}
public static void Fcmeq_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareEqual));
}
else
{
EmitVectorFcmp(Context, OpCodes.Beq_S);
}
}
public static void Fcmge_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Bge_S);
}
}
public static void Fcmge_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual));
}
else
{
EmitVectorFcmp(Context, OpCodes.Bge_S);
}
}
public static void Fcmgt_S(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar));
}
else
{
EmitScalarFcmp(Context, OpCodes.Bgt_S);
}
}
public static void Fcmgt_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
{
EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan));
}
else
{
EmitVectorFcmp(Context, OpCodes.Bgt_S);
}
}
public static void Fcmle_S(AILEmitterCtx Context)
{

View file

@ -382,15 +382,15 @@ namespace ChocolArm64.Instruction
if (SizeF == 0)
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF32ToS32)
: nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF32ToS32)
: nameof(AVectorHelper.SatF32ToU32));
}
else /* if (SizeF == 1) */
{
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.SatF64ToS64)
: nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.SatF64ToS64)
: nameof(AVectorHelper.SatF64ToU64));
}
if (SizeF == 0)
@ -420,22 +420,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS64));
}
}
}
@ -453,22 +453,22 @@ namespace ChocolArm64.Instruction
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU32));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU32));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU32));
}
}
else
{
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU64));
}
else /* if (Size == 1) */
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU64));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU64));
}
}
}

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
@ -32,6 +34,129 @@ namespace ChocolArm64.Instruction
return (8 << (Op.Size + 1)) - Op.Imm;
}
public static void EmitSse2Call(AILEmitterCtx Context, string Name)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
void Ldvec(int Reg)
{
Context.EmitLdvec(Reg);
switch (Op.Size)
{
case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;
}
}
Ldvec(Op.Rn);
Type BaseType = null;
Type[] Types;
switch (Op.Size)
{
case 0: BaseType = typeof(Vector128<sbyte>); break;
case 1: BaseType = typeof(Vector128<short>); break;
case 2: BaseType = typeof(Vector128<int>); break;
case 3: BaseType = typeof(Vector128<long>); break;
}
if (Op is AOpCodeSimdReg BinOp)
{
Ldvec(BinOp.Rm);
Types = new Type[] { BaseType, BaseType };
}
else
{
Types = new Type[] { BaseType };
}
Context.EmitCall(typeof(Sse2).GetMethod(Name, Types));
switch (Op.Size)
{
case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break;
case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break;
case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break;
case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break;
}
Context.EmitStvec(Op.Rd);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
public static void EmitSse2CallF(AILEmitterCtx Context, string Name)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
void Ldvec(int Reg)
{
Context.EmitLdvec(Reg);
if (SizeF == 1)
{
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
}
}
Ldvec(Op.Rn);
Type BaseType = SizeF == 0
? typeof(Vector128<float>)
: typeof(Vector128<double>);
Type[] Types;
if (Op is AOpCodeSimdReg BinOp)
{
Ldvec(BinOp.Rm);
Types = new Type[] { BaseType, BaseType };
}
else
{
Types = new Type[] { BaseType };
}
MethodInfo MthdInfo;
if (SizeF == 0)
{
MthdInfo = typeof(Sse).GetMethod(Name, Types);
}
else /* if (SizeF == 1) */
{
MthdInfo = typeof(Sse2).GetMethod(Name, Types);
}
Context.EmitCall(MthdInfo);
if (SizeF == 1)
{
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
}
Context.EmitStvec(Op.Rd);
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name)
{
IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
@ -596,9 +721,9 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, Signed
? nameof(ASoftFallback.VectorExtractIntSx)
: nameof(ASoftFallback.VectorExtractIntZx));
AVectorHelper.EmitCall(Context, Signed
? nameof(AVectorHelper.VectorExtractIntSx)
: nameof(AVectorHelper.VectorExtractIntZx));
}
public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
@ -610,11 +735,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble));
}
else
{
@ -646,7 +771,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -659,7 +784,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvectmp();
}
@ -673,7 +798,7 @@ namespace ChocolArm64.Instruction
Context.EmitLdc_I4(Index);
Context.EmitLdc_I4(Size);
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));
Context.EmitStvec(Reg);
}
@ -687,11 +812,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{
@ -710,11 +835,11 @@ namespace ChocolArm64.Instruction
if (Size == 0)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
}
else if (Size == 1)
{
ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
}
else
{

View file

@ -2,6 +2,7 @@ using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System.Reflection.Emit;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -10,9 +11,16 @@ namespace ChocolArm64.Instruction
static partial class AInstEmit
{
public static void And_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.And));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
}
}
public static void Bic_V(AILEmitterCtx Context)
{
@ -94,9 +102,16 @@ namespace ChocolArm64.Instruction
}
public static void Eor_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Xor));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
}
}
public static void Not_V(AILEmitterCtx Context)
{
@ -113,9 +128,16 @@ namespace ChocolArm64.Instruction
}
public static void Orr_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
EmitSse2Call(Context, nameof(Sse2.Or));
}
else
{
EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
}
}
public static void Orr_Vi(AILEmitterCtx Context)
{

View file

@ -234,21 +234,21 @@ namespace ChocolArm64.Instruction
switch (Op.Size)
{
case 1: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl1_V64),
nameof(ASoftFallback.Tbl1_V128)); break;
case 1: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl1_V64),
nameof(AVectorHelper.Tbl1_V128)); break;
case 2: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl2_V64),
nameof(ASoftFallback.Tbl2_V128)); break;
case 2: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl2_V64),
nameof(AVectorHelper.Tbl2_V128)); break;
case 3: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl3_V64),
nameof(ASoftFallback.Tbl3_V128)); break;
case 3: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl3_V64),
nameof(AVectorHelper.Tbl3_V128)); break;
case 4: ASoftFallback.EmitCall(Context,
nameof(ASoftFallback.Tbl4_V64),
nameof(ASoftFallback.Tbl4_V128)); break;
case 4: AVectorHelper.EmitCall(Context,
nameof(AVectorHelper.Tbl4_V64),
nameof(AVectorHelper.Tbl4_V128)); break;
default: throw new InvalidOperationException();
}

View file

@ -1,20 +1,11 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace ChocolArm64.Instruction
{
static class ASoftFallback
{
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
Context.EmitCall(typeof(ASoftFallback), IsSimd64 ? Name64 : Name128);
}
public static void EmitCall(AILEmitterCtx Context, string MthdName)
{
Context.EmitCall(typeof(ASoftFallback), MthdName);
@ -160,78 +151,6 @@ namespace ChocolArm64.Instruction
throw new ArgumentException(nameof(Size));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
public static long SMulHi128(long LHS, long RHS)
{
return (long)(BigInteger.Multiply(LHS, RHS) >> 64);
@ -241,239 +160,5 @@ namespace ChocolArm64.Instruction
{
return (ulong)(BigInteger.Multiply(LHS, RHS) >> 64);
}
public static int CountSetBits8(byte Value)
{
return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
((Value >> 2) & 1) + ((Value >> 3) & 1) +
((Value >> 4) & 1) + ((Value >> 5) & 1) +
((Value >> 6) & 1) + (Value >> 7);
}
public static float MaxF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 && BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 > val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Max(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 && BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 > val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float MinF(float val1, float val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.SingleToInt32Bits(val1) < 0 || BitConverter.SingleToInt32Bits(val2) < 0)
return -0.0f;
return 0.0f;
}
if (val1 < val2)
return val1;
if (float.IsNaN(val1))
return val1;
return val2;
}
public static double Min(double val1, double val2)
{
if (val1 == 0.0 && val2 == 0.0)
{
if (BitConverter.DoubleToInt64Bits(val1) < 0 || BitConverter.DoubleToInt64Bits(val2) < 0)
return -0.0;
return 0.0;
}
if (val1 < val2)
return val1;
if (double.IsNaN(val1))
return val1;
return val2;
}
public static float RoundF(float Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return MathF.Round (Value);
case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value);
case ARoundMode.TowardsZero: return MathF.Truncate(Value);
}
throw new InvalidOperationException();
}
public static double Round(double Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return Math.Round (Value);
case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value);
case ARoundMode.TowardsZero: return Math.Truncate(Value);
}
throw new InvalidOperationException();
}
public static AVec Tbl1_V64(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 8, Tb0);
}
public static AVec Tbl1_V128(AVec Vector, AVec Tb0)
{
return Tbl(Vector, 16, Tb0);
}
public static AVec Tbl2_V64(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 8, Tb0, Tb1);
}
public static AVec Tbl2_V128(AVec Vector, AVec Tb0, AVec Tb1)
{
return Tbl(Vector, 16, Tb0, Tb1);
}
public static AVec Tbl3_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2);
}
public static AVec Tbl3_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2);
}
public static AVec Tbl4_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
}
public static AVec Tbl4_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
}
private static AVec Tbl(AVec Vector, int Bytes, params AVec[] Tb)
{
AVec Res = new AVec();
byte[] Table = new byte[Tb.Length * 16];
for (int Index = 0; Index < Tb.Length; Index++)
for (int Index2 = 0; Index2 < 16; Index2++)
{
Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
}
for (int Index = 0; Index < Bytes; Index++)
{
byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
if (TblIdx < Table.Length)
{
Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
}
}
return Res;
}
public static ulong VectorExtractIntZx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return Vector.ExtractByte (Index);
case 1: return Vector.ExtractUInt16(Index);
case 2: return Vector.ExtractUInt32(Index);
case 3: return Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static long VectorExtractIntSx(AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return (sbyte)Vector.ExtractByte (Index);
case 1: return (short)Vector.ExtractUInt16(Index);
case 2: return (int)Vector.ExtractUInt32(Index);
case 3: return (long)Vector.ExtractUInt64(Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
public static float VectorExtractSingle(AVec Vector, int Index)
{
return Vector.ExtractSingle(Index);
}
public static double VectorExtractDouble(AVec Vector, int Index)
{
return Vector.ExtractDouble(Index);
}
public static AVec VectorInsertSingle(float Value, AVec Vector, int Index)
{
return AVec.InsertSingle(Vector, Index, Value);
}
public static AVec VectorInsertDouble(double Value, AVec Vector, int Index)
{
return AVec.InsertDouble(Vector, Index, Value);
}
public static AVec VectorInsertInt(ulong Value, AVec Vector, int Index, int Size)
{
switch (Size)
{
case 0: return AVec.InsertByte (Vector, Index, (byte)Value);
case 1: return AVec.InsertUInt16(Vector, Index, (ushort)Value);
case 2: return AVec.InsertUInt32(Vector, Index, (uint)Value);
case 3: return AVec.InsertUInt64(Vector, Index, (ulong)Value);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
}
}

View file

@ -0,0 +1,626 @@
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instruction
{
static class AVectorHelper
{
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
Context.EmitCall(typeof(AVectorHelper), IsSimd64 ? Name64 : Name128);
}
public static void EmitCall(AILEmitterCtx Context, string MthdName)
{
Context.EmitCall(typeof(AVectorHelper), MthdName);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF32ToS32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF32ToS64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF32ToU32(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF32ToU64(float Value)
{
if (float.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int SatF64ToS32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > int.MaxValue ? int.MaxValue :
Value < int.MinValue ? int.MinValue : (int)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long SatF64ToS64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > long.MaxValue ? long.MaxValue :
Value < long.MinValue ? long.MinValue : (long)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint SatF64ToU32(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > uint.MaxValue ? uint.MaxValue :
Value < uint.MinValue ? uint.MinValue : (uint)Value;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong SatF64ToU64(double Value)
{
if (double.IsNaN(Value)) return 0;
return Value > ulong.MaxValue ? ulong.MaxValue :
Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
}
public static int CountSetBits8(byte Value)
{
return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
((Value >> 2) & 1) + ((Value >> 3) & 1) +
((Value >> 4) & 1) + ((Value >> 5) & 1) +
((Value >> 6) & 1) + (Value >> 7);
}
public static double Max(double LHS, double RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.DoubleToInt64Bits(LHS) < 0 &&
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
}
if (LHS > RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
}
public static float MaxF(float LHS, float RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.SingleToInt32Bits(LHS) < 0 &&
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
}
if (LHS > RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
}
public static double Min(double LHS, double RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.DoubleToInt64Bits(LHS) < 0 ||
BitConverter.DoubleToInt64Bits(RHS) < 0)
return -0.0;
return 0.0;
}
if (LHS < RHS)
return LHS;
if (double.IsNaN(LHS))
return LHS;
return RHS;
}
public static float MinF(float LHS, float RHS)
{
if (LHS == 0.0 && RHS == 0.0)
{
if (BitConverter.SingleToInt32Bits(LHS) < 0 ||
BitConverter.SingleToInt32Bits(RHS) < 0)
return -0.0f;
return 0.0f;
}
if (LHS < RHS)
return LHS;
if (float.IsNaN(LHS))
return LHS;
return RHS;
}
public static double Round(double Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return Math.Round (Value);
case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value);
case ARoundMode.TowardsZero: return Math.Truncate(Value);
}
throw new InvalidOperationException();
}
public static float RoundF(float Value, int Fpcr)
{
switch ((ARoundMode)((Fpcr >> 22) & 3))
{
case ARoundMode.ToNearest: return MathF.Round (Value);
case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value);
case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value);
case ARoundMode.TowardsZero: return MathF.Truncate(Value);
}
throw new InvalidOperationException();
}
public static Vector128<float> Tbl1_V64(
Vector128<float> Vector,
Vector128<float> Tb0)
{
return Tbl(Vector, 8, Tb0);
}
public static Vector128<float> Tbl1_V128(
Vector128<float> Vector,
Vector128<float> Tb0)
{
return Tbl(Vector, 16, Tb0);
}
public static Vector128<float> Tbl2_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1)
{
return Tbl(Vector, 8, Tb0, Tb1);
}
public static Vector128<float> Tbl2_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1)
{
return Tbl(Vector, 16, Tb0, Tb1);
}
public static Vector128<float> Tbl3_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2);
}
public static Vector128<float> Tbl3_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2);
}
public static Vector128<float> Tbl4_V64(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2,
Vector128<float> Tb3)
{
return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
}
public static Vector128<float> Tbl4_V128(
Vector128<float> Vector,
Vector128<float> Tb0,
Vector128<float> Tb1,
Vector128<float> Tb2,
Vector128<float> Tb3)
{
return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
}
private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb)
{
Vector128<float> Res = new Vector128<float>();
byte[] Table = new byte[Tb.Length * 16];
for (byte Index = 0; Index < Tb.Length; Index++)
for (byte Index2 = 0; Index2 < 16; Index2++)
{
Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
}
for (byte Index = 0; Index < Bytes; Index++)
{
byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
if (TblIdx < Table.Length)
{
Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
}
}
return Res;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
case 1:
return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
case 2:
return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
case 3:
return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
switch (Size)
{
case 0:
return (sbyte)VectorExtractIntZx(Vector, Index, Size);
case 1:
return (short)VectorExtractIntZx(Vector, Index, Size);
case 2:
return (int)VectorExtractIntZx(Vector, Index, Size);
case 3:
return (long)VectorExtractIntZx(Vector, Index, Size);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
case 1:
return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
case 2:
return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
case 3:
return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
switch (Size)
{
case 0:
return (byte)(Value >> (Index & 1) * 8);
case 1:
return Value;
case 2:
case 3:
{
ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
if (Size == 2)
{
return (uint)(Value | (Value1 << 16));
}
ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
return ((ulong)Value << 0) |
((ulong)Value1 << 16) |
((ulong)Value2 << 32) |
((ulong)Value3 << 48);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
{
if (Sse41.IsSupported)
{
return Sse41.Extract(Vector, Index);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index)
{
return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(Value), Vector, Index, 3);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size)
{
if (Sse41.IsSupported)
{
switch (Size)
{
case 0:
return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
case 3:
return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
int ShortIdx = Size == 0
? Index >> 1
: Index << (Size - 1);
switch (Size)
{
case 0:
{
ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
int Shift = (Index & 1) * 8;
ShortVal &= (ushort)(0xff00 >> Shift);
ShortVal |= (ushort)((byte)Value << Shift);
return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
}
case 1:
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
case 2:
case 3:
{
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
if (Size == 3)
{
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
}
return Sse.StaticCast<ushort, float>(ShortVector);
}
}
throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index)
{
if (Sse41.IsSupported)
{
return Sse41.Insert(Vector, Value, (byte)(Index << 4));
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, sbyte>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, short>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, int>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, long>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<float, double>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<sbyte, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<short, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<int, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<long, float>(Vector);
}
throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
{
if (Sse.IsSupported)
{
return Sse.StaticCast<double, float>(Vector);
}
throw new PlatformNotSupportedException();
}
}
}

View file

@ -3,6 +3,8 @@ using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Memory
{
@ -189,33 +191,73 @@ namespace ChocolArm64.Memory
return ReadUInt64Unchecked(Position);
}
public AVec ReadVector8(long Position)
public Vector128<float> ReadVector8(long Position)
{
return new AVec() { B0 = ReadByte(Position) };
if (Sse2.IsSupported)
{
return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByte(Position)));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector16(long Position)
public Vector128<float> ReadVector16(long Position)
{
return new AVec() { H0 = ReadUInt16(Position) };
if (Sse2.IsSupported)
{
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse2.SetZeroVector128<ushort>(), ReadUInt16(Position), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector32(long Position)
public Vector128<float> ReadVector32(long Position)
{
return new AVec() { W0 = ReadUInt32(Position) };
EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
EnsureAccessIsValid(Position + 3, AMemoryPerm.Read);
if (Sse.IsSupported)
{
return Sse.LoadScalarVector128((float*)(RamPtr + (uint)Position));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector64(long Position)
public Vector128<float> ReadVector64(long Position)
{
return new AVec() { X0 = ReadUInt64(Position) };
EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
EnsureAccessIsValid(Position + 7, AMemoryPerm.Read);
if (Sse2.IsSupported)
{
return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128((double*)(RamPtr + (uint)Position)));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector128(long Position)
public Vector128<float> ReadVector128(long Position)
{
return new AVec()
EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
EnsureAccessIsValid(Position + 15, AMemoryPerm.Read);
if (Sse.IsSupported)
{
X0 = ReadUInt64(Position + 0),
X1 = ReadUInt64(Position + 8)
};
return Sse.LoadVector128((float*)(RamPtr + (uint)Position));
}
else
{
throw new PlatformNotSupportedException();
}
}
public sbyte ReadSByteUnchecked(long Position)
@ -258,33 +300,64 @@ namespace ChocolArm64.Memory
return *((ulong*)(RamPtr + (uint)Position));
}
public AVec ReadVector8Unchecked(long Position)
public Vector128<float> ReadVector8Unchecked(long Position)
{
return new AVec() { B0 = ReadByteUnchecked(Position) };
if (Sse2.IsSupported)
{
return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByte(Position)));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector16Unchecked(long Position)
public Vector128<float> ReadVector16Unchecked(long Position)
{
return new AVec() { H0 = ReadUInt16Unchecked(Position) };
if (Sse2.IsSupported)
{
return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse2.SetZeroVector128<ushort>(), ReadUInt16Unchecked(Position), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector32Unchecked(long Position)
public Vector128<float> ReadVector32Unchecked(long Position)
{
return new AVec() { W0 = ReadUInt32Unchecked(Position) };
if (Sse.IsSupported)
{
return Sse.LoadScalarVector128((float*)(RamPtr + (uint)Position));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector64Unchecked(long Position)
public Vector128<float> ReadVector64Unchecked(long Position)
{
return new AVec() { X0 = ReadUInt64Unchecked(Position) };
if (Sse2.IsSupported)
{
return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128((double*)(RamPtr + (uint)Position)));
}
else
{
throw new PlatformNotSupportedException();
}
}
public AVec ReadVector128Unchecked(long Position)
public Vector128<float> ReadVector128Unchecked(long Position)
{
return new AVec()
if (Sse.IsSupported)
{
X0 = ReadUInt64Unchecked(Position + 0),
X1 = ReadUInt64Unchecked(Position + 8)
};
return Sse.LoadVector128((float*)(RamPtr + (uint)Position));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteSByte(long Position, sbyte Value)
@ -338,30 +411,77 @@ namespace ChocolArm64.Memory
WriteUInt64Unchecked(Position, Value);
}
public void WriteVector8(long Position, AVec Value)
public void WriteVector8(long Position, Vector128<float> Value)
{
WriteByte(Position, Value.B0);
if (Sse41.IsSupported)
{
WriteByte(Position, Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0));
}
else if (Sse2.IsSupported)
{
WriteByte(Position, (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector16(long Position, AVec Value)
public void WriteVector16(long Position, Vector128<float> Value)
{
WriteUInt16(Position, Value.H0);
if (Sse2.IsSupported)
{
WriteUInt16(Position, Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector32(long Position, AVec Value)
public void WriteVector32(long Position, Vector128<float> Value)
{
WriteUInt32(Position, Value.W0);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 3, AMemoryPerm.Write);
if (Sse.IsSupported)
{
Sse.StoreScalar((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector64(long Position, AVec Value)
public void WriteVector64(long Position, Vector128<float> Value)
{
WriteUInt64(Position, Value.X0);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 7, AMemoryPerm.Write);
if (Sse2.IsSupported)
{
Sse2.StoreScalar((double*)(RamPtr + (uint)Position), Sse.StaticCast<float, double>(Value));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector128(long Position, AVec Value)
public void WriteVector128(long Position, Vector128<float> Value)
{
WriteUInt64(Position + 0, Value.X0);
WriteUInt64(Position + 8, Value.X1);
EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
EnsureAccessIsValid(Position + 15, AMemoryPerm.Write);
if (Sse.IsSupported)
{
Sse.Store((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteSByteUnchecked(long Position, sbyte Value)
@ -404,30 +524,68 @@ namespace ChocolArm64.Memory
*((ulong*)(RamPtr + (uint)Position)) = Value;
}
public void WriteVector8Unchecked(long Position, AVec Value)
public void WriteVector8Unchecked(long Position, Vector128<float> Value)
{
WriteByteUnchecked(Position, Value.B0);
if (Sse41.IsSupported)
{
WriteByteUnchecked(Position, Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0));
}
else if (Sse2.IsSupported)
{
WriteByteUnchecked(Position, (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector16Unchecked(long Position, AVec Value)
public void WriteVector16Unchecked(long Position, Vector128<float> Value)
{
WriteUInt16Unchecked(Position, Value.H0);
if (Sse2.IsSupported)
{
WriteUInt16Unchecked(Position, Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector32Unchecked(long Position, AVec Value)
public void WriteVector32Unchecked(long Position, Vector128<float> Value)
{
WriteUInt32Unchecked(Position, Value.W0);
if (Sse.IsSupported)
{
Sse.StoreScalar((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector64Unchecked(long Position, AVec Value)
public void WriteVector64Unchecked(long Position, Vector128<float> Value)
{
WriteUInt64Unchecked(Position, Value.X0);
if (Sse2.IsSupported)
{
Sse2.StoreScalar((double*)(RamPtr + (uint)Position), Sse.StaticCast<float, double>(Value));
}
else
{
throw new PlatformNotSupportedException();
}
}
public void WriteVector128Unchecked(long Position, AVec Value)
public void WriteVector128Unchecked(long Position, Vector128<float> Value)
{
WriteUInt64Unchecked(Position + 0, Value.X0);
WriteUInt64Unchecked(Position + 8, Value.X1);
if (Sse.IsSupported)
{
Sse.Store((float*)(RamPtr + (uint)Position), Value);
}
else
{
throw new PlatformNotSupportedException();
}
}
private void EnsureAccessIsValid(long Position, AMemoryPerm Perm)

View file

@ -2,6 +2,7 @@ using ChocolArm64.Events;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.Intrinsics;
namespace ChocolArm64.State
{
@ -18,7 +19,7 @@ namespace ChocolArm64.State
X16, X17, X18, X19, X20, X21, X22, X23,
X24, X25, X26, X27, X28, X29, X30, X31;
public AVec V0, V1, V2, V3, V4, V5, V6, V7,
public Vector128<float> V0, V1, V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13, V14, V15,
V16, V17, V18, V19, V20, V21, V22, V23,
V24, V25, V26, V27, V28, V29, V30, V31;

View file

@ -1,243 +0,0 @@
using System;
using System.Runtime.InteropServices;
namespace ChocolArm64.State
{
[StructLayout(LayoutKind.Explicit, Size = 16)]
public struct AVec
{
[FieldOffset(0x0)] public byte B0;
[FieldOffset(0x1)] public byte B1;
[FieldOffset(0x2)] public byte B2;
[FieldOffset(0x3)] public byte B3;
[FieldOffset(0x4)] public byte B4;
[FieldOffset(0x5)] public byte B5;
[FieldOffset(0x6)] public byte B6;
[FieldOffset(0x7)] public byte B7;
[FieldOffset(0x8)] public byte B8;
[FieldOffset(0x9)] public byte B9;
[FieldOffset(0xa)] public byte B10;
[FieldOffset(0xb)] public byte B11;
[FieldOffset(0xc)] public byte B12;
[FieldOffset(0xd)] public byte B13;
[FieldOffset(0xe)] public byte B14;
[FieldOffset(0xf)] public byte B15;
[FieldOffset(0x0)] public ushort H0;
[FieldOffset(0x2)] public ushort H1;
[FieldOffset(0x4)] public ushort H2;
[FieldOffset(0x6)] public ushort H3;
[FieldOffset(0x8)] public ushort H4;
[FieldOffset(0xa)] public ushort H5;
[FieldOffset(0xc)] public ushort H6;
[FieldOffset(0xe)] public ushort H7;
[FieldOffset(0x0)] public uint W0;
[FieldOffset(0x4)] public uint W1;
[FieldOffset(0x8)] public uint W2;
[FieldOffset(0xc)] public uint W3;
[FieldOffset(0x0)] public float S0;
[FieldOffset(0x4)] public float S1;
[FieldOffset(0x8)] public float S2;
[FieldOffset(0xc)] public float S3;
[FieldOffset(0x0)] public ulong X0;
[FieldOffset(0x8)] public ulong X1;
[FieldOffset(0x0)] public double D0;
[FieldOffset(0x8)] public double D1;
public byte ExtractByte(int Index)
{
switch (Index)
{
case 0: return B0;
case 1: return B1;
case 2: return B2;
case 3: return B3;
case 4: return B4;
case 5: return B5;
case 6: return B6;
case 7: return B7;
case 8: return B8;
case 9: return B9;
case 10: return B10;
case 11: return B11;
case 12: return B12;
case 13: return B13;
case 14: return B14;
case 15: return B15;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public ushort ExtractUInt16(int Index)
{
switch (Index)
{
case 0: return H0;
case 1: return H1;
case 2: return H2;
case 3: return H3;
case 4: return H4;
case 5: return H5;
case 6: return H6;
case 7: return H7;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public uint ExtractUInt32(int Index)
{
switch (Index)
{
case 0: return W0;
case 1: return W1;
case 2: return W2;
case 3: return W3;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public float ExtractSingle(int Index)
{
switch (Index)
{
case 0: return S0;
case 1: return S1;
case 2: return S2;
case 3: return S3;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public ulong ExtractUInt64(int Index)
{
switch (Index)
{
case 0: return X0;
case 1: return X1;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public double ExtractDouble(int Index)
{
switch (Index)
{
case 0: return D0;
case 1: return D1;
}
throw new ArgumentOutOfRangeException(nameof(Index));
}
public static AVec InsertByte(AVec Vec, int Index, byte Value)
{
switch (Index)
{
case 0: Vec.B0 = Value; break;
case 1: Vec.B1 = Value; break;
case 2: Vec.B2 = Value; break;
case 3: Vec.B3 = Value; break;
case 4: Vec.B4 = Value; break;
case 5: Vec.B5 = Value; break;
case 6: Vec.B6 = Value; break;
case 7: Vec.B7 = Value; break;
case 8: Vec.B8 = Value; break;
case 9: Vec.B9 = Value; break;
case 10: Vec.B10 = Value; break;
case 11: Vec.B11 = Value; break;
case 12: Vec.B12 = Value; break;
case 13: Vec.B13 = Value; break;
case 14: Vec.B14 = Value; break;
case 15: Vec.B15 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
public static AVec InsertUInt16(AVec Vec, int Index, ushort Value)
{
switch (Index)
{
case 0: Vec.H0 = Value; break;
case 1: Vec.H1 = Value; break;
case 2: Vec.H2 = Value; break;
case 3: Vec.H3 = Value; break;
case 4: Vec.H4 = Value; break;
case 5: Vec.H5 = Value; break;
case 6: Vec.H6 = Value; break;
case 7: Vec.H7 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
public static AVec InsertUInt32(AVec Vec, int Index, uint Value)
{
switch (Index)
{
case 0: Vec.W0 = Value; break;
case 1: Vec.W1 = Value; break;
case 2: Vec.W2 = Value; break;
case 3: Vec.W3 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
public static AVec InsertSingle(AVec Vec, int Index, float Value)
{
switch (Index)
{
case 0: Vec.S0 = Value; break;
case 1: Vec.S1 = Value; break;
case 2: Vec.S2 = Value; break;
case 3: Vec.S3 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
public static AVec InsertUInt64(AVec Vec, int Index, ulong Value)
{
switch (Index)
{
case 0: Vec.X0 = Value; break;
case 1: Vec.X1 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
public static AVec InsertDouble(AVec Vec, int Index, double Value)
{
switch (Index)
{
case 0: Vec.D0 = Value; break;
case 1: Vec.D1 = Value; break;
default: throw new ArgumentOutOfRangeException(nameof(Index));
}
return Vec;
}
}
}

View file

@ -3,6 +3,7 @@ using ChocolArm64.State;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
namespace ChocolArm64.Translation
{
@ -157,7 +158,7 @@ namespace ChocolArm64.Translation
{
case ARegisterType.Flag: return typeof(bool);
case ARegisterType.Int: return typeof(ulong);
case ARegisterType.Vector: return typeof(AVec);
case ARegisterType.Vector: return typeof(Vector128<float>);
}
throw new ArgumentException(nameof(RegType));