Ryujinx-git/ChocolArm64/Instructions/InstEmitSimdHelper.cs
LDj3SNuD 16de171c44 Sse optimized the Scalar & Vector fp-to-fp conversion instructions (MNPZ & IX); added the related Tests (AMNPZ & IX). Small refactoring of existing instructions. (#676)
* Nit.

* Update InstEmitSimdCvt.cs

* Update VectorHelper.cs

* Update InstEmitSimdArithmetic.cs

* Update CpuTestSimd.cs

* Superseded.
2019-04-26 08:58:29 +10:00

1537 lines
48 KiB
C#

using ChocolArm64.Decoders;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace ChocolArm64.Instructions
{
static class InstEmitSimdHelper
{
/// <summary>
/// Signed integer CLR types, indexed 0..3 in the order sbyte, short, int, long.
/// </summary>
public static readonly Type[] IntTypesPerSizeLog2 =
{
    typeof(sbyte), typeof(short), typeof(int), typeof(long)
};
/// <summary>
/// Unsigned integer CLR types, indexed 0..3 in the order byte, ushort, uint, ulong.
/// </summary>
public static readonly Type[] UIntTypesPerSizeLog2 =
{
    typeof(byte), typeof(ushort), typeof(uint), typeof(ulong)
};
/// <summary>
/// 128-bit vector types with signed integer elements, indexed 0..3 in the order
/// sbyte, short, int, long. Used to build intrinsic method signatures.
/// </summary>
public static readonly Type[] VectorIntTypesPerSizeLog2 =
{
    typeof(Vector128<sbyte>), typeof(Vector128<short>), typeof(Vector128<int>), typeof(Vector128<long>)
};
/// <summary>
/// 128-bit vector types with unsigned integer elements, indexed 0..3 in the order
/// byte, ushort, uint, ulong.
/// </summary>
public static readonly Type[] VectorUIntTypesPerSizeLog2 =
{
    typeof(Vector128<byte>), typeof(Vector128<ushort>), typeof(Vector128<uint>), typeof(Vector128<ulong>)
};
/// <summary>
/// Selects which register fields of the current opcode the generic emitters
/// read as input operands before invoking the operation callback.
/// </summary>
[Flags]
public enum OperFlags
{
    Rd = 0x1,
    Rn = 0x2,
    Rm = 0x4,
    Ra = 0x8,

    // Common combinations.
    RnRm   = Rn | Rm,
    RdRn   = Rd | Rn,
    RaRnRm = Ra | Rn | Rm,
    RdRnRm = Rd | Rn | Rm
}
/// <summary>
/// Derives the left-shift amount from a SIMD shift-by-immediate opcode.
/// </summary>
/// <param name="op">Decoded opcode supplying <c>Imm</c> and <c>Size</c>.</param>
/// <returns><c>Imm</c> minus <c>8</c> shifted left by <c>Size</c>.</returns>
public static int GetImmShl(OpCodeSimdShImm64 op)
{
    // Presumably the element width in bits (8, 16, 32 or 64) - confirm against the decoder.
    int elemBits = 8 << op.Size;

    return op.Imm - elemBits;
}
/// <summary>
/// Derives the right-shift amount from a SIMD shift-by-immediate opcode.
/// </summary>
/// <param name="op">Decoded opcode supplying <c>Imm</c> and <c>Size</c>.</param>
/// <returns><c>8</c> shifted left by <c>Size + 1</c>, minus <c>Imm</c>.</returns>
public static int GetImmShr(OpCodeSimdShImm64 op)
{
    // Presumably twice the element width in bits - confirm against the decoder.
    int doubleElemBits = 8 << (op.Size + 1);

    return doubleElemBits - op.Imm;
}
/// <summary>Emits the current opcode as a call to the <see cref="Sse2"/> method called <paramref name="name"/>.</summary>
public static void EmitSse2Op(ILEmitterCtx context, string name)
    => EmitSseOp(context, name, typeof(Sse2));

/// <summary>Emits the current opcode as a call to the <see cref="Sse41"/> method called <paramref name="name"/>.</summary>
public static void EmitSse41Op(ILEmitterCtx context, string name)
    => EmitSseOp(context, name, typeof(Sse41));

/// <summary>Emits the current opcode as a call to the <see cref="Sse42"/> method called <paramref name="name"/>.</summary>
public static void EmitSse42Op(ILEmitterCtx context, string name)
    => EmitSseOp(context, name, typeof(Sse42));
/// <summary>
/// Emits an integer SIMD opcode as a single intrinsic call: loads Rn (and Rm when the
/// opcode is an <c>OpCodeSimdReg64</c>), calls <paramref name="type"/>.<paramref name="name"/>
/// and stores the result in Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Name of the intrinsic method to look up on <paramref name="type"/>.</param>
/// <param name="type">Intrinsics class that declares the method.</param>
private static void EmitSseOp(ILEmitterCtx context, string name, Type type)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    context.EmitLdvec(op.Rn);

    // The overload is selected by the signed vector type matching the element size.
    Type vecType = VectorIntTypesPerSizeLog2[op.Size];

    Type[] signature;

    if (op is OpCodeSimdReg64 regOp)
    {
        context.EmitLdvec(regOp.Rm);

        signature = new Type[] { vecType, vecType };
    }
    else
    {
        signature = new Type[] { vecType };
    }

    context.EmitCall(type.GetMethod(name, signature));

    context.EmitStvec(op.Rd);

    // 64-bit forms must leave the upper half of the destination cleared.
    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>Scalar form of <see cref="EmitSseOrSse2OpF"/>.</summary>
public static void EmitScalarSseOrSse2OpF(ILEmitterCtx context, string name)
    => EmitSseOrSse2OpF(context, name, scalar: true);

/// <summary>Vector form of <see cref="EmitSseOrSse2OpF"/>.</summary>
public static void EmitVectorSseOrSse2OpF(ILEmitterCtx context, string name)
    => EmitSseOrSse2OpF(context, name, scalar: false);
/// <summary>
/// Emits a floating-point SIMD opcode as a single intrinsic call, using
/// <see cref="Sse"/> with <c>Vector128&lt;float&gt;</c> when bit 0 of the opcode size is clear
/// and <see cref="Sse2"/> with <c>Vector128&lt;double&gt;</c> when it is set.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Name of the intrinsic method to call.</param>
/// <param name="scalar">
/// When true, everything above the low element of Rd is cleared afterwards;
/// when false, only the upper half is cleared, and only for 64-bit register forms.
/// </param>
public static void EmitSseOrSse2OpF(ILEmitterCtx context, string name, bool scalar)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    bool isDouble = (op.Size & 1) != 0;

    context.EmitLdvec(op.Rn);

    Type intrinsics = isDouble ? typeof(Sse2) : typeof(Sse);
    Type vecType    = isDouble ? typeof(Vector128<double>) : typeof(Vector128<float>);

    Type[] signature;

    if (op is OpCodeSimdReg64 regOp)
    {
        context.EmitLdvec(regOp.Rm);

        signature = new Type[] { vecType, vecType };
    }
    else
    {
        signature = new Type[] { vecType };
    }

    context.EmitCall(intrinsics.GetMethod(name, signature));

    context.EmitStvec(op.Rd);

    if (!scalar)
    {
        if (op.RegisterSize == RegisterSize.Simd64)
        {
            EmitVectorZeroUpper(context, op.Rd);
        }

        return;
    }

    if (isDouble)
    {
        // A double occupies the whole lower half, so only the upper half is cleared.
        EmitVectorZeroUpper(context, op.Rd);
    }
    else
    {
        // A float occupies the low 32 bits; clear the remaining 96.
        EmitVectorZero32_128(context, op.Rd);
    }
}
/// <summary>
/// Emits a call to the one-argument <see cref="MathF"/> (float) or <see cref="Math"/> (double)
/// method called <paramref name="name"/>, chosen by bit 0 of the current opcode's size.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Method name to look up.</param>
public static void EmitUnaryMathCall(ILEmitterCtx context, string name)
{
    IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;

    MethodInfo method = (op.Size & 1) == 0
        ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) })
        : typeof(Math).GetMethod(name, new Type[] { typeof(double) });

    context.EmitCall(method);
}
/// <summary>
/// Emits a call to the two-argument <see cref="MathF"/> (float) or <see cref="Math"/> (double)
/// method called <paramref name="name"/>, chosen by bit 0 of the current opcode's size.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Method name to look up.</param>
public static void EmitBinaryMathCall(ILEmitterCtx context, string name)
{
    IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;

    MethodInfo method = (op.Size & 1) == 0
        ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(float) })
        : typeof(Math).GetMethod(name, new Type[] { typeof(double), typeof(double) });

    context.EmitCall(method);
}
/// <summary>
/// Emits a call to <c>MathF.Round(float, MidpointRounding)</c> or
/// <c>Math.Round(double, MidpointRounding)</c>, chosen by bit 0 of the current opcode's size.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="roundMode">Rounding mode, emitted as an int32 constant argument.</param>
public static void EmitRoundMathCall(ILEmitterCtx context, MidpointRounding roundMode)
{
    IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;

    MethodInfo round = (op.Size & 1) == 0
        ? typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) })
        : typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) });

    // The rounding mode is the second argument, pushed just before the call.
    context.EmitLdc_I4((int)roundMode);

    context.EmitCall(round);
}
/// <summary>
/// Emits a call to the method called <paramref name="name"/> on <c>SoftFloat32</c> or
/// <c>SoftFloat64</c> (chosen by bit 0 of the opcode size), pushing the CPU state
/// argument immediately before the call.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Name of the soft-float method.</param>
public static void EmitSoftFloatCall(ILEmitterCtx context, string name)
{
    IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;

    Type softFloat;

    if ((op.Size & 1) == 0)
    {
        softFloat = typeof(SoftFloat32);
    }
    else
    {
        softFloat = typeof(SoftFloat64);
    }

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    context.EmitCall(softFloat, name);
}
/// <summary>Scalar FP operation on Rn[0] and Rm[Index], with Index taken from the current opcode.</summary>
public static void EmitScalarBinaryOpByElemF(ILEmitterCtx context, Action emit)
    => EmitScalarOpByElemF(context, emit, ((OpCodeSimdRegElemF64)context.CurrOp).Index, ternary: false);

/// <summary>Scalar FP operation on Rd[0], Rn[0] and Rm[Index], with Index taken from the current opcode.</summary>
public static void EmitScalarTernaryOpByElemF(ILEmitterCtx context, Action emit)
    => EmitScalarOpByElemF(context, emit, ((OpCodeSimdRegElemF64)context.CurrOp).Index, ternary: true);
/// <summary>
/// Emits a scalar floating-point by-element operation: extracts Rd[0] (ternary only),
/// Rn[0] and Rm[<paramref name="elem"/>], invokes <paramref name="emit"/>, then writes
/// the result to Rd as a scalar.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="elem">Element index to read from Rm.</param>
/// <param name="ternary">When true, Rd[0] is extracted first as an additional operand.</param>
public static void EmitScalarOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    // 0 selects single precision, 1 double precision.
    int fpSize = regOp.Size & 1;

    if (ternary)
    {
        EmitVectorExtractF(context, regOp.Rd, 0, fpSize);
    }

    EmitVectorExtractF(context, regOp.Rn, 0,    fpSize);
    EmitVectorExtractF(context, regOp.Rm, elem, fpSize);

    emit();

    EmitScalarSetF(context, regOp.Rd, fpSize);
}
/// <summary>Scalar integer operation on Rn, extracted with the signed helper.</summary>
public static void EmitScalarUnaryOpSx(ILEmitterCtx context, Action emit)
    => EmitScalarOp(context, emit, OperFlags.Rn, signed: true);

/// <summary>Scalar integer operation on Rn and Rm, extracted with the signed helper.</summary>
public static void EmitScalarBinaryOpSx(ILEmitterCtx context, Action emit)
    => EmitScalarOp(context, emit, OperFlags.RnRm, signed: true);

/// <summary>Scalar integer operation on Rn, extracted with the unsigned helper.</summary>
public static void EmitScalarUnaryOpZx(ILEmitterCtx context, Action emit)
    => EmitScalarOp(context, emit, OperFlags.Rn, signed: false);

/// <summary>Scalar integer operation on Rn and Rm, extracted with the unsigned helper.</summary>
public static void EmitScalarBinaryOpZx(ILEmitterCtx context, Action emit)
    => EmitScalarOp(context, emit, OperFlags.RnRm, signed: false);

/// <summary>Scalar integer operation on Rd, Rn and Rm, extracted with the unsigned helper.</summary>
public static void EmitScalarTernaryOpZx(ILEmitterCtx context, Action emit)
    => EmitScalarOp(context, emit, OperFlags.RdRnRm, signed: false);
/// <summary>
/// Emits a scalar integer operation: extracts element 0 of each register selected by
/// <paramref name="opers"/> (in the order Rd, Rn, Rm), invokes <paramref name="emit"/>,
/// then writes the result to Rd as a scalar.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="opers">Which of Rd, Rn and Rm are inputs. The Ra flag is not consulted here.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitScalarOp(ILEmitterCtx context, Action emit, OperFlags opers, bool signed)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    if ((opers & OperFlags.Rd) != 0)
    {
        EmitVectorExtract(context, op.Rd, 0, op.Size, signed);
    }

    if ((opers & OperFlags.Rn) != 0)
    {
        EmitVectorExtract(context, op.Rn, 0, op.Size, signed);
    }

    if ((opers & OperFlags.Rm) != 0)
    {
        // Rm only exists on the register form of the opcode.
        EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, 0, op.Size, signed);
    }

    emit();

    EmitScalarSet(context, op.Rd, op.Size);
}
/// <summary>Scalar floating-point operation on Rn.</summary>
public static void EmitScalarUnaryOpF(ILEmitterCtx context, Action emit)
    => EmitScalarOpF(context, emit, OperFlags.Rn);

/// <summary>Scalar floating-point operation on Rn and Rm.</summary>
public static void EmitScalarBinaryOpF(ILEmitterCtx context, Action emit)
    => EmitScalarOpF(context, emit, OperFlags.RnRm);

/// <summary>Scalar floating-point operation on Ra, Rn and Rm.</summary>
public static void EmitScalarTernaryRaOpF(ILEmitterCtx context, Action emit)
    => EmitScalarOpF(context, emit, OperFlags.RaRnRm);
/// <summary>
/// Emits a scalar floating-point operation: extracts element 0 of each register selected
/// by <paramref name="opers"/> (in the order Ra, Rn, Rm), invokes <paramref name="emit"/>,
/// then writes the result to Rd as a scalar.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="opers">Which of Ra, Rn and Rm are inputs. The Rd flag is not consulted here.</param>
public static void EmitScalarOpF(ILEmitterCtx context, Action emit, OperFlags opers)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    // 0 selects single precision, 1 double precision.
    int fpSize = op.Size & 1;

    if ((opers & OperFlags.Ra) != 0)
    {
        EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Ra, 0, fpSize);
    }

    if ((opers & OperFlags.Rn) != 0)
    {
        EmitVectorExtractF(context, op.Rn, 0, fpSize);
    }

    if ((opers & OperFlags.Rm) != 0)
    {
        EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Rm, 0, fpSize);
    }

    emit();

    EmitScalarSetF(context, op.Rd, fpSize);
}
/// <summary>Element-wise floating-point operation on Rn.</summary>
public static void EmitVectorUnaryOpF(ILEmitterCtx context, Action emit)
    => EmitVectorOpF(context, emit, OperFlags.Rn);

/// <summary>Element-wise floating-point operation on Rn and Rm.</summary>
public static void EmitVectorBinaryOpF(ILEmitterCtx context, Action emit)
    => EmitVectorOpF(context, emit, OperFlags.RnRm);

/// <summary>Element-wise floating-point operation on Rd, Rn and Rm.</summary>
public static void EmitVectorTernaryOpF(ILEmitterCtx context, Action emit)
    => EmitVectorOpF(context, emit, OperFlags.RdRnRm);
/// <summary>
/// Emits an element-wise floating-point operation. For every element, the registers
/// selected by <paramref name="opers"/> are extracted (in the order Rd, Rn, Rm),
/// <paramref name="emit"/> is invoked and the result is inserted into the same element of Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="opers">Which of Rd, Rn and Rm are inputs.</param>
public static void EmitVectorOpF(ILEmitterCtx context, Action emit, OperFlags opers)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int fpSize = op.Size & 1;

    // Elements are 4 bytes (single) or 8 bytes (double): bytes >> (fpSize + 2).
    int totalBytes = op.GetBitsCount() >> 3;
    int count      = totalBytes >> (fpSize + 2);

    for (int i = 0; i < count; i++)
    {
        if ((opers & OperFlags.Rd) != 0)
        {
            EmitVectorExtractF(context, op.Rd, i, fpSize);
        }

        if ((opers & OperFlags.Rn) != 0)
        {
            EmitVectorExtractF(context, op.Rn, i, fpSize);
        }

        if ((opers & OperFlags.Rm) != 0)
        {
            EmitVectorExtractF(context, ((OpCodeSimdReg64)op).Rm, i, fpSize);
        }

        emit();

        EmitVectorInsertF(context, op.Rd, i, fpSize);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>Element-wise FP operation on Rn and the single element Rm[Index], with Index taken from the current opcode.</summary>
public static void EmitVectorBinaryOpByElemF(ILEmitterCtx context, Action emit)
    => EmitVectorOpByElemF(context, emit, ((OpCodeSimdRegElemF64)context.CurrOp).Index, ternary: false);

/// <summary>Element-wise FP operation on Rd, Rn and the single element Rm[Index], with Index taken from the current opcode.</summary>
public static void EmitVectorTernaryOpByElemF(ILEmitterCtx context, Action emit)
    => EmitVectorOpByElemF(context, emit, ((OpCodeSimdRegElemF64)context.CurrOp).Index, ternary: true);
/// <summary>
/// Emits an element-wise floating-point by-element operation. Each element of Rn
/// (and of Rd when <paramref name="ternary"/>) is combined with Rm[<paramref name="elem"/>];
/// results are collected in the temporary vector and stored to Rd once the loop is done.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="elem">Element index to read from Rm on every iteration.</param>
/// <param name="ternary">When true, Rd[index] is extracted first as an additional operand.</param>
public static void EmitVectorOpByElemF(ILEmitterCtx context, Action emit, int elem, bool ternary)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    int fpSize = regOp.Size & 1;

    int totalBytes = regOp.GetBitsCount() >> 3;
    int count      = totalBytes >> (fpSize + 2);

    for (int i = 0; i < count; i++)
    {
        if (ternary)
        {
            EmitVectorExtractF(context, regOp.Rd, i, fpSize);
        }

        EmitVectorExtractF(context, regOp.Rn, i,    fpSize);
        EmitVectorExtractF(context, regOp.Rm, elem, fpSize);

        emit();

        // Written to the temporary so Rd is not modified while Rm is still being re-read.
        EmitVectorInsertTmpF(context, i, fpSize);
    }

    context.EmitLdvectmp();
    context.EmitStvec(regOp.Rd);

    if (regOp.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, regOp.Rd);
    }
}
/// <summary>Element-wise integer operation on Rn, extracted with the signed helper.</summary>
public static void EmitVectorUnaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.Rn, signed: true);

/// <summary>Element-wise integer operation on Rn and Rm, extracted with the signed helper.</summary>
public static void EmitVectorBinaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.RnRm, signed: true);

/// <summary>Element-wise integer operation on Rd, Rn and Rm, extracted with the signed helper.</summary>
public static void EmitVectorTernaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.RdRnRm, signed: true);

/// <summary>Element-wise integer operation on Rn, extracted with the unsigned helper.</summary>
public static void EmitVectorUnaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.Rn, signed: false);

/// <summary>Element-wise integer operation on Rn and Rm, extracted with the unsigned helper.</summary>
public static void EmitVectorBinaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.RnRm, signed: false);

/// <summary>Element-wise integer operation on Rd, Rn and Rm, extracted with the unsigned helper.</summary>
public static void EmitVectorTernaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorOp(context, emit, OperFlags.RdRnRm, signed: false);
/// <summary>
/// Emits an element-wise integer operation. For every element, the registers selected
/// by <paramref name="opers"/> are extracted (in the order Rd, Rn, Rm),
/// <paramref name="emit"/> is invoked and the result is inserted into the same element of Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="opers">Which of Rd, Rn and Rm are inputs.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorOp(ILEmitterCtx context, Action emit, OperFlags opers, bool signed)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    int totalBytes = op.GetBitsCount() >> 3;
    int count      = totalBytes >> op.Size;

    for (int i = 0; i < count; i++)
    {
        if ((opers & OperFlags.Rd) != 0)
        {
            EmitVectorExtract(context, op.Rd, i, op.Size, signed);
        }

        if ((opers & OperFlags.Rn) != 0)
        {
            EmitVectorExtract(context, op.Rn, i, op.Size, signed);
        }

        if ((opers & OperFlags.Rm) != 0)
        {
            EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, i, op.Size, signed);
        }

        emit();

        EmitVectorInsert(context, op.Rd, i, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>Element-wise signed integer operation on Rn and the single element Rm[Index].</summary>
public static void EmitVectorBinaryOpByElemSx(ILEmitterCtx context, Action emit)
    => EmitVectorOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: false, signed: true);

/// <summary>Element-wise unsigned integer operation on Rn and the single element Rm[Index].</summary>
public static void EmitVectorBinaryOpByElemZx(ILEmitterCtx context, Action emit)
    => EmitVectorOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: false, signed: false);

/// <summary>Element-wise unsigned integer operation on Rd, Rn and the single element Rm[Index].</summary>
public static void EmitVectorTernaryOpByElemZx(ILEmitterCtx context, Action emit)
    => EmitVectorOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: true, signed: false);
/// <summary>
/// Emits an element-wise integer by-element operation. Rm[<paramref name="elem"/>] is
/// extracted once into the scalar temporary, then every element of Rn (and of Rd when
/// <paramref name="ternary"/>) is combined with it and the result inserted into Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="elem">Element index to read from Rm.</param>
/// <param name="ternary">When true, Rd[index] is extracted first as an additional operand.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    int totalBytes = regOp.GetBitsCount() >> 3;
    int count      = totalBytes >> regOp.Size;

    // Cache the selected Rm element before the loop starts writing to Rd.
    EmitVectorExtract(context, regOp.Rm, elem, regOp.Size, signed);
    context.EmitSttmp();

    for (int i = 0; i < count; i++)
    {
        if (ternary)
        {
            EmitVectorExtract(context, regOp.Rd, i, regOp.Size, signed);
        }

        EmitVectorExtract(context, regOp.Rn, i, regOp.Size, signed);
        context.EmitLdtmp();

        emit();

        EmitVectorInsert(context, regOp.Rd, i, regOp.Size);
    }

    if (regOp.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, regOp.Rd);
    }
}
/// <summary>Element-wise operation whose only operand is the opcode immediate.</summary>
public static void EmitVectorImmUnaryOp(ILEmitterCtx context, Action emit)
    => EmitVectorImmOp(context, emit, binary: false);

/// <summary>Element-wise operation on Rd and the opcode immediate.</summary>
public static void EmitVectorImmBinaryOp(ILEmitterCtx context, Action emit)
    => EmitVectorImmOp(context, emit, binary: true);
/// <summary>
/// Emits an element-wise operation with an immediate operand. For every element the
/// current Rd element is extracted (zero-extended, binary form only), the opcode's
/// immediate is pushed as an int64 constant, <paramref name="emit"/> is invoked and the
/// result is inserted into Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the pushed operands.</param>
/// <param name="binary">When true, Rd[index] is pushed before the immediate.</param>
public static void EmitVectorImmOp(ILEmitterCtx context, Action emit, bool binary)
{
    OpCodeSimdImm64 immOp = (OpCodeSimdImm64)context.CurrOp;

    int totalBytes = immOp.GetBitsCount() >> 3;
    int count      = totalBytes >> immOp.Size;

    for (int i = 0; i < count; i++)
    {
        if (binary)
        {
            EmitVectorExtractZx(context, immOp.Rd, i, immOp.Size);
        }

        context.EmitLdc_I8(immOp.Imm);

        emit();

        EmitVectorInsert(context, immOp.Rd, i, immOp.Size);
    }

    if (immOp.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, immOp.Rd);
    }
}
/// <summary>Signed form of <see cref="EmitVectorWidenRmBinaryOp"/>.</summary>
public static void EmitVectorWidenRmBinaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRmBinaryOp(context, emit, signed: true);

/// <summary>Unsigned form of <see cref="EmitVectorWidenRmBinaryOp"/>.</summary>
public static void EmitVectorWidenRmBinaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRmBinaryOp(context, emit, signed: false);
/// <summary>
/// Emits a binary operation whose first operand (Rn) already has double-width elements
/// and whose second operand (Rm) is read at the opcode's element size. Results are
/// double-width and are collected in the temporary vector before being stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the two extracted operands.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorWidenRmBinaryOp(ILEmitterCtx context, Action emit, bool signed)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    // Number of narrow elements held in 8 bytes (half a register).
    int count = 8 >> regOp.Size;

    // 128-bit forms read the narrow operand starting at element 'count' (the upper half).
    int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? count : 0;

    int wideSize = regOp.Size + 1;

    for (int i = 0; i < count; i++)
    {
        EmitVectorExtract(context, regOp.Rn, i,             wideSize,   signed);
        EmitVectorExtract(context, regOp.Rm, srcOffset + i, regOp.Size, signed);

        emit();

        EmitVectorInsertTmp(context, i, wideSize);
    }

    context.EmitLdvectmp();
    context.EmitStvec(regOp.Rd);
}
/// <summary>Widening signed operation on narrow Rn and Rm elements.</summary>
public static void EmitVectorWidenRnRmBinaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRnRmOp(context, emit, ternary: false, signed: true);

/// <summary>Widening unsigned operation on narrow Rn and Rm elements.</summary>
public static void EmitVectorWidenRnRmBinaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRnRmOp(context, emit, ternary: false, signed: false);

/// <summary>Widening signed operation on wide Rd and narrow Rn and Rm elements.</summary>
public static void EmitVectorWidenRnRmTernaryOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRnRmOp(context, emit, ternary: true, signed: true);

/// <summary>Widening unsigned operation on wide Rd and narrow Rn and Rm elements.</summary>
public static void EmitVectorWidenRnRmTernaryOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenRnRmOp(context, emit, ternary: true, signed: false);
/// <summary>
/// Emits a widening operation: Rn and Rm are read at the opcode's element size,
/// optionally together with the double-width Rd element, and the double-width result
/// is collected in the temporary vector before being stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="ternary">When true, the wide Rd[index] is extracted first as an additional operand.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorWidenRnRmOp(ILEmitterCtx context, Action emit, bool ternary, bool signed)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    // Number of narrow elements held in 8 bytes (half a register).
    int count = 8 >> regOp.Size;

    // 128-bit forms read the narrow operands starting at element 'count' (the upper half).
    int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? count : 0;

    int wideSize = regOp.Size + 1;

    for (int i = 0; i < count; i++)
    {
        if (ternary)
        {
            EmitVectorExtract(context, regOp.Rd, i, wideSize, signed);
        }

        EmitVectorExtract(context, regOp.Rn, srcOffset + i, regOp.Size, signed);
        EmitVectorExtract(context, regOp.Rm, srcOffset + i, regOp.Size, signed);

        emit();

        EmitVectorInsertTmp(context, i, wideSize);
    }

    context.EmitLdvectmp();
    context.EmitStvec(regOp.Rd);
}
/// <summary>Widening signed operation on narrow Rn elements and the single element Rm[Index].</summary>
public static void EmitVectorWidenBinaryOpByElemSx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: false, signed: true);

/// <summary>Widening unsigned operation on narrow Rn elements and the single element Rm[Index].</summary>
public static void EmitVectorWidenBinaryOpByElemZx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: false, signed: false);

/// <summary>Widening signed operation on wide Rd, narrow Rn elements and the single element Rm[Index].</summary>
public static void EmitVectorWidenTernaryOpByElemSx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: true, signed: true);

/// <summary>Widening unsigned operation on wide Rd, narrow Rn elements and the single element Rm[Index].</summary>
public static void EmitVectorWidenTernaryOpByElemZx(ILEmitterCtx context, Action emit)
    => EmitVectorWidenOpByElem(context, emit, ((OpCodeSimdRegElem64)context.CurrOp).Index, ternary: true, signed: false);
/// <summary>
/// Emits a widening by-element operation. Rm[<paramref name="elem"/>] is extracted once
/// into the scalar temporary; each narrow Rn element (plus the wide Rd element when
/// <paramref name="ternary"/>) is then combined with it, and the double-width results are
/// collected in the temporary vector before being stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted operands.</param>
/// <param name="elem">Element index to read from Rm.</param>
/// <param name="ternary">When true, the wide Rd[index] is extracted first as an additional operand.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorWidenOpByElem(ILEmitterCtx context, Action emit, int elem, bool ternary, bool signed)
{
    OpCodeSimdReg64 regOp = (OpCodeSimdReg64)context.CurrOp;

    // Number of narrow elements held in 8 bytes (half a register).
    int count = 8 >> regOp.Size;

    // 128-bit forms read the narrow Rn operand starting at element 'count' (the upper half).
    int srcOffset = regOp.RegisterSize == RegisterSize.Simd128 ? count : 0;

    int wideSize = regOp.Size + 1;

    EmitVectorExtract(context, regOp.Rm, elem, regOp.Size, signed);
    context.EmitSttmp();

    for (int i = 0; i < count; i++)
    {
        if (ternary)
        {
            EmitVectorExtract(context, regOp.Rd, i, wideSize, signed);
        }

        EmitVectorExtract(context, regOp.Rn, srcOffset + i, regOp.Size, signed);
        context.EmitLdtmp();

        emit();

        EmitVectorInsertTmp(context, i, wideSize);
    }

    context.EmitLdvectmp();
    context.EmitStvec(regOp.Rd);
}
/// <summary>Signed form of <see cref="EmitVectorPairwiseOp"/>.</summary>
public static void EmitVectorPairwiseOpSx(ILEmitterCtx context, Action emit)
    => EmitVectorPairwiseOp(context, emit, signed: true);

/// <summary>Unsigned form of <see cref="EmitVectorPairwiseOp"/>.</summary>
public static void EmitVectorPairwiseOpZx(ILEmitterCtx context, Action emit)
    => EmitVectorPairwiseOp(context, emit, signed: false);
/// <summary>
/// Emits an integer pairwise operation: adjacent element pairs of Rn and of Rm are each
/// combined by <paramref name="emit"/>. Rn results fill the lower elements of the
/// temporary vector, Rm results the elements above them; the temporary is then stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation combining the two most recently extracted elements.</param>
/// <param name="signed">Selects the signed or unsigned extract helper.</param>
public static void EmitVectorPairwiseOp(ILEmitterCtx context, Action emit, bool signed)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
// (bits / 16) >> Size is half the element count, i.e. the number of pairs per source register.
int words = op.GetBitsCount() >> 4;
int pairs = words >> op.Size;
for (int index = 0; index < pairs; index++)
{
int idx = index << 1;
EmitVectorExtract(context, op.Rn, idx, op.Size, signed);
EmitVectorExtract(context, op.Rn, idx + 1, op.Size, signed);
emit();
EmitVectorExtract(context, op.Rm, idx, op.Size, signed);
EmitVectorExtract(context, op.Rm, idx + 1, op.Size, signed);
emit();
// NOTE(review): assumes each emit() leaves exactly one result on the evaluation stack.
// The Rm result was produced last, so it is consumed first (upper slot), then the Rn result.
EmitVectorInsertTmp(context, pairs + index, op.Size);
EmitVectorInsertTmp(context, index, op.Size);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
/// <summary>
/// Emits a floating-point pairwise operation: adjacent element pairs of Rn and of Rm are
/// each combined by <paramref name="emit"/>. Rn results fill the lower elements of the
/// temporary vector, Rm results the elements above them; the temporary is then stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation combining the two most recently extracted elements.</param>
public static void EmitVectorPairwiseOpF(ILEmitterCtx context, Action emit)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
// 0 selects single precision, 1 double precision.
int sizeF = op.Size & 1;
int words = op.GetBitsCount() >> 4;
// '+' binds tighter than '>>': this is words >> (sizeF + 2), the number of pairs per source register.
int pairs = words >> sizeF + 2;
for (int index = 0; index < pairs; index++)
{
int idx = index << 1;
EmitVectorExtractF(context, op.Rn, idx, sizeF);
EmitVectorExtractF(context, op.Rn, idx + 1, sizeF);
emit();
EmitVectorExtractF(context, op.Rm, idx, sizeF);
EmitVectorExtractF(context, op.Rm, idx + 1, sizeF);
emit();
// NOTE(review): assumes each emit() leaves exactly one result on the evaluation stack.
// The Rm result was produced last, so it is consumed first (upper slot), then the Rn result.
EmitVectorInsertTmpF(context, pairs + index, sizeF);
EmitVectorInsertTmpF(context, index, sizeF);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (op.RegisterSize == RegisterSize.Simd64)
{
EmitVectorZeroUpper(context, op.Rd);
}
}
/// <summary>
/// Emits a floating-point pairwise operation using SSE/SSE2 intrinsics: Rn and Rm are
/// rearranged into two vectors holding the first and second member of every pair, and
/// the intrinsic called <paramref name="name"/> is applied once to those two vectors.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="name">Name of the two-operand <c>Sse</c> (float) or <c>Sse2</c> (double) method to apply.</param>
public static void EmitVectorPairwiseSseOrSse2OpF(ILEmitterCtx context, string name)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
int sizeF = op.Size & 1;
if (sizeF == 0)
{
if (op.RegisterSize == RegisterSize.Simd64)
{
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
// tmp = UnpackLow(Rn, Rm) = [n0, m0, n1, m1] (per the UNPCKLPS definition).
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types));
context.EmitStvectmp();
// First operand: MoveLowToHigh(tmp, zero) = [n0, m0, 0, 0].
context.EmitLdvectmp();
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), types));
// Second operand: MoveHighToLow(zero, tmp) = [n1, m1, 0, 0].
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvectmp();
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow), types));
// Both operands have zeroed upper halves, so no separate upper-half clear is emitted here.
context.EmitCall(typeof(Sse).GetMethod(name, types));
context.EmitStvec(op.Rd);
}
else /* if (op.RegisterSize == RegisterSize.Simd128) */
{
Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
// First operand: even-indexed elements, [n0, n2, m0, m2] (per the SHUFPS definition).
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
// Second operand: odd-indexed elements, [n1, n3, m1, m3].
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
context.EmitCall(typeof(Sse).GetMethod(name, types));
context.EmitStvec(op.Rd);
}
}
else /* if (sizeF == 1) */
{
Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
// First operand: UnpackLow(Rn, Rm) = [n0, m0].
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
// Second operand: UnpackHigh(Rn, Rm) = [n1, m1].
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
context.EmitCall(typeof(Sse2).GetMethod(name, types));
context.EmitStvec(op.Rd);
}
}
/// <summary>
/// Options for the saturating emitters: scalar vs. vector, operand signedness and
/// which built-in operation (add, subtract, accumulate) to emit.
/// </summary>
[Flags]
public enum SaturatingFlags
{
    Scalar     = 0x01,
    Signed     = 0x02,
    Add        = 0x04,
    Sub        = 0x08,
    Accumulate = 0x10,

    // Shape/signedness presets.
    ScalarSx = Scalar | Signed,
    ScalarZx = Scalar,
    VectorSx = Signed,
    VectorZx = 0
}
/// <summary>Scalar form of <see cref="EmitSaturatingUnaryOpSx"/>.</summary>
public static void EmitScalarSaturatingUnaryOpSx(ILEmitterCtx context, Action emit)
    => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.ScalarSx);

/// <summary>Vector form of <see cref="EmitSaturatingUnaryOpSx"/>.</summary>
public static void EmitVectorSaturatingUnaryOpSx(ILEmitterCtx context, Action emit)
    => EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.VectorSx);
/// <summary>
/// Emits a signed saturating unary operation over one (scalar) or all (vector) elements
/// of Rn, writing the saturated result to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Callback that emits the operation on the extracted element.</param>
/// <param name="flags">Only <see cref="SaturatingFlags.Scalar"/> is consulted here.</param>
public static void EmitSaturatingUnaryOpSx(ILEmitterCtx context, Action emit, SaturatingFlags flags)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    bool isScalar = (flags & SaturatingFlags.Scalar) != 0;

    int totalBytes = op.GetBitsCount() >> 3;
    int count      = isScalar ? 1 : totalBytes >> op.Size;

    for (int i = 0; i < count; i++)
    {
        EmitVectorExtractSx(context, op.Rn, i, op.Size);

        emit();

        if (op.Size > 2)
        {
            // 64-bit elements (Size == 3) use the dedicated fallback instead of EmitSatQ.
            EmitUnarySignedSatQAbsOrNeg(context);
        }
        else
        {
            EmitSatQ(context, op.Size, true, true);
        }

        if (isScalar)
        {
            EmitVectorZeroAll(context, op.Rd);
        }

        EmitVectorInsert(context, op.Rd, i, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>Scalar signed saturating binary operation. <paramref name="flags"/> selects the operation.</summary>
public static void EmitScalarSaturatingBinaryOpSx(ILEmitterCtx context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.ScalarSx | flags);

/// <summary>Scalar unsigned saturating binary operation. <paramref name="flags"/> selects the operation.</summary>
public static void EmitScalarSaturatingBinaryOpZx(ILEmitterCtx context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.ScalarZx | flags);

/// <summary>Vector signed saturating binary operation. <paramref name="flags"/> selects the operation.</summary>
public static void EmitVectorSaturatingBinaryOpSx(ILEmitterCtx context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.VectorSx | flags);

/// <summary>Vector unsigned saturating binary operation. <paramref name="flags"/> selects the operation.</summary>
public static void EmitVectorSaturatingBinaryOpZx(ILEmitterCtx context, SaturatingFlags flags)
    => EmitSaturatingBinaryOp(context, () => { }, SaturatingFlags.VectorZx | flags);
/// <summary>
/// Emits a saturating two-operand operation over one (scalar) or all (vector) elements,
/// writing each saturated result to Rd.
/// </summary>
/// <remarks>
/// <paramref name="flags"/> selects one of three modes, checked in this order:
/// Add/Sub combine Rn with Rm; Accumulate combines Rn (extracted with the opposite
/// signedness) with Rd; otherwise <paramref name="emit"/> supplies the operation on Rn and Rm.
/// In the Add/Sub/Accumulate modes, 64-bit elements (Size == 3) are handled by dedicated
/// soft-fallback helpers rather than an IL add/sub followed by <see cref="EmitSatQ"/>.
/// </remarks>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="emit">Operation callback, used only when none of Add, Sub, Accumulate is set.</param>
/// <param name="flags">Shape, signedness and operation selection.</param>
public static void EmitSaturatingBinaryOp(ILEmitterCtx context, Action emit, SaturatingFlags flags)
{
    OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;

    bool isScalar     = (flags & SaturatingFlags.Scalar)     != 0;
    bool isSigned     = (flags & SaturatingFlags.Signed)     != 0;
    bool isAdd        = (flags & SaturatingFlags.Add)        != 0;
    bool isSub        = (flags & SaturatingFlags.Sub)        != 0;
    bool isAccumulate = (flags & SaturatingFlags.Accumulate) != 0;

    int totalBytes = op.GetBitsCount() >> 3;
    int count      = isScalar ? 1 : totalBytes >> op.Size;

    for (int i = 0; i < count; i++)
    {
        if (isAdd || isSub)
        {
            EmitVectorExtract(context, op.Rn,                    i, op.Size, isSigned);
            EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, i, op.Size, isSigned);

            if (op.Size > 2)
            {
                if (isAdd)
                {
                    EmitBinarySatQAdd(context, isSigned);
                }
                else
                {
                    EmitBinarySatQSub(context, isSigned);
                }
            }
            else
            {
                context.Emit(isAdd ? OpCodes.Add : OpCodes.Sub);

                EmitSatQ(context, op.Size, true, isSigned);
            }
        }
        else if (isAccumulate)
        {
            // The Rn operand is deliberately extracted with the opposite signedness to Rd.
            EmitVectorExtract(context, op.Rn, i, op.Size, !isSigned);
            EmitVectorExtract(context, op.Rd, i, op.Size,  isSigned);

            if (op.Size > 2)
            {
                EmitBinarySatQAccumulate(context, isSigned);
            }
            else
            {
                context.Emit(OpCodes.Add);

                EmitSatQ(context, op.Size, true, isSigned);
            }
        }
        else
        {
            EmitVectorExtract(context, op.Rn,                    i, op.Size, isSigned);
            EmitVectorExtract(context, ((OpCodeSimdReg64)op).Rm, i, op.Size, isSigned);

            emit();

            EmitSatQ(context, op.Size, true, isSigned);
        }

        if (isScalar)
        {
            EmitVectorZeroAll(context, op.Rd);
        }

        EmitVectorInsert(context, op.Rd, i, op.Size);
    }

    if (op.RegisterSize == RegisterSize.Simd64)
    {
        EmitVectorZeroUpper(context, op.Rd);
    }
}
/// <summary>
/// Options for <c>EmitSaturatingNarrowOp</c>: scalar vs. vector, and the signedness of
/// the source and destination elements.
/// </summary>
[Flags]
public enum SaturatingNarrowFlags
{
    Scalar    = 0x1,
    SignedSrc = 0x2,
    SignedDst = 0x4,

    // Presets named <shape><source signedness><destination signedness>.
    ScalarSxSx = Scalar | SignedSrc | SignedDst,
    ScalarSxZx = Scalar | SignedSrc,
    ScalarZxZx = Scalar,
    VectorSxSx = SignedSrc | SignedDst,
    VectorSxZx = SignedSrc,
    VectorZxZx = 0
}
/// <summary>
/// Emits a saturating narrowing operation: each double-width element of Rn is saturated
/// to the opcode's element size and the results are assembled in the temporary vector
/// before being stored to Rd.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="flags">Scalar/vector selection and source/destination signedness.</param>
public static void EmitSaturatingNarrowOp(ILEmitterCtx context, SaturatingNarrowFlags flags)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0;
bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0;
bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0;
// Vector forms produce as many narrow elements as fit in 8 bytes; scalar forms produce one.
int elems = !scalar ? 8 >> op.Size : 1;
// 128-bit vector forms write their results starting at element 'elems' (the upper half of Rd).
int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0;
if (scalar)
{
// Only element 0 is written below, so clear the rest of the lower half first.
EmitVectorZeroLowerTmp(context);
}
if (part != 0)
{
// Seed the temporary with Rd so its lower half survives the final store.
context.EmitLdvec(op.Rd);
context.EmitStvectmp();
}
for (int index = 0; index < elems; index++)
{
EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc);
EmitSatQ(context, op.Size, signedSrc, signedDst);
EmitVectorInsertTmp(context, part + index, op.Size);
}
context.EmitLdvectmp();
context.EmitStvec(op.Rd);
if (part == 0)
{
// Results occupy the lower half only; the upper half of Rd is cleared.
EmitVectorZeroUpper(context, op.Rd);
}
}
// TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
/// <summary>
/// Emits a call to the soft-fallback saturation routine that matches the given source and
/// destination signedness. The destination size and the CPU state argument are pushed
/// (in that order) on top of the value already on the evaluation stack.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="sizeDst">Destination size selector; must be 0, 1 or 2.</param>
/// <param name="signedSrc">True when the source value is signed.</param>
/// <param name="signedDst">True when the destination is signed.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="sizeDst"/> is outside 0..2.</exception>
public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst)
{
    if ((uint)sizeDst > 2u)
    {
        throw new ArgumentOutOfRangeException(nameof(sizeDst));
    }

    context.EmitLdc_I4(sizeDst);
    context.EmitLdarg(TranslatedSub.StateArgIdx);

    string fallbackName;

    if (signedSrc)
    {
        fallbackName = signedDst
            ? nameof(SoftFallback.SignedSrcSignedDstSatQ)
            : nameof(SoftFallback.SignedSrcUnsignedDstSatQ);
    }
    else
    {
        fallbackName = signedDst
            ? nameof(SoftFallback.UnsignedSrcSignedDstSatQ)
            : nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ);
    }

    SoftFallback.EmitCall(context, fallbackName);
}
// TSrc (64bit) == TDst (64bit); signed.
/// <summary>
/// Emits a call to <c>SoftFallback.UnarySignedSatQAbsOrNeg</c>, pushing the CPU state
/// argument immediately before the call.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <exception cref="InvalidOperationException">The current opcode's Size is below 3.</exception>
public static void EmitUnarySignedSatQAbsOrNeg(ILEmitterCtx context)
{
    int size = ((OpCodeSimd64)context.CurrOp).Size;

    if (size < 3)
    {
        throw new InvalidOperationException();
    }

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    SoftFallback.EmitCall(context, nameof(SoftFallback.UnarySignedSatQAbsOrNeg));
}
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
/// <summary>
/// Emits a call to the signed or unsigned 64-bit saturating-add soft fallback, pushing
/// the CPU state argument immediately before the call.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="signed">Selects <c>BinarySignedSatQAdd</c> (true) or <c>BinaryUnsignedSatQAdd</c> (false).</param>
/// <exception cref="InvalidOperationException">The current opcode's Size is below 3.</exception>
public static void EmitBinarySatQAdd(ILEmitterCtx context, bool signed)
{
    int size = ((OpCodeSimdReg64)context.CurrOp).Size;

    if (size < 3)
    {
        throw new InvalidOperationException();
    }

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    string fallbackName = signed
        ? nameof(SoftFallback.BinarySignedSatQAdd)
        : nameof(SoftFallback.BinaryUnsignedSatQAdd);

    SoftFallback.EmitCall(context, fallbackName);
}
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
/// <summary>
/// Emits a call to the signed or unsigned 64-bit saturating-subtract soft fallback,
/// pushing the CPU state argument immediately before the call.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="signed">Selects <c>BinarySignedSatQSub</c> (true) or <c>BinaryUnsignedSatQSub</c> (false).</param>
/// <exception cref="InvalidOperationException">The current opcode's Size is below 3.</exception>
public static void EmitBinarySatQSub(ILEmitterCtx context, bool signed)
{
    int size = ((OpCodeSimdReg64)context.CurrOp).Size;

    if (size < 3)
    {
        throw new InvalidOperationException();
    }

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    string fallbackName = signed
        ? nameof(SoftFallback.BinarySignedSatQSub)
        : nameof(SoftFallback.BinaryUnsignedSatQSub);

    SoftFallback.EmitCall(context, fallbackName);
}
// TSrcs (64bit) == TDst (64bit); signed, unsigned.
/// <summary>
/// Emits a call to the signed or unsigned 64-bit saturating-accumulate soft fallback,
/// pushing the CPU state argument immediately before the call.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="signed">Selects <c>BinarySignedSatQAcc</c> (true) or <c>BinaryUnsignedSatQAcc</c> (false).</param>
/// <exception cref="InvalidOperationException">The current opcode's Size is below 3.</exception>
public static void EmitBinarySatQAccumulate(ILEmitterCtx context, bool signed)
{
    int size = ((OpCodeSimd64)context.CurrOp).Size;

    if (size < 3)
    {
        throw new InvalidOperationException();
    }

    context.EmitLdarg(TranslatedSub.StateArgIdx);

    string fallbackName = signed
        ? nameof(SoftFallback.BinarySignedSatQAcc)
        : nameof(SoftFallback.BinaryUnsignedSatQAcc);

    SoftFallback.EmitCall(context, fallbackName);
}
/// <summary>
/// Writes an integer scalar result to a vector register: the register is cleared and the
/// value is inserted as element 0.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Destination vector register index.</param>
/// <param name="size">Element size selector (0..3) passed to the insert helper.</param>
public static void EmitScalarSet(ILEmitterCtx context, int reg, int size)
{
// Clear the whole register first so only element 0 ends up non-zero.
// NOTE(review): the value to insert is expected to already be on the evaluation stack.
EmitVectorZeroAll(context, reg);
EmitVectorInsert(context, reg, 0, size);
}
/// <summary>
/// Writes a floating-point scalar result to a vector register, leaving every other
/// element zero.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Destination vector register index.</param>
/// <param name="size">0 for single precision, 1 for double precision.</param>
public static void EmitScalarSetF(ILEmitterCtx context, int reg, int size)
{
    if (!Optimizations.UseSse41 || size != 0)
    {
        // Generic path: clear the register, then insert element 0.
        EmitVectorZeroAll(context, reg);
        EmitVectorInsertF(context, reg, 0, size);

        return;
    }

    // For float, insertion and zeroing of the upper bits can be done
    // with a single instruction (INSERTPS).
    context.EmitLdvec(reg);

    VectorHelper.EmitCall(context, nameof(VectorHelper.Sse41VectorInsertScalarSingle));

    context.EmitStvec(reg);
}
/// <summary>Emits an integer element extract using the signed helper.</summary>
public static void EmitVectorExtractSx(ILEmitterCtx context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: true);

/// <summary>Emits an integer element extract using the unsigned helper.</summary>
public static void EmitVectorExtractZx(ILEmitterCtx context, int reg, int index, int size)
    => EmitVectorExtract(context, reg, index, size, signed: false);
/// <summary>
/// Emits IL that loads vector register <paramref name="reg"/>, pushes
/// <paramref name="index"/> and <paramref name="size"/>, and calls
/// <c>VectorHelper.VectorExtractIntSx</c> or <c>VectorHelper.VectorExtractIntZx</c>.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Source vector register index.</param>
/// <param name="index">Element index; must be below <c>16 &gt;&gt; size</c>.</param>
/// <param name="size">Element size selector, 0..3.</param>
/// <param name="signed">True selects the Sx helper, false the Zx helper.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorExtract(ILEmitterCtx context, int reg, int index, int size, bool signed)
{
    ThrowIfInvalid(index, size);

    context.EmitLdvec(reg);
    context.EmitLdc_I4(index);
    context.EmitLdc_I4(size);

    string helperName = signed
        ? nameof(VectorHelper.VectorExtractIntSx)
        : nameof(VectorHelper.VectorExtractIntZx);

    VectorHelper.EmitCall(context, helperName);
}
/// <summary>
/// Emits IL that loads vector register <paramref name="reg"/>, pushes
/// <paramref name="index"/>, and calls <c>VectorHelper.VectorExtractSingle</c>
/// (size 0) or <c>VectorHelper.VectorExtractDouble</c> (size 1).
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Source vector register index.</param>
/// <param name="index">Element index; must be below <c>4 &gt;&gt; size</c>.</param>
/// <param name="size">0 for single precision, 1 for double precision.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorExtractF(ILEmitterCtx context, int reg, int index, int size)
{
    ThrowIfInvalidF(index, size);

    context.EmitLdvec(reg);
    context.EmitLdc_I4(index);

    switch (size)
    {
        case 0:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractSingle));
            break;

        case 1:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorExtractDouble));
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(size));
    }
}
/// <summary>
/// Emits IL that clears an entire vector register. With SSE enabled the result of
/// <c>VectorHelper.VectorSingleZero</c> is stored directly; otherwise the lower and
/// upper halves are zeroed separately.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Vector register index to clear.</param>
public static void EmitVectorZeroAll(ILEmitterCtx context, int reg)
{
    if (!Optimizations.UseSse)
    {
        EmitVectorZeroLower(context, reg);
        EmitVectorZeroUpper(context, reg);

        return;
    }

    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

    context.EmitStvec(reg);
}
/// <summary>Emits IL that writes 0 into 64-bit element 0 (the lower half) of a vector register.</summary>
public static void EmitVectorZeroLower(ILEmitterCtx context, int reg)
    => EmitVectorInsert(context, reg, index: 0, size: 3, value: 0);
/// <summary>
/// Emits IL that clears the lower half of the temporary vector while keeping its upper half.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
public static void EmitVectorZeroLowerTmp(ILEmitterCtx context)
{
    if (!Optimizations.UseSse)
    {
        // Write 0 into 64-bit element 0 of the temporary.
        EmitVectorInsertTmp(context, 0, 3, 0);

        return;
    }

    // MoveHighToLow(tmp, zero): the upper half comes from tmp, the lower half from the zero vector.
    context.EmitLdvectmp();

    VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));

    context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow)));

    context.EmitStvectmp();
}
/// <summary>
/// Emits IL that clears the upper half of a vector register while keeping its lower half.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Vector register index to modify.</param>
public static void EmitVectorZeroUpper(ILEmitterCtx context, int reg)
{
if (Optimizations.UseSse)
{
// TODO: Use Sse2.MoveScalar once it is fixed (in .NET Core 3.0),
// as of the time of writing it just crashes the JIT.
/*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) };
context.EmitLdvec(reg);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov));
context.EmitStvec(reg);*/
// MoveLowToHigh(reg, zero): the lower half comes from reg, the upper half from the zero vector.
context.EmitLdvec(reg);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh)));
context.EmitStvec(reg);
}
else
{
// Fallback: write 0 into 64-bit element 1 (the upper half).
EmitVectorInsert(context, reg, 1, 3, 0);
}
}
/// <summary>
/// Emits IL that keeps the lowest single-precision element of a vector register and
/// clears the remaining elements, using <c>Sse.MoveScalar</c>.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Vector register index to modify.</param>
/// <exception cref="PlatformNotSupportedException">SSE is not supported by the host CPU.</exception>
public static void EmitVectorZero32_128(ILEmitterCtx context, int reg)
{
// NOTE(review): this checks the hardware flag directly, whereas the other helpers consult
// Optimizations.UseSse - confirm the difference is intentional.
if (!Sse.IsSupported)
{
throw new PlatformNotSupportedException();
}
// MoveScalar(zero, reg): upper three elements come from the zero vector, the lowest from reg.
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
context.EmitLdvec(reg);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveScalar)));
context.EmitStvec(reg);
}
/// <summary>
/// Emits IL that loads vector register <paramref name="reg"/>, pushes
/// <paramref name="index"/> and <paramref name="size"/>, calls
/// <c>VectorHelper.VectorInsertInt</c> and stores the result back to the register.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Destination vector register index.</param>
/// <param name="index">Element index; must be below <c>16 &gt;&gt; size</c>.</param>
/// <param name="size">Element size selector, 0..3.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsert(ILEmitterCtx context, int reg, int index, int size)
{
ThrowIfInvalid(index, size);
// NOTE(review): the value to insert is expected to already be on the evaluation stack,
// below the vector pushed here.
context.EmitLdvec(reg);
context.EmitLdc_I4(index);
context.EmitLdc_I4(size);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt));
context.EmitStvec(reg);
}
/// <summary>
/// Emits IL that loads the temporary vector, pushes <paramref name="index"/> and
/// <paramref name="size"/>, calls <c>VectorHelper.VectorInsertInt</c> and stores the
/// result back to the temporary vector.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="index">Element index; must be below <c>16 &gt;&gt; size</c>.</param>
/// <param name="size">Element size selector, 0..3.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size)
{
ThrowIfInvalid(index, size);
// NOTE(review): the value to insert is expected to already be on the evaluation stack,
// below the vector pushed here.
context.EmitLdvectmp();
context.EmitLdc_I4(index);
context.EmitLdc_I4(size);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt));
context.EmitStvectmp();
}
/// <summary>
/// Emits IL that inserts the constant <paramref name="value"/> into an element of vector
/// register <paramref name="reg"/> via <c>VectorHelper.VectorInsertInt</c>.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Destination vector register index.</param>
/// <param name="index">Element index; must be below <c>16 &gt;&gt; size</c>.</param>
/// <param name="size">Element size selector, 0..3.</param>
/// <param name="value">Constant emitted as an int64 literal.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsert(ILEmitterCtx context, int reg, int index, int size, long value)
{
ThrowIfInvalid(index, size);
// Argument order for the helper call: value, vector, index, size.
context.EmitLdc_I8(value);
context.EmitLdvec(reg);
context.EmitLdc_I4(index);
context.EmitLdc_I4(size);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt));
context.EmitStvec(reg);
}
/// <summary>
/// Emits IL that inserts the constant <paramref name="value"/> into an element of the
/// temporary vector via <c>VectorHelper.VectorInsertInt</c>.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="index">Element index; must be below <c>16 &gt;&gt; size</c>.</param>
/// <param name="size">Element size selector, 0..3.</param>
/// <param name="value">Constant emitted as an int64 literal.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsertTmp(ILEmitterCtx context, int index, int size, long value)
{
ThrowIfInvalid(index, size);
// Argument order for the helper call: value, vector, index, size.
context.EmitLdc_I8(value);
context.EmitLdvectmp();
context.EmitLdc_I4(index);
context.EmitLdc_I4(size);
VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertInt));
context.EmitStvectmp();
}
/// <summary>
/// Emits IL that loads vector register <paramref name="reg"/>, pushes
/// <paramref name="index"/>, calls <c>VectorHelper.VectorInsertSingle</c> (size 0) or
/// <c>VectorHelper.VectorInsertDouble</c> (size 1) and stores the result back to the register.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="reg">Destination vector register index.</param>
/// <param name="index">Element index; must be below <c>4 &gt;&gt; size</c>.</param>
/// <param name="size">0 for single precision, 1 for double precision.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsertF(ILEmitterCtx context, int reg, int index, int size)
{
    ThrowIfInvalidF(index, size);

    context.EmitLdvec(reg);
    context.EmitLdc_I4(index);

    switch (size)
    {
        case 0:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle));
            break;

        case 1:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble));
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(size));
    }

    context.EmitStvec(reg);
}
/// <summary>
/// Emits IL that loads the temporary vector, pushes <paramref name="index"/>, calls
/// <c>VectorHelper.VectorInsertSingle</c> (size 0) or <c>VectorHelper.VectorInsertDouble</c>
/// (size 1) and stores the result back to the temporary vector.
/// </summary>
/// <param name="context">IL emitter context for the instruction being translated.</param>
/// <param name="index">Element index; must be below <c>4 &gt;&gt; size</c>.</param>
/// <param name="size">0 for single precision, 1 for double precision.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> or <paramref name="size"/> is out of range.</exception>
public static void EmitVectorInsertTmpF(ILEmitterCtx context, int index, int size)
{
    ThrowIfInvalidF(index, size);

    context.EmitLdvectmp();
    context.EmitLdc_I4(index);

    switch (size)
    {
        case 0:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertSingle));
            break;

        case 1:
            VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInsertDouble));
            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(size));
    }

    context.EmitStvectmp();
}
/// <summary>
/// Validates an integer element reference into a 16-byte vector.
/// </summary>
/// <param name="index">Element index; must satisfy <c>0 &lt;= index &lt; (16 &gt;&gt; size)</c>.</param>
/// <param name="size">Element size selector; must be within 0..3.</param>
/// <exception cref="ArgumentOutOfRangeException">Either argument is out of range; <c>size</c> is checked first.</exception>
private static void ThrowIfInvalid(int index, int size)
{
    if (size < 0 || size > 3)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    // 16, 8, 4 or 2 elements depending on the element size.
    int elemCount = 16 >> size;

    if (index < 0 || index >= elemCount)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }
}
/// <summary>
/// Validates a floating-point element reference into a 16-byte vector.
/// </summary>
/// <param name="index">Element index; must satisfy <c>0 &lt;= index &lt; (4 &gt;&gt; size)</c>.</param>
/// <param name="size">0 for single precision, 1 for double precision.</param>
/// <exception cref="ArgumentOutOfRangeException">Either argument is out of range; <c>size</c> is checked first.</exception>
private static void ThrowIfInvalidF(int index, int size)
{
    if (size < 0 || size > 1)
    {
        throw new ArgumentOutOfRangeException(nameof(size));
    }

    // 4 floats or 2 doubles per vector.
    int elemCount = 4 >> size;

    if (index < 0 || index >= elemCount)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }
}
}
}