using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; namespace ChocolArm64.Instruction { static class AVectorHelper { private static readonly Vector128<float> Zero32_128Mask; static AVectorHelper() { if (!Sse2.IsSupported) { throw new PlatformNotSupportedException(); } Zero32_128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff)); } public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128) { bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64; Context.EmitCall(typeof(AVectorHelper), IsSimd64 ? Name64 : Name128); } public static void EmitCall(AILEmitterCtx Context, string MthdName) { Context.EmitCall(typeof(AVectorHelper), MthdName); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int SatF32ToS32(float Value) { if (float.IsNaN(Value)) return 0; return Value > int.MaxValue ? int.MaxValue : Value < int.MinValue ? int.MinValue : (int)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long SatF32ToS64(float Value) { if (float.IsNaN(Value)) return 0; return Value > long.MaxValue ? long.MaxValue : Value < long.MinValue ? long.MinValue : (long)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint SatF32ToU32(float Value) { if (float.IsNaN(Value)) return 0; return Value > uint.MaxValue ? uint.MaxValue : Value < uint.MinValue ? uint.MinValue : (uint)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ulong SatF32ToU64(float Value) { if (float.IsNaN(Value)) return 0; return Value > ulong.MaxValue ? ulong.MaxValue : Value < ulong.MinValue ? ulong.MinValue : (ulong)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int SatF64ToS32(double Value) { if (double.IsNaN(Value)) return 0; return Value > int.MaxValue ? int.MaxValue : Value < int.MinValue ? int.MinValue : (int)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long SatF64ToS64(double Value) { if (double.IsNaN(Value)) return 0; return Value > long.MaxValue ? long.MaxValue : Value < long.MinValue ? long.MinValue : (long)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static uint SatF64ToU32(double Value) { if (double.IsNaN(Value)) return 0; return Value > uint.MaxValue ? uint.MaxValue : Value < uint.MinValue ? uint.MinValue : (uint)Value; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ulong SatF64ToU64(double Value) { if (double.IsNaN(Value)) return 0; return Value > ulong.MaxValue ? ulong.MaxValue : Value < ulong.MinValue ? ulong.MinValue : (ulong)Value; } public static double Round(double Value, int Fpcr) { switch ((ARoundMode)((Fpcr >> 22) & 3)) { case ARoundMode.ToNearest: return Math.Round (Value); case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value); case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value); case ARoundMode.TowardsZero: return Math.Truncate(Value); } throw new InvalidOperationException(); } public static float RoundF(float Value, int Fpcr) { switch ((ARoundMode)((Fpcr >> 22) & 3)) { case ARoundMode.ToNearest: return MathF.Round (Value); case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value); case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value); case ARoundMode.TowardsZero: return MathF.Truncate(Value); } throw new InvalidOperationException(); } public static Vector128<float> Tbl1_V64( Vector128<float> Vector, Vector128<float> Tb0) { return Tbl(Vector, 8, Tb0); } public static Vector128<float> Tbl1_V128( Vector128<float> Vector, Vector128<float> Tb0) { return Tbl(Vector, 16, Tb0); } public static Vector128<float> Tbl2_V64( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1) { return Tbl(Vector, 8, Tb0, Tb1); } public static Vector128<float> Tbl2_V128( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1) { return Tbl(Vector, 16, Tb0, Tb1); } public static Vector128<float> Tbl3_V64( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2) { return Tbl(Vector, 8, Tb0, Tb1, Tb2); } public static Vector128<float> Tbl3_V128( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2) { return Tbl(Vector, 16, Tb0, Tb1, Tb2); } public static Vector128<float> Tbl4_V64( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2, Vector128<float> Tb3) { return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3); } public static Vector128<float> Tbl4_V128( Vector128<float> Vector, Vector128<float> Tb0, Vector128<float> Tb1, Vector128<float> Tb2, Vector128<float> Tb3) { return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3); } private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb) { Vector128<float> Res = new Vector128<float>(); byte[] Table = new byte[Tb.Length * 16]; for (byte Index = 0; Index < Tb.Length; Index++) for (byte Index2 = 0; Index2 < 16; Index2++) { Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0); } for (byte Index = 0; Index < Bytes; Index++) { byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0); if (TblIdx < Table.Length) { Res = VectorInsertInt(Table[TblIdx], Res, Index, 0); } } return Res; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double VectorExtractDouble(Vector128<float> Vector, byte Index) { return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size) { if (Sse41.IsSupported) { switch (Size) { case 0: return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); case 1: return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); case 2: return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index); case 3: return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index); } throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { switch (Size) { case 0: return (sbyte)VectorExtractIntZx(Vector, Index, Size); case 1: return (short)VectorExtractIntZx(Vector, Index, Size); case 2: return (int)VectorExtractIntZx(Vector, Index, Size); case 3: return (long)VectorExtractIntZx(Vector, Index, Size); } throw new ArgumentOutOfRangeException(nameof(Size)); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size) { if (Sse41.IsSupported) { switch (Size) { case 0: return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); case 1: return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); case 2: return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index); case 3: return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index); } throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { int ShortIdx = Size == 0 ? Index >> 1 : Index << (Size - 1); ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx); switch (Size) { case 0: return (byte)(Value >> (Index & 1) * 8); case 1: return Value; case 2: case 3: { ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1)); if (Size == 2) { return (uint)(Value | (Value1 << 16)); } ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2)); ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3)); return ((ulong)Value << 0) | ((ulong)Value1 << 16) | ((ulong)Value2 << 32) | ((ulong)Value3 << 48); } } throw new ArgumentOutOfRangeException(nameof(Size)); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static float VectorExtractSingle(Vector128<float> Vector, byte Index) { if (Sse41.IsSupported) { return Sse41.Extract(Vector, Index); } else if (Sse2.IsSupported) { Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector); int Low = Sse2.Extract(ShortVector, (byte)(Index * 2 + 0)); int High = Sse2.Extract(ShortVector, (byte)(Index * 2 + 1)); return BitConverter.Int32BitsToSingle(Low | (High << 16)); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index) { return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(Value), Vector, Index, 3); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size) { if (Sse41.IsSupported) { switch (Size) { case 0: return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index)); case 1: return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); case 2: return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index)); case 3: return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index)); } throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector); int ShortIdx = Size == 0 ? Index >> 1 : Index << (Size - 1); switch (Size) { case 0: { ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx); int Shift = (Index & 1) * 8; ShortVal &= (ushort)(0xff00 >> Shift); ShortVal |= (ushort)((byte)Value << Shift); return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx)); } case 1: return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); case 2: case 3: { ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0)); ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1)); if (Size == 3) { ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2)); ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3)); } return Sse.StaticCast<ushort, float>(ShortVector); } } throw new ArgumentOutOfRangeException(nameof(Size)); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index) { if (Sse41.IsSupported) { return Sse41.Insert(Vector, Value, (byte)(Index << 4)); } else if (Sse2.IsSupported) { int IntValue = BitConverter.SingleToInt32Bits(Value); ushort Low = (ushort)(IntValue >> 0); ushort High = (ushort)(IntValue >> 16); Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector); ShortVector = Sse2.Insert(ShortVector, Low, (byte)(Index * 2 + 0)); ShortVector = Sse2.Insert(ShortVector, High, (byte)(Index * 2 + 1)); return Sse.StaticCast<ushort, float>(ShortVector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorZero32_128(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.And(Vector, Zero32_128Mask); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<float, sbyte>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<float, short>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<float, int>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<float, long>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<float, double>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<sbyte, float>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<short, float>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<int, float>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<long, float>(Vector); } throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector) { if (Sse.IsSupported) { return Sse.StaticCast<double, float>(Vector); } throw new PlatformNotSupportedException(); } } }