ChocolArm64: More accurate implementation of Frecpe & Frecps (#228)

* ChocolArm64: More accurate implementation of Frecpe

* ChocolArm64: Handle infinities and zeros in Frecps
This commit is contained in:
Merry 2018-07-08 20:54:47 +01:00 committed by gdkchan
parent 666e7e2e4c
commit a47b96571e
3 changed files with 154 additions and 86 deletions

View file

@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction
public static void Frecpe_S(AILEmitterCtx Context)
{
EmitFrecpe(Context, 0, Scalar: true);
EmitScalarUnaryOpF(Context, () =>
{
EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
});
}
public static void Frecpe_V(AILEmitterCtx Context)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
EmitVectorUnaryOpF(Context, () =>
{
EmitFrecpe(Context, Index, Scalar: false);
}
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
if (SizeF == 0)
{
Context.EmitLdc_R4(1);
}
else /* if (SizeF == 1) */
{
Context.EmitLdc_R8(1);
}
EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
Context.Emit(OpCodes.Div);
if (Scalar)
{
EmitVectorZeroAll(Context, Op.Rd);
}
EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
});
}
public static void Frecps_S(AILEmitterCtx Context)
{
EmitFrecps(Context, 0, Scalar: true);
EmitScalarBinaryOpF(Context, () =>
{
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
});
}
public static void Frecps_V(AILEmitterCtx Context)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
int Bytes = Context.CurrOp.GetBitsCount() >> 3;
for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
EmitVectorBinaryOpF(Context, () =>
{
EmitFrecps(Context, Index, Scalar: false);
}
if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
}
private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar)
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
int SizeF = Op.Size & 1;
if (SizeF == 0)
{
Context.EmitLdc_R4(2);
}
else /* if (SizeF == 1) */
{
Context.EmitLdc_R8(2);
}
EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
Context.Emit(OpCodes.Mul);
Context.Emit(OpCodes.Sub);
if (Scalar)
{
EmitVectorZeroAll(Context, Op.Rd);
}
EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
});
}
public static void Frinta_S(AILEmitterCtx Context)

View file

@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction
Context.EmitCall(MthdInfo);
}
public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name)
{
IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
int SizeF = Op.Size & 1;
MethodInfo MthdInfo;
if (SizeF == 0)
{
MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) });
}
else /* if (SizeF == 1) */
{
MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) });
}
Context.EmitCall(MthdInfo);
}
public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
{
AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;

View file

@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction
static ASoftFloat()
{
InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
RecipEstimateTable = BuildRecipEstimateTable();
}
private static readonly byte[] RecipEstimateTable;
private static readonly byte[] InvSqrtEstimateTable;
private static byte[] BuildInvSqrtEstimateTable()
@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction
return Table;
}
private static byte[] BuildRecipEstimateTable()
{
byte[] Table = new byte[256];
for (ulong index = 0; index < 256; index++)
{
ulong a = index | 0x100;
a = (a << 1) + 1;
ulong b = 0x80000 / a;
b = (b + 1) >> 1;
Table[index] = (byte)(b & 0xFF);
}
return Table;
}
public static float InvSqrtEstimate(float x)
{
return (float)InvSqrtEstimate((double)x);
@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction
ulong result = x_sign | (result_exp << 52) | fraction;
return BitConverter.Int64BitsToDouble((long)result);
}
public static float RecipEstimate(float x)
{
return (float)RecipEstimate((double)x);
}
public static double RecipEstimate(double x)
{
ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x);
ulong x_sign = x_bits & 0x8000000000000000;
ulong x_exp = (x_bits >> 52) & 0x7FF;
ulong scaled = x_bits & ((1ul << 52) - 1);
if (x_exp >= 2045)
{
if (x_exp == 0x7ff && scaled != 0)
{
// NaN
return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000));
}
// Infinity, or Out of range -> Zero
return BitConverter.Int64BitsToDouble((long)x_sign);
}
if (x_exp == 0)
{
if (scaled == 0)
{
// Zero -> Infinity
return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000));
}
// Denormal
if ((scaled & (1ul << 51)) == 0)
{
x_exp = ~0ul;
scaled <<= 2;
}
else
{
scaled <<= 1;
}
}
scaled >>= 44;
scaled &= 0xFF;
ulong result_exp = (2045 - x_exp) & 0x7FF;
ulong estimate = (ulong)RecipEstimateTable[scaled];
ulong fraction = estimate << 44;
if (result_exp == 0)
{
fraction >>= 1;
fraction |= 1ul << 51;
}
else if (result_exp == 0x7FF)
{
result_exp = 0;
fraction >>= 2;
fraction |= 1ul << 50;
}
ulong result = x_sign | (result_exp << 52) | fraction;
return BitConverter.Int64BitsToDouble((long)result);
}
public static float RecipStep(float op1, float op2)
{
return (float)RecipStep((double)op1, (double)op2);
}
public static double RecipStep(double op1, double op2)
{
op1 = -op1;
ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1);
ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2);
ulong op1_sign = op1_bits & 0x8000000000000000;
ulong op2_sign = op2_bits & 0x8000000000000000;
ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF;
ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF;
bool inf1 = op1_other == 0x7ff0000000000000;
bool inf2 = op2_other == 0x7ff0000000000000;
bool zero1 = op1_other == 0;
bool zero2 = op2_other == 0;
if ((inf1 && zero2) || (zero1 && inf2))
{
return 2.0;
}
else if (inf1 || inf2)
{
// Infinity
return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign)));
}
return 2.0 + op1 * op2;
}
}
}