Add Fmax/minv_V & S/Ushl_S Inst.s with Tests. Fix Maxps/d & Minps/d d… (#1335)

* Add Fmax/minv_V & S/Ushl_S Inst.s with Tests. Fix Maxps/d & Minps/d double zero sign handling. Allows better handling of NaNs.
* Optimized EmitSse2VectorIsNaNOpF() for multiple uses per opF.

This commit is contained in:
parent d7044b10a2
commit a804db6eed

11 changed files with 698 additions and 164 deletions
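For context on the "double zero sign handling" in the title: x86 maxps/minps return the second source operand when the inputs compare equal, so max(+0, -0) can come out as -0, which differs from the ARM result. The new EmitSse2VectorMaxMinOpF helper in the diff below therefore rebuilds the sign bit separately: for max the result is negative only when both inputs are negative (AND of signs), for min when either is (OR). A minimal scalar sketch of that trick, using illustrative names rather than the emitter API (NaNs are excluded here, they are handled by the ProcessNaNs wrapper):

```csharp
using System;

static class MaxMinSignSketch
{
    // Scalar model of the sign handling in the new EmitSse2VectorMaxMinOpF helper.
    public static float MaxMin(float n, float m, bool isMax)
    {
        const int signBit = unchecked((int)0x80000000);

        int nBits = BitConverter.SingleToInt32Bits(n);
        int mBits = BitConverter.SingleToInt32Bits(m);

        // Magnitude part: what maxss/minss would give (second operand on equality).
        float raw = isMax ? (n > m ? n : m) : (n < m ? n : m);
        int magnitude = BitConverter.SingleToInt32Bits(raw) & ~signBit;

        // Sign part: AND of the sign bits for max, OR for min.
        int sign = (isMax ? nBits & mBits : nBits | mBits) & signBit;

        return BitConverter.Int32BitsToSingle(magnitude | sign);
    }
}
```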
@@ -332,14 +332,18 @@ namespace ARMeilleure.Decoders
 SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg));
 SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg));
 SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg));
+SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, typeof(OpCodeSimdReg));
 SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, typeof(OpCodeSimd));
 SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg));
+SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, typeof(OpCodeSimd));
 SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg));
 SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg));
 SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg));
 SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg));
+SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, typeof(OpCodeSimdReg));
 SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, typeof(OpCodeSimd));
 SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg));
+SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, typeof(OpCodeSimd));
 SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF));
 SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg));
 SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Ve, InstEmit.Fmla_Ve, typeof(OpCodeSimdRegElemF));
@@ -529,6 +533,7 @@ namespace ARMeilleure.Decoders
 SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, typeof(OpCodeSimdShImm));
 SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
 SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
+SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, typeof(OpCodeSimdReg));
 SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg));
 SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm));
 SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm));
@@ -611,6 +616,7 @@ namespace ARMeilleure.Decoders
 SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm));
 SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
 SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
+SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, typeof(OpCodeSimdReg));
 SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg));
 SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm));
 SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, typeof(OpCodeSimdShImm));
@@ -382,7 +382,14 @@ namespace ARMeilleure.Instructions
 {
 if (Optimizations.FastFP && Optimizations.UseSse2)
 {
-EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+
+return context.AddIntrinsic(addInst, op1, op2);
+});
 }
 else
 {
@@ -468,9 +475,12 @@ namespace ARMeilleure.Instructions
 
 public static void Fmax_S(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
+EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: true);
 }
 else
 {
@@ -483,9 +493,12 @@ namespace ARMeilleure.Instructions
 
 public static void Fmax_V(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: false);
 }
 else
 {
@@ -526,19 +539,53 @@ namespace ARMeilleure.Instructions
 }
 }
 
+public static void Fmaxnmp_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+});
+}
+else
+{
+EmitVectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+});
+}
+}
+
 public static void Fmaxnmv_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+});
+}
+else
 {
 EmitVectorAcrossVectorOpF(context, (op1, op2) =>
 {
 return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)), op1, op2);
 });
 }
+}
 
 public static void Fmaxp_V(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: false, op1, op2);
+});
 }
 else
 {
@@ -549,11 +596,35 @@ namespace ARMeilleure.Instructions
 }
 }
 
+public static void Fmaxv_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+}, scalar: false, op1, op2);
+});
+}
+else
+{
+EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMax)), op1, op2);
+});
+}
+}
+
 public static void Fmin_S(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
+EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: true);
 }
 else
 {
@@ -566,9 +637,12 @@ namespace ARMeilleure.Instructions
 
 public static void Fmin_V(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: false);
 }
 else
 {
@@ -609,19 +683,53 @@ namespace ARMeilleure.Instructions
 }
 }
 
+public static void Fminnmp_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+});
+}
+else
+{
+EmitVectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+});
+}
+}
+
 public static void Fminnmv_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+});
+}
+else
 {
 EmitVectorAcrossVectorOpF(context, (op1, op2) =>
 {
 return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)), op1, op2);
 });
 }
+}
 
 public static void Fminp_V(ArmEmitterContext context)
 {
-if (Optimizations.FastFP && Optimizations.UseSse2)
+if (Optimizations.FastFP && Optimizations.UseSse41)
 {
-EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+{
+return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: false, op1, op2);
+});
 }
 else
 {
@@ -632,6 +740,27 @@ namespace ARMeilleure.Instructions
 }
 }
 
+public static void Fminv_V(ArmEmitterContext context)
+{
+if (Optimizations.FastFP && Optimizations.UseSse41)
+{
+EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+}, scalar: false, op1, op2);
+});
+}
+else
+{
+EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+{
+return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMin)), op1, op2);
+});
+}
+}
+
 public static void Fmla_Se(ArmEmitterContext context) // Fused.
 {
 EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
|
@ -3111,7 +3240,12 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
|
public static void EmitSse2VectorIsNaNOpF(
|
||||||
|
ArmEmitterContext context,
|
||||||
|
Operand opF,
|
||||||
|
out Operand qNaNMask,
|
||||||
|
out Operand sNaNMask,
|
||||||
|
bool? isQNaN = null)
|
||||||
{
|
{
|
||||||
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -3126,7 +3260,8 @@ namespace ARMeilleure.Instructions
|
||||||
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
|
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
|
||||||
mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
|
mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
|
||||||
|
|
||||||
return context.AddIntrinsic(Intrinsic.X86Andps, mask1, mask2);
|
qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : null;
|
||||||
|
sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : null;
|
||||||
}
|
}
|
||||||
else /* if ((op.Size & 1) == 1) */
|
else /* if ((op.Size & 1) == 1) */
|
||||||
{
|
{
|
||||||
|
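For reference, the two masks the reworked EmitSse2VectorIsNaNOpF produces can be modelled per lane as below: mask1 is "the value is a NaN", mask2 checks the quiet bit (bit 22 for single precision, bit 51 for double), and the quiet/signalling masks are their AND and AND-NOT, which is what the andps/andnps (andpd/andnpd) pair computes. A hedged scalar sketch, not the emitter code itself:

```csharp
using System;

static class NaNClassifySketch
{
    // Per-lane model of the qNaNMask/sNaNMask outputs of EmitSse2VectorIsNaNOpF.
    public static (bool isQNaN, bool isSNaN) Classify(float value)
    {
        bool isNaN = float.IsNaN(value);                                       // mask1
        bool quiet = (BitConverter.SingleToInt32Bits(value) & (1 << 22)) != 0; // mask2

        return (isNaN && quiet, isNaN && !quiet);
    }
}
```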
@@ -3139,67 +3274,202 @@ namespace ARMeilleure.Instructions
 Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
 mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
 
-return context.AddIntrinsic(Intrinsic.X86Andpd, mask1, mask2);
+qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : null;
+sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : null;
 }
 }
 
-private static void EmitSse41MaxMinNumOpF(ArmEmitterContext context, bool isMaxNum, bool scalar)
+public static Operand EmitSse41ProcessNaNsOpF(
+ArmEmitterContext context,
+Func2I emit,
+bool scalar,
+Operand n = null,
+Operand m = null)
 {
-OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn));
+Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm));
 
-Operand d = GetVec(op.Rd);
-Operand n = GetVec(op.Rn);
-Operand m = GetVec(op.Rm);
+EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask);
+EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false);
 
-Operand nNum = context.Copy(n);
-Operand mNum = context.Copy(m);
-
-Operand nQNaNMask = EmitSse2VectorIsQNaNOpF(context, nNum);
-Operand mQNaNMask = EmitSse2VectorIsQNaNOpF(context, mNum);
-
-int sizeF = op.Size & 1;
+int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
 
 if (sizeF == 0)
 {
-Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+const int QBit = 22;
 
-Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
-Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
 
-nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
-mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
 
-Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
+resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask);
+
+if (n != null || m != null)
+{
+return res;
+}
 
 if (scalar)
 {
 res = context.VectorZeroUpper96(res);
 }
-else if (op.RegisterSize == RegisterSize.Simd64)
+else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
 {
 res = context.VectorZeroUpper64(res);
 }
 
-context.Copy(d, res);
+context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+return null;
 }
 else /* if (sizeF == 1) */
 {
-Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+const int QBit = 51;
 
-Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
-Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
 
-nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
-mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
 
-Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
+resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask);
+
+if (n != null || m != null)
+{
+return res;
+}
 
 if (scalar)
 {
 res = context.VectorZeroUpper64(res);
 }
 
-context.Copy(d, res);
+context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+return null;
+}
+}
+
+private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
+{
+IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+if ((op.Size & 1) == 0)
+{
+Operand mask = X86GetAllElements(context, -0f);
+
+Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
+res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+
+Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+
+return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+}
+else /* if ((op.Size & 1) == 1) */
+{
+Operand mask = X86GetAllElements(context, -0d);
+
+Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
+res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+
+Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+
+return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+}
+}
+
+private static Operand EmitSse41MaxMinNumOpF(
+ArmEmitterContext context,
+bool isMaxNum,
+bool scalar,
+Operand n = null,
+Operand m = null)
+{
+Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn));
+Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm));
+
+EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true);
+EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true);
+
+int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
+
+if (sizeF == 0)
+{
+Operand negInfMask = scalar
+? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+: X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+
+Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
+Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
+
+nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask);
+mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
+
+Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+}, scalar: scalar, nCopy, mCopy);
+
+if (n != null || m != null)
+{
+return res;
+}
+
+if (scalar)
+{
+res = context.VectorZeroUpper96(res);
+}
+else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+{
+res = context.VectorZeroUpper64(res);
+}
+
+context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+return null;
+}
+else /* if (sizeF == 1) */
+{
+Operand negInfMask = scalar
+? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+: X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+
+Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
+Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
+
+nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask);
+mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
+
+Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+{
+return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+}, scalar: scalar, nCopy, mCopy);
+
+if (n != null || m != null)
+{
+return res;
+}
+
+if (scalar)
+{
+res = context.VectorZeroUpper64(res);
+}
+
+context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+return null;
 }
 }
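The EmitSse41MaxMinNumOpF helper above implements the FMAXNM/FMINNM rule that a quiet NaN on exactly one side is treated as "missing": that operand is first blended to -Inf (maxnm) or +Inf (minnm), so the NaN-aware max/min then naturally selects the other value. A scalar approximation of that behaviour; the MathF calls stand in for the SSE path and are an assumption of this sketch, not what the emitter produces:

```csharp
using System;

static class MaxMinNumSketch
{
    // Quiet NaN on one side -> replace it with -Inf (maxnm) or +Inf (minnm),
    // then take the ordinary max/min so the non-NaN operand wins.
    public static float MaxMinNum(float n, float m, bool isMaxNum)
    {
        float replacement = isMaxNum ? float.NegativeInfinity : float.PositiveInfinity;

        bool nQNaN = IsQuietNaN(n);
        bool mQNaN = IsQuietNaN(m);

        if (nQNaN && !mQNaN) n = replacement; // nMask path in the emitted code
        if (mQNaN && !nQNaN) m = replacement; // mMask path

        return isMaxNum ? MathF.Max(n, m) : MathF.Min(n, m);
    }

    private static bool IsQuietNaN(float value) =>
        float.IsNaN(value) && (BitConverter.SingleToInt32Bits(value) & (1 << 22)) != 0;
}
```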
@@ -1200,8 +1200,8 @@ namespace ARMeilleure.Instructions
 Operand nNum = context.Copy(n);
 Operand mNum = context.Copy(m);
 
-Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum);
-Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum);
+InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
+InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);
 
 int sizeF = op.Size & 1;
 
@@ -1095,6 +1095,29 @@ namespace ARMeilleure.Instructions
 context.Copy(GetVec(op.Rd), d);
 }
 
+public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+{
+OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
+const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
+const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
+const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
+
+Operand nCopy = context.Copy(GetVec(op.Rn));
+
+Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0));
+Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1));
+Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2));
+Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3));
+
+Operand res = emit(emit(part0, part1), emit(part2, part3));
+
+context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+}
+
 public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
 {
 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
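The new EmitSse2VectorAcrossVectorOpF helper above splats each of the four single-precision lanes with shufps and folds them pairwise through the supplied callback. Conceptually the reduction is just the tree below; a sketch only, with the callback standing in for the NaN-aware max/min or maxnum/minnum emitters used by the new Fmaxv/Fminv/Fmaxnmv/Fminnmv paths:

```csharp
using System;

static class AcrossVectorSketch
{
    // Scalar form of the across-vector reduction tree.
    public static float Reduce(float[] lanes, Func<float, float, float> emit)
    {
        // lanes.Length == 4, mirroring the Simd128 Debug.Assert in the helper.
        return emit(emit(lanes[0], lanes[1]), emit(lanes[2], lanes[3]));
    }
}
```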
@@ -1124,12 +1147,12 @@ namespace ARMeilleure.Instructions
 context.Copy(GetVec(op.Rd), res);
 }
 
-public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
 {
 OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
 
-Operand n = GetVec(op.Rn);
-Operand m = GetVec(op.Rm);
+Operand nCopy = context.Copy(GetVec(op.Rn));
+Operand mCopy = context.Copy(GetVec(op.Rm));
 
 int sizeF = op.Size & 1;
 
|
@ -1137,32 +1160,32 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (op.RegisterSize == RegisterSize.Simd64)
|
if (op.RegisterSize == RegisterSize.Simd64)
|
||||||
{
|
{
|
||||||
Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
|
Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
|
||||||
|
|
||||||
Operand zero = context.VectorZero();
|
Operand zero = context.VectorZero();
|
||||||
|
|
||||||
Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
|
||||||
Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
|
context.Copy(GetVec(op.Rd), emit(part0, part1));
|
||||||
}
|
}
|
||||||
else /* if (op.RegisterSize == RegisterSize.Simd128) */
|
else /* if (op.RegisterSize == RegisterSize.Simd128) */
|
||||||
{
|
{
|
||||||
const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
|
const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
|
||||||
const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
|
const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
|
||||||
|
|
||||||
Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0));
|
||||||
Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1));
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
|
context.Copy(GetVec(op.Rd), emit(part0, part1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /* if (sizeF == 1) */
|
else /* if (sizeF == 1) */
|
||||||
{
|
{
|
||||||
Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
|
Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
|
||||||
Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);
|
Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1));
|
context.Copy(GetVec(op.Rd), emit(part0, part1));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -391,25 +391,14 @@ namespace ARMeilleure.Instructions
 }
 }
 
-public static void Sshl_V(ArmEmitterContext context)
+public static void Sshl_S(ArmEmitterContext context)
 {
-OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
-
-Operand res = context.VectorZero();
-
-int elems = op.GetBytesCount() >> op.Size;
-
-for (int index = 0; index < elems; index++)
-{
-Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
-Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
-
-Operand e = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlReg)), ne, me, Const(0), Const(op.Size));
-
-res = EmitVectorInsert(context, res, e, index, op.Size);
-}
-
-context.Copy(GetVec(op.Rd), res);
+EmitSshlOrUshl(context, signed: true, scalar: true);
 }
 
+public static void Sshl_V(ArmEmitterContext context)
+{
+EmitSshlOrUshl(context, signed: true, scalar: false);
+}
+
 public static void Sshll_V(ArmEmitterContext context)
@@ -686,25 +675,14 @@ namespace ARMeilleure.Instructions
 }
 }
 
-public static void Ushl_V(ArmEmitterContext context)
+public static void Ushl_S(ArmEmitterContext context)
 {
-OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
-
-Operand res = context.VectorZero();
-
-int elems = op.GetBytesCount() >> op.Size;
-
-for (int index = 0; index < elems; index++)
-{
-Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
-Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0);
-
-Operand e = EmitUnsignedShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size);
-
-res = EmitVectorInsert(context, res, e, index, op.Size);
-}
-
-context.Copy(GetVec(op.Rd), res);
+EmitSshlOrUshl(context, signed: false, scalar: true);
 }
 
+public static void Ushl_V(ArmEmitterContext context)
+{
+EmitSshlOrUshl(context, signed: false, scalar: false);
+}
+
 public static void Ushll_V(ArmEmitterContext context)
@@ -894,7 +872,7 @@ namespace ARMeilleure.Instructions
 context.Copy(GetVec(op.Rd), res);
 }
 
-private static Operand EmitUnsignedShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size)
+private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool signed)
 {
 Debug.Assert(op.Type == OperandType.I64);
 Debug.Assert(shiftLsB.Type == OperandType.I32);
@@ -902,18 +880,33 @@ namespace ARMeilleure.Instructions
 
 Operand negShiftLsB = context.Negate(shiftLsB);
 
+Operand isInRange = context.BitwiseAnd(
+context.ICompareLess(shiftLsB, Const(8 << size)),
+context.ICompareLess(negShiftLsB, Const(8 << size)));
+
 Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0));
 
-Operand shl = context.ShiftLeft (op, shiftLsB);
-Operand shr = context.ShiftRightUI(op, negShiftLsB);
+Operand shl = context.ShiftLeft(op, shiftLsB);
 
-Operand res = context.ConditionalSelect(isPositive, shl, shr);
+Operand sarOrShr = signed
+? context.ShiftRightSI(op, negShiftLsB)
+: context.ShiftRightUI(op, negShiftLsB);
 
-Operand isOutOfRange = context.BitwiseOr(
-context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)),
-context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)));
+Operand res = context.ConditionalSelect(isPositive, shl, sarOrShr);
 
-return context.ConditionalSelect(isOutOfRange, Const(0UL), res);
+if (signed)
+{
+Operand isPositive2 = context.ICompareGreaterOrEqual(op, Const(0L));
+
+Operand res2 = context.ConditionalSelect(isPositive2, Const(0L), Const(-1L));
+res2 = context.ConditionalSelect(isPositive, Const(0L), res2);
+
+return context.ConditionalSelect(isInRange, res, res2);
+}
+else
+{
+return context.ConditionalSelect(isInRange, res, Const(0UL));
+}
 }
 
 private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
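The reworked EmitShlRegOp above encodes the SSHL/USHL register-shift rule: the shift amount is the signed low byte of the second operand, a positive amount shifts left, a negative amount shifts right (arithmetic when signed, logical when unsigned), and a magnitude of the element width or more yields 0, except that a negative signed value shifted right out of range fills to -1. A scalar model of that rule, illustrative only and not the emitter API:

```csharp
static class ShlRegSketch
{
    // Element width is 8 << size bits, matching the emitted range checks.
    public static long ShlReg(long value, sbyte shiftLsB, int size, bool signed)
    {
        int width = 8 << size;
        bool inRange = shiftLsB < width && -shiftLsB < width;

        long shifted = shiftLsB >= 0
            ? value << shiftLsB
            : (signed ? value >> -shiftLsB : (long)((ulong)value >> -shiftLsB));

        if (inRange)
        {
            return shifted;
        }

        // Out of range: unsigned results are 0; signed results are 0 for a left shift
        // or a non-negative value, and -1 (sign fill) otherwise.
        return signed && shiftLsB < 0 && value < 0 ? -1L : 0L;
    }
}
```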
|
@ -1174,5 +1167,26 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void EmitSshlOrUshl(ArmEmitterContext context, bool signed, bool scalar)
|
||||||
|
{
|
||||||
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||||
|
|
||||||
|
Operand res = context.VectorZero();
|
||||||
|
|
||||||
|
int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
|
||||||
|
|
||||||
|
for (int index = 0; index < elems; index++)
|
||||||
|
{
|
||||||
|
Operand ne = EmitVectorExtract (context, op.Rn, index, op.Size, signed);
|
||||||
|
Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0);
|
||||||
|
|
||||||
|
Operand e = EmitShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size, signed);
|
||||||
|
|
||||||
|
res = EmitVectorInsert(context, res, e, index, op.Size);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -212,14 +212,18 @@ namespace ARMeilleure.Instructions
 Fmax_V,
 Fmaxnm_S,
 Fmaxnm_V,
+Fmaxnmp_V,
 Fmaxnmv_V,
 Fmaxp_V,
+Fmaxv_V,
 Fmin_S,
 Fmin_V,
 Fminnm_S,
 Fminnm_V,
+Fminnmp_V,
 Fminnmv_V,
 Fminp_V,
+Fminv_V,
 Fmla_Se,
 Fmla_V,
 Fmla_Ve,
@@ -378,6 +382,7 @@ namespace ARMeilleure.Instructions
 Srshr_V,
 Srsra_S,
 Srsra_V,
+Sshl_S,
 Sshl_V,
 Sshll_V,
 Sshr_S,
@@ -444,6 +449,7 @@ namespace ARMeilleure.Instructions
 Urshr_V,
 Ursra_S,
 Ursra_V,
+Ushl_S,
 Ushl_V,
 Ushll_V,
 Ushr_S,
@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
 {
 private const string HeaderMagic = "PTChd";
 
-private const int InternalVersion = 9; //! To be incremented manually for each change to the ARMeilleure project.
+private const int InternalVersion = 10; //! To be incremented manually for each change to the ARMeilleure project.
 
 private const string BaseDir = "Ryujinx";
 
@@ -12,10 +12,14 @@ namespace Ryujinx.Tests.Cpu
 [TestFixture]
 public class CpuTest
 {
-private ulong _currAddress;
-private ulong _size;
+protected const ulong Size = 0x1000;
+protected const ulong CodeBaseAddress = 0x1000;
+protected const ulong DataBaseAddress = CodeBaseAddress + Size;
 
-private ulong _entryPoint;
+private const bool Ignore_FpcrFz_FpcrDn = false;
+private const bool IgnoreAllExcept_FpsrQc = false;
+
+private ulong _currAddress;
 
 private MemoryBlock _ram;
 
|
@ -28,6 +32,8 @@ namespace Ryujinx.Tests.Cpu
|
||||||
private static bool _unicornAvailable;
|
private static bool _unicornAvailable;
|
||||||
private UnicornAArch64 _unicornEmu;
|
private UnicornAArch64 _unicornEmu;
|
||||||
|
|
||||||
|
private bool _usingMemory;
|
||||||
|
|
||||||
static CpuTest()
|
static CpuTest()
|
||||||
{
|
{
|
||||||
_unicornAvailable = UnicornAArch64.IsAvailable();
|
_unicornAvailable = UnicornAArch64.IsAvailable();
|
||||||
|
@@ -41,14 +47,11 @@ namespace Ryujinx.Tests.Cpu
 [SetUp]
 public void Setup()
 {
-_currAddress = 0x1000;
-_size = 0x1000;
-
-_entryPoint = _currAddress;
+_currAddress = CodeBaseAddress;
 
-_ram = new MemoryBlock(_size);
-_memory = new MemoryManager(_ram, 1UL << 16);
-_memory.Map(_currAddress, 0, _size);
+_ram = new MemoryBlock(Size * 2);
+_memory = new MemoryManager(_ram, 1ul << 16);
+_memory.Map(CodeBaseAddress, 0, Size * 2);
 
 _context = CpuContext.CreateExecutionContext();
 
@@ -57,8 +60,9 @@ namespace Ryujinx.Tests.Cpu
 if (_unicornAvailable)
 {
 _unicornEmu = new UnicornAArch64();
-_unicornEmu.MemoryMap(_currAddress, _size, MemoryPermission.READ | MemoryPermission.EXEC);
-_unicornEmu.PC = _entryPoint;
+_unicornEmu.MemoryMap(CodeBaseAddress, Size, MemoryPermission.READ | MemoryPermission.EXEC);
+_unicornEmu.MemoryMap(DataBaseAddress, Size, MemoryPermission.READ | MemoryPermission.WRITE);
+_unicornEmu.PC = CodeBaseAddress;
 }
 }
 
@@ -73,6 +77,8 @@ namespace Ryujinx.Tests.Cpu
 _context = null;
 _cpuContext = null;
 _unicornEmu = null;
+
+_usingMemory = false;
 }
 
 protected void Reset()
@@ -169,11 +175,11 @@ namespace Ryujinx.Tests.Cpu
 
 protected void ExecuteOpcodes(bool runUnicorn = true)
 {
-_cpuContext.Execute(_context, _entryPoint);
+_cpuContext.Execute(_context, CodeBaseAddress);
 
 if (_unicornAvailable && runUnicorn)
 {
-_unicornEmu.RunForCount((_currAddress - _entryPoint - 4) / 4);
+_unicornEmu.RunForCount((_currAddress - CodeBaseAddress - 4) / 4);
 }
 }
 
@@ -199,6 +205,11 @@ namespace Ryujinx.Tests.Cpu
 int fpsr = 0,
 bool runUnicorn = true)
 {
+if (Ignore_FpcrFz_FpcrDn)
+{
+fpcr &= ~((int)FPCR.Fz | (int)FPCR.Dn);
+}
+
 Opcode(opcode);
 Opcode(0xD65F03C0); // RET
 SetContext(x0, x1, x2, x3, x31, v0, v1, v2, v3, v4, v5, v30, v31, overflow, carry, zero, negative, fpcr, fpsr);
@@ -207,6 +218,30 @@ namespace Ryujinx.Tests.Cpu
 return GetContext();
 }
 
+protected void SetWorkingMemory(ulong offset, byte[] data)
+{
+_memory.Write(DataBaseAddress + offset, data);
+
+if (_unicornAvailable)
+{
+_unicornEmu.MemoryWrite(DataBaseAddress + offset, data);
+}
+
+_usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too.
+}
+
+protected void SetWorkingMemory(ulong offset, byte data)
+{
+_memory.Write(DataBaseAddress + offset, data);
+
+if (_unicornAvailable)
+{
+_unicornEmu.MemoryWrite8(DataBaseAddress + offset, data);
+}
+
+_usingMemory = true; // When true, CompareAgainstUnicorn checks the working memory for equality too.
+}
+
 /// <summary>Rounding Mode control field.</summary>
 public enum RMode
 {
@@ -284,15 +319,20 @@ namespace Ryujinx.Tests.Cpu
 return;
 }
 
+if (IgnoreAllExcept_FpsrQc)
+{
+fpsrMask &= Fpsr.Qc;
+}
+
 if (fpSkips != FpSkips.None)
 {
 ManageFpSkips(fpSkips);
 }
 
-Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0]));
-Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1]));
-Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2]));
-Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3]));
+Assert.That(_context.GetX(0), Is.EqualTo(_unicornEmu.X[0]), "X0");
+Assert.That(_context.GetX(1), Is.EqualTo(_unicornEmu.X[1]), "X1");
+Assert.That(_context.GetX(2), Is.EqualTo(_unicornEmu.X[2]), "X2");
+Assert.That(_context.GetX(3), Is.EqualTo(_unicornEmu.X[3]), "X3");
 Assert.That(_context.GetX(4), Is.EqualTo(_unicornEmu.X[4]));
 Assert.That(_context.GetX(5), Is.EqualTo(_unicornEmu.X[5]));
 Assert.That(_context.GetX(6), Is.EqualTo(_unicornEmu.X[6]));
@@ -321,21 +361,21 @@ namespace Ryujinx.Tests.Cpu
 Assert.That(_context.GetX(29), Is.EqualTo(_unicornEmu.X[29]));
 Assert.That(_context.GetX(30), Is.EqualTo(_unicornEmu.X[30]));
 
-Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP));
+Assert.That(_context.GetX(31), Is.EqualTo(_unicornEmu.SP), "X31");
 
 if (fpTolerances == FpTolerances.None)
 {
-Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]));
+Assert.That(V128ToSimdValue(_context.GetV(0)), Is.EqualTo(_unicornEmu.Q[0]), "V0");
 }
 else
 {
 ManageFpTolerances(fpTolerances);
 }
-Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1]));
-Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2]));
-Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3]));
-Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4]));
-Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5]));
+Assert.That(V128ToSimdValue(_context.GetV(1)), Is.EqualTo(_unicornEmu.Q[1]), "V1");
+Assert.That(V128ToSimdValue(_context.GetV(2)), Is.EqualTo(_unicornEmu.Q[2]), "V2");
+Assert.That(V128ToSimdValue(_context.GetV(3)), Is.EqualTo(_unicornEmu.Q[3]), "V3");
+Assert.That(V128ToSimdValue(_context.GetV(4)), Is.EqualTo(_unicornEmu.Q[4]), "V4");
+Assert.That(V128ToSimdValue(_context.GetV(5)), Is.EqualTo(_unicornEmu.Q[5]), "V5");
 Assert.That(V128ToSimdValue(_context.GetV(6)), Is.EqualTo(_unicornEmu.Q[6]));
 Assert.That(V128ToSimdValue(_context.GetV(7)), Is.EqualTo(_unicornEmu.Q[7]));
 Assert.That(V128ToSimdValue(_context.GetV(8)), Is.EqualTo(_unicornEmu.Q[8]));
@@ -360,16 +400,27 @@ namespace Ryujinx.Tests.Cpu
 Assert.That(V128ToSimdValue(_context.GetV(27)), Is.EqualTo(_unicornEmu.Q[27]));
 Assert.That(V128ToSimdValue(_context.GetV(28)), Is.EqualTo(_unicornEmu.Q[28]));
 Assert.That(V128ToSimdValue(_context.GetV(29)), Is.EqualTo(_unicornEmu.Q[29]));
-Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]));
-Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]));
+Assert.That(V128ToSimdValue(_context.GetV(30)), Is.EqualTo(_unicornEmu.Q[30]), "V30");
+Assert.That(V128ToSimdValue(_context.GetV(31)), Is.EqualTo(_unicornEmu.Q[31]), "V31");
 
-Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr));
-Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask));
+Assert.That((int)_context.Fpcr, Is.EqualTo(_unicornEmu.Fpcr), "Fpcr");
+Assert.That((int)_context.Fpsr & (int)fpsrMask, Is.EqualTo(_unicornEmu.Fpsr & (int)fpsrMask), "Fpsr");
 
-Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag));
-Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag));
-Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag));
-Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag));
+Assert.Multiple(() =>
+{
+Assert.That(_context.GetPstateFlag(PState.VFlag), Is.EqualTo(_unicornEmu.OverflowFlag), "VFlag");
+Assert.That(_context.GetPstateFlag(PState.CFlag), Is.EqualTo(_unicornEmu.CarryFlag), "CFlag");
+Assert.That(_context.GetPstateFlag(PState.ZFlag), Is.EqualTo(_unicornEmu.ZeroFlag), "ZFlag");
+Assert.That(_context.GetPstateFlag(PState.NFlag), Is.EqualTo(_unicornEmu.NegativeFlag), "NFlag");
+});
+
+if (_usingMemory)
+{
+byte[] mem = _memory.GetSpan(DataBaseAddress, (int)Size).ToArray();
+byte[] unicornMem = _unicornEmu.MemoryRead(DataBaseAddress, Size);
+
+Assert.That(mem, Is.EqualTo(unicornMem), "Data");
+}
 }
 
 private void ManageFpSkips(FpSkips fpSkips)
@@ -417,15 +468,18 @@ namespace Ryujinx.Tests.Cpu
 {
 if (IsNormalOrSubnormalS(_unicornEmu.Q[0].AsFloat()) &&
 IsNormalOrSubnormalS(_context.GetV(0).As<float>()))
+{
+Assert.Multiple(() =>
 {
 Assert.That (_context.GetV(0).Extract<float>(0),
-Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetFloat(0)).Within(1).Ulps, "V0[0]");
 Assert.That (_context.GetV(0).Extract<float>(1),
-Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetFloat(1)).Within(1).Ulps, "V0[1]");
 Assert.That (_context.GetV(0).Extract<float>(2),
-Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetFloat(2)).Within(1).Ulps, "V0[2]");
 Assert.That (_context.GetV(0).Extract<float>(3),
-Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetFloat(3)).Within(1).Ulps, "V0[3]");
+});
+
 Console.WriteLine(fpTolerances);
 }
@@ -439,11 +493,14 @@ namespace Ryujinx.Tests.Cpu
 {
 if (IsNormalOrSubnormalD(_unicornEmu.Q[0].AsDouble()) &&
 IsNormalOrSubnormalD(_context.GetV(0).As<double>()))
+{
+Assert.Multiple(() =>
 {
 Assert.That (_context.GetV(0).Extract<double>(0),
-Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetDouble(0)).Within(1).Ulps, "V0[0]");
 Assert.That (_context.GetV(0).Extract<double>(1),
-Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps);
+Is.EqualTo(_unicornEmu.Q[0].GetDouble(1)).Within(1).Ulps, "V0[1]");
+});
+
 Console.WriteLine(fpTolerances);
 }
@@ -4,15 +4,67 @@ using ARMeilleure.State
 
 using NUnit.Framework;
 
+using System;
+using System.Collections.Generic;
+
 namespace Ryujinx.Tests.Cpu
 {
     [Category("Misc")]
     public sealed class CpuTestMisc : CpuTest
     {
 #if Misc
 
+        #region "ValueSource (Types)"
+        private static IEnumerable<ulong> _1S_F_()
+        {
+            yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue)
+            yield return 0x0000000080800000ul; // -Min Normal
+            yield return 0x00000000807FFFFFul; // -Max Subnormal
+            yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon)
+            yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue)
+            yield return 0x0000000000800000ul; // +Min Normal
+            yield return 0x00000000007FFFFFul; // +Max Subnormal
+            yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon)
+
+            if (!NoZeros)
+            {
+                yield return 0x0000000080000000ul; // -Zero
+                yield return 0x0000000000000000ul; // +Zero
+            }
+
+            if (!NoInfs)
+            {
+                yield return 0x00000000FF800000ul; // -Infinity
+                yield return 0x000000007F800000ul; // +Infinity
+            }
+
+            if (!NoNaNs)
+            {
+                yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
+                yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload)
+                yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
+                yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload)
+            }
+
+            for (int cnt = 1; cnt <= RndCnt; cnt++)
+            {
+                ulong grbg = TestContext.CurrentContext.Random.NextUInt();
+                ulong rnd1 = GenNormalS();
+                ulong rnd2 = GenSubnormalS();
+
+                yield return (grbg << 32) | rnd1;
+                yield return (grbg << 32) | rnd2;
+            }
+        }
+        #endregion
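
The _1S_F_() source above yields raw single-precision bit patterns in the low 32 bits (the high word is garbage filler). A quick standalone check of the commented values, independent of the test harness:

    using System;

    class Decode1SF
    {
        static void Main()
        {
            // Low words of a few _1S_F_ entries, reinterpreted as floats.
            uint[] patterns = { 0xFF7FFFFFu, 0x80000001u, 0x7F800000u, 0x7FC00000u };

            foreach (uint bits in patterns)
            {
                float value = BitConverter.Int32BitsToSingle((int)bits);
                Console.WriteLine($"0x{bits:X8} -> {value}");
            }
            // 0xFF7FFFFF -> float.MinValue, 0x80000001 -> -float.Epsilon,
            // 0x7F800000 -> +Infinity,      0x7FC00000 -> NaN.
        }
    }
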
 
         private const int RndCnt = 2;
         private const int RndCntImm = 2;
 
+        private static readonly bool NoZeros = false;
+        private static readonly bool NoInfs = false;
+        private static readonly bool NoNaNs = false;
+
         #region "AluImm & Csel"
         [Test, Pairwise]
         public void Adds_Csinc_64bit([Values(0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,

@@ -357,6 +409,77 @@ namespace Ryujinx.Tests.Cpu
 
             Assert.That(context.GetX(0), Is.EqualTo(a));
         }
 
+        [Explicit]
+        [Test, Pairwise]
+        public void Misc4([ValueSource("_1S_F_")] ulong a,
+                          [ValueSource("_1S_F_")] ulong b,
+                          [ValueSource("_1S_F_")] ulong c,
+                          [Values(0ul, 1ul, 2ul, 3ul)] ulong displacement)
+        {
+            if (!BitConverter.IsLittleEndian)
+            {
+                Assert.Ignore();
+            }
+
+            for (ulong gapOffset = 0; gapOffset < displacement; gapOffset++)
+            {
+                SetWorkingMemory(gapOffset, TestContext.CurrentContext.Random.NextByte());
+            }
+
+            SetWorkingMemory(0x0 + displacement, BitConverter.GetBytes((uint)b));
+
+            SetWorkingMemory(0x4 + displacement, BitConverter.GetBytes((uint)c));
+
+            SetWorkingMemory(0x8 + displacement, TestContext.CurrentContext.Random.NextByte());
+            SetWorkingMemory(0x9 + displacement, TestContext.CurrentContext.Random.NextByte());
+            SetWorkingMemory(0xA + displacement, TestContext.CurrentContext.Random.NextByte());
+            SetWorkingMemory(0xB + displacement, TestContext.CurrentContext.Random.NextByte());
+
+            SetContext(
+                x0: DataBaseAddress + displacement,
+                v0: MakeVectorE0E1(a, TestContext.CurrentContext.Random.NextULong()),
+                v1: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()),
+                v2: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()),
+                overflow: TestContext.CurrentContext.Random.NextBool(),
+                carry: TestContext.CurrentContext.Random.NextBool(),
+                zero: TestContext.CurrentContext.Random.NextBool(),
+                negative: TestContext.CurrentContext.Random.NextBool());
+
+            Opcode(0xBD400001); // LDR S1, [X0,#0]
+            Opcode(0xBD400402); // LDR S2, [X0,#4]
+            Opcode(0x1E215801); // FMIN S1, S0, S1
+            Opcode(0x1E222000); // FCMP S0, S2
+            Opcode(0x1E214C40); // FCSEL S0, S2, S1, MI
+            Opcode(0xBD000800); // STR S0, [X0,#8]
+            Opcode(0xD65F03C0); // RET
+            ExecuteOpcodes();
+
+            CompareAgainstUnicorn();
+        }
+
+        [Explicit]
+        [Test]
+        public void Misc5([ValueSource("_1S_F_")] ulong a)
+        {
+            SetContext(
+                v0: MakeVectorE0E1(a, TestContext.CurrentContext.Random.NextULong()),
+                v1: MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()),
+                overflow: TestContext.CurrentContext.Random.NextBool(),
+                carry: TestContext.CurrentContext.Random.NextBool(),
+                zero: TestContext.CurrentContext.Random.NextBool(),
+                negative: TestContext.CurrentContext.Random.NextBool());
+
+            Opcode(0x1E202008); // FCMP S0, #0.0
+            Opcode(0x1E2E1001); // FMOV S1, #1.0
+            Opcode(0x1E215800); // FMIN S0, S0, S1
+            Opcode(0x1E2703E1); // FMOV S1, WZR
+            Opcode(0x1E204C20); // FCSEL S0, S1, S0, MI
+            Opcode(0xD65F03C0); // RET
+            ExecuteOpcodes();
+
+            CompareAgainstUnicorn();
+        }
 #endif
     }
 }
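
Both Misc4 and Misc5 above drive the scalar FMIN/FCMP/FCSEL path with the special values from _1S_F_. As a rough reference point for what they exercise (an architectural sketch, not ARMeilleure's emitter code), scalar FMIN propagates NaNs and orders the two zeros by sign:

    static class FpRef
    {
        // Hedged reference model of AArch64 scalar FMIN for the inputs these
        // tests feed it: any NaN operand yields a NaN result (the default NaN
        // when FPCR.DN is set), and -0.0 counts as smaller than +0.0, unlike
        // a naive 'a < b ? a : b'.
        public static float FminRef(float a, float b)
        {
            if (float.IsNaN(a) || float.IsNaN(b))
            {
                return float.NaN; // glosses over signalling-NaN payload details
            }

            if (a == 0f && b == 0f)
            {
                return float.IsNegative(a) ? a : b; // the negatively signed zero wins
            }

            return a < b ? a : b;
        }
    }
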
@@ -918,7 +918,9 @@ namespace Ryujinx.Tests.Cpu
             return new uint[]
             {
                 0x6E30C800u, // FMAXNMV S0, V0.4S
-                0x6EB0C800u  // FMINNMV S0, V0.4S
+                0x6E30F800u, // FMAXV S0, V0.4S
+                0x6EB0C800u, // FMINNMV S0, V0.4S
+                0x6EB0F800u  // FMINV S0, V0.4S
             };
         }
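
The two new encodings are the across-vector reductions: FMAXV/FMINV fold the four single-precision lanes into one scalar with FMAX/FMIN element semantics, so a single NaN lane makes the whole result NaN. A rough illustration of the reduction (not the SSE-backed emitter path):

    static class ReduceRef
    {
        // FMAX element rule: NaN propagates, and +0.0 beats -0.0 on ties.
        static float FmaxRef(float a, float b)
        {
            if (float.IsNaN(a) || float.IsNaN(b)) return float.NaN;
            if (a == 0f && b == 0f) return float.IsNegative(a) ? b : a;
            return a > b ? a : b;
        }

        // FMAXV S0, V0.4S: fold FMAX across the four lanes of the source vector.
        public static float FmaxvRef(float[] lanes)
        {
            float acc = lanes[0];
            for (int i = 1; i < lanes.Length; i++)
            {
                acc = FmaxRef(acc, lanes[i]);
            }
            return acc;
        }
    }
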
@@ -375,9 +375,11 @@ namespace Ryujinx.Tests.Cpu
             {
                 0x0E20F400u, // FMAX V0.2S, V0.2S, V0.2S
                 0x0E20C400u, // FMAXNM V0.2S, V0.2S, V0.2S
+                0x2E20C400u, // FMAXNMP V0.2S, V0.2S, V0.2S
                 0x2E20F400u, // FMAXP V0.2S, V0.2S, V0.2S
                 0x0EA0F400u, // FMIN V0.2S, V0.2S, V0.2S
                 0x0EA0C400u, // FMINNM V0.2S, V0.2S, V0.2S
+                0x2EA0C400u, // FMINNMP V0.2S, V0.2S, V0.2S
                 0x2EA0F400u  // FMINP V0.2S, V0.2S, V0.2S
             };
         }
@@ -388,9 +390,11 @@ namespace Ryujinx.Tests.Cpu
             {
                 0x4E60F400u, // FMAX V0.2D, V0.2D, V0.2D
                 0x4E60C400u, // FMAXNM V0.2D, V0.2D, V0.2D
+                0x6E60C400u, // FMAXNMP V0.2D, V0.2D, V0.2D
                 0x6E60F400u, // FMAXP V0.2D, V0.2D, V0.2D
                 0x4EE0F400u, // FMIN V0.2D, V0.2D, V0.2D
                 0x4EE0C400u, // FMINNM V0.2D, V0.2D, V0.2D
+                0x6EE0C400u, // FMINNMP V0.2D, V0.2D, V0.2D
                 0x6EE0F400u  // FMINP V0.2D, V0.2D, V0.2D
             };
         }
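
The added FMAXNMP/FMINNMP rows use the "number" variants, which differ from FMAXP/FMINP only in NaN handling: when exactly one operand of a pair is a quiet NaN, the numeric operand is returned instead of NaN. An element-level sketch of that rule (the pairwise form simply applies it to adjacent lanes of the concatenated operands):

    static class MaxNumRef
    {
        // FMAXNM element rule (maxNum): one quiet NaN loses to a number,
        // two NaNs still give NaN; signalling NaNs are glossed over here.
        public static float FmaxnmRef(float a, float b)
        {
            if (float.IsNaN(a) && float.IsNaN(b)) return float.NaN;
            if (float.IsNaN(a)) return b;
            if (float.IsNaN(b)) return a;
            if (a == 0f && b == 0f) return float.IsNegative(a) ? b : a; // +0 over -0
            return a > b ? a : b;
        }
    }
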
@@ -531,6 +535,15 @@ namespace Ryujinx.Tests.Cpu
             };
         }
 
+        private static uint[] _ShlReg_S_D_()
+        {
+            return new uint[]
+            {
+                0x5EE04400u, // SSHL D0, D0, D0
+                0x7EE04400u  // USHL D0, D0, D0
+            };
+        }
+
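
SSHL D0, D0, D0 and USHL D0, D0, D0 take their shift count from the signed low byte of the second operand: positive counts shift left, negative counts shift right (arithmetic for SSHL, logical for USHL). A hedged reference sketch of the 64-bit scalar behaviour, for orientation only:

    static class ShlRef
    {
        // USHL Dd, Dn, Dm: logical shifts; counts of 64 or more in either
        // direction clear the result.
        public static ulong UshlRef(ulong value, ulong shiftReg)
        {
            sbyte shift = (sbyte)shiftReg; // only the low byte is significant
            if (shift >= 64 || shift <= -64) return 0;
            return shift >= 0 ? value << shift : value >> -shift;
        }

        // SSHL Dd, Dn, Dm: arithmetic right shift for negative counts, so large
        // negative counts collapse to the sign bit.
        public static long SshlRef(long value, ulong shiftReg)
        {
            sbyte shift = (sbyte)shiftReg;
            if (shift >= 64) return 0;
            if (shift <= -64) return value >> 63;
            return shift >= 0 ? value << shift : value >> -shift;
        }
    }
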
         private static uint[] _ShlReg_V_8B_4H_2S_()
         {
             return new uint[]

@@ -2820,6 +2833,26 @@ namespace Ryujinx.Tests.Cpu
             CompareAgainstUnicorn();
         }
 
+        [Test, Pairwise]
+        public void ShlReg_S_D([ValueSource("_ShlReg_S_D_")] uint opcodes,
+                               [Values(0u)] uint rd,
+                               [Values(1u, 0u)] uint rn,
+                               [Values(2u, 0u)] uint rm,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong z,
+                               [ValueSource("_1D_")] [Random(RndCnt)] ulong a,
+                               [ValueSource("_1D_")] [Random(0ul, 255ul, RndCnt)] ulong b)
+        {
+            opcodes |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);
+
+            V128 v0 = MakeVectorE0E1(z, z);
+            V128 v1 = MakeVectorE0(a);
+            V128 v2 = MakeVectorE0(b);
+
+            SingleOpcode(opcodes, v0: v0, v1: v1, v2: v2);
+
+            CompareAgainstUnicorn(fpsrMask: Fpsr.Qc);
+        }
+
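
The test above patches the register fields into the base encoding before running it: Rd sits in bits [4:0], Rn in bits [9:5], and Rm in bits [20:16]. For instance, 0x5EE04400 with rn = 1 and rm = 2 becomes 0x5EE24420, i.e. SSHL D0, D1, D2 (an illustrative computation, not output from the test):

    using System;

    class PatchRegs
    {
        static void Main()
        {
            uint opcode = 0x5EE04400u; // SSHL D0, D0, D0
            uint rd = 0, rn = 1, rm = 2;

            // Same field packing as the test: Rd -> [4:0], Rn -> [9:5], Rm -> [20:16].
            opcode |= ((rm & 31) << 16) | ((rn & 31) << 5) | ((rd & 31) << 0);

            Console.WriteLine($"0x{opcode:X8}"); // prints 0x5EE24420 -> SSHL D0, D1, D2
        }
    }
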
         [Test, Pairwise]
         public void ShlReg_V_8B_4H_2S([ValueSource("_ShlReg_V_8B_4H_2S_")] uint opcodes,
                                       [Values(0u)] uint rd,