Remove cold methods from the CPU cache (#224)
* Remove unused tracing functionality from the CPU
* GetNsoExecutable -> GetExecutable
* Unsigned comparison
* Re-add cpu tracing
* Config change
* Remove cold methods from the translation cache on the cpu
* Replace lock with try lock, pass new ATranslatorCache instead of ATranslator
* Rebase fixups
This commit is contained in:
parent
c08479877e
commit
0cf462669d
9 changed files with 216 additions and 173 deletions
|
@ -2,8 +2,6 @@ using System.Runtime.Intrinsics.X86;
|
|||
|
||||
public static class AOptimizations
|
||||
{
|
||||
public static bool GenerateCallStack = true;
|
||||
|
||||
private static bool UseAllSseIfAvailable = true;
|
||||
|
||||
private static bool UseSseIfAvailable = true;
|
||||
|
|
|
@ -13,6 +13,8 @@ namespace ChocolArm64
|
|||
{
|
||||
private delegate long AA64Subroutine(AThreadState Register, AMemory Memory);
|
||||
|
||||
private const int MinCallCountForReJit = 250;
|
||||
|
||||
private AA64Subroutine ExecDelegate;
|
||||
|
||||
public static int StateArgIdx { get; private set; }
|
||||
|
@ -32,8 +34,6 @@ namespace ChocolArm64
|
|||
|
||||
private bool NeedsReJit;
|
||||
|
||||
private int MinCallCountForReJit = 250;
|
||||
|
||||
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params)
|
||||
{
|
||||
if (Method == null)
|
||||
|
|
|
@ -1,38 +1,24 @@
|
|||
using ChocolArm64.Decoder;
|
||||
using ChocolArm64.Events;
|
||||
using ChocolArm64.Instruction;
|
||||
using ChocolArm64.Memory;
|
||||
using ChocolArm64.State;
|
||||
using ChocolArm64.Translation;
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Reflection.Emit;
|
||||
|
||||
namespace ChocolArm64
|
||||
{
|
||||
public class ATranslator
|
||||
{
|
||||
private ConcurrentDictionary<long, ATranslatedSub> CachedSubs;
|
||||
|
||||
private ConcurrentDictionary<long, string> SymbolTable;
|
||||
private ATranslatorCache Cache;
|
||||
|
||||
public event EventHandler<ACpuTraceEventArgs> CpuTrace;
|
||||
|
||||
public bool EnableCpuTrace { get; set; }
|
||||
|
||||
public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null)
|
||||
public ATranslator()
|
||||
{
|
||||
CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>();
|
||||
|
||||
if (SymbolTable != null)
|
||||
{
|
||||
this.SymbolTable = new ConcurrentDictionary<long, string>(SymbolTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
this.SymbolTable = new ConcurrentDictionary<long, string>();
|
||||
}
|
||||
Cache = new ATranslatorCache();
|
||||
}
|
||||
|
||||
internal void ExecuteSubroutine(AThread Thread, long Position)
|
||||
|
@ -70,15 +56,10 @@ namespace ChocolArm64
|
|||
{
|
||||
if (EnableCpuTrace)
|
||||
{
|
||||
if (!SymbolTable.TryGetValue(Position, out string SubName))
|
||||
{
|
||||
SubName = string.Empty;
|
||||
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position));
|
||||
}
|
||||
|
||||
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName));
|
||||
}
|
||||
|
||||
if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub))
|
||||
if (!Cache.TryGetSubroutine(Position, out ATranslatedSub Sub))
|
||||
{
|
||||
Sub = TranslateTier0(State, Memory, Position);
|
||||
}
|
||||
|
@ -93,37 +74,20 @@ namespace ChocolArm64
|
|||
while (Position != 0 && State.Running);
|
||||
}
|
||||
|
||||
internal bool TryGetCachedSub(AOpCode OpCode, out ATranslatedSub Sub)
|
||||
{
|
||||
if (OpCode.Emitter != AInstEmit.Bl)
|
||||
{
|
||||
Sub = null;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return TryGetCachedSub(((AOpCodeBImmAl)OpCode).Imm, out Sub);
|
||||
}
|
||||
|
||||
internal bool TryGetCachedSub(long Position, out ATranslatedSub Sub)
|
||||
{
|
||||
return CachedSubs.TryGetValue(Position, out Sub);
|
||||
}
|
||||
|
||||
internal bool HasCachedSub(long Position)
|
||||
{
|
||||
return CachedSubs.ContainsKey(Position);
|
||||
return Cache.HasSubroutine(Position);
|
||||
}
|
||||
|
||||
private ATranslatedSub TranslateTier0(AThreadState State, AMemory Memory, long Position)
|
||||
{
|
||||
ABlock Block = ADecoder.DecodeBasicBlock(State, this, Memory, Position);
|
||||
ABlock Block = ADecoder.DecodeBasicBlock(State, Memory, Position);
|
||||
|
||||
ABlock[] Graph = new ABlock[] { Block };
|
||||
|
||||
string SubName = GetSubName(Position);
|
||||
string SubName = GetSubroutineName(Position);
|
||||
|
||||
AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName);
|
||||
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Block, SubName);
|
||||
|
||||
do
|
||||
{
|
||||
|
@ -135,7 +99,7 @@ namespace ChocolArm64
|
|||
|
||||
Subroutine.SetType(ATranslatedSubType.SubTier0);
|
||||
|
||||
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
|
||||
Cache.AddOrUpdate(Position, Subroutine, Block.OpCodes.Count);
|
||||
|
||||
AOpCode LastOp = Block.GetLastOp();
|
||||
|
||||
|
@ -144,13 +108,11 @@ namespace ChocolArm64
|
|||
|
||||
private void TranslateTier1(AThreadState State, AMemory Memory, long Position)
|
||||
{
|
||||
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(State, this, Memory, Position);
|
||||
(ABlock[] Graph, ABlock Root) = ADecoder.DecodeSubroutine(Cache, State, Memory, Position);
|
||||
|
||||
string SubName = GetSubName(Position);
|
||||
string SubName = GetSubroutineName(Position);
|
||||
|
||||
PropagateName(Cfg.Graph, SubName);
|
||||
|
||||
AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
|
||||
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Root, SubName);
|
||||
|
||||
if (Context.CurrBlock.Position != Position)
|
||||
{
|
||||
|
@ -165,11 +127,11 @@ namespace ChocolArm64
|
|||
|
||||
//Mark all methods that calls this method for ReJiting,
|
||||
//since we can now call it directly which is faster.
|
||||
if (CachedSubs.TryGetValue(Position, out ATranslatedSub OldSub))
|
||||
if (Cache.TryGetSubroutine(Position, out ATranslatedSub OldSub))
|
||||
{
|
||||
foreach (long CallerPos in OldSub.GetCallerPositions())
|
||||
{
|
||||
if (CachedSubs.TryGetValue(Position, out ATranslatedSub CallerSub))
|
||||
if (Cache.TryGetSubroutine(Position, out ATranslatedSub CallerSub))
|
||||
{
|
||||
CallerSub.MarkForReJit();
|
||||
}
|
||||
|
@ -180,27 +142,24 @@ namespace ChocolArm64
|
|||
|
||||
Subroutine.SetType(ATranslatedSubType.SubTier1);
|
||||
|
||||
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
|
||||
Cache.AddOrUpdate(Position, Subroutine, GetGraphInstCount(Graph));
|
||||
}
|
||||
|
||||
private string GetSubName(long Position)
|
||||
private string GetSubroutineName(long Position)
|
||||
{
|
||||
return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}");
|
||||
return $"Sub{Position:x16}";
|
||||
}
|
||||
|
||||
private void PropagateName(ABlock[] Graph, string Name)
|
||||
private int GetGraphInstCount(ABlock[] Graph)
|
||||
{
|
||||
int Size = 0;
|
||||
|
||||
foreach (ABlock Block in Graph)
|
||||
{
|
||||
AOpCode LastOp = Block.GetLastOp();
|
||||
Size += Block.OpCodes.Count;
|
||||
}
|
||||
|
||||
if (LastOp != null &&
|
||||
(LastOp.Emitter == AInstEmit.Bl ||
|
||||
LastOp.Emitter == AInstEmit.Blr))
|
||||
{
|
||||
SymbolTable.TryAdd(LastOp.Position + 4, Name);
|
||||
}
|
||||
}
|
||||
return Size;
|
||||
}
|
||||
}
|
||||
}
|
164
ATranslatorCache.cs
Normal file
164
ATranslatorCache.cs
Normal file
|
@ -0,0 +1,164 @@
|
|||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Threading;
|
||||
|
||||
namespace ChocolArm64
|
||||
{
|
||||
//Cache of translated subroutines keyed by guest address.
//Entries are also tracked on a linked list ordered from least to most
//recently refreshed, so that old entries can be evicted once the summed
//size of all entries exceeds MaxTotalSize.
//
//Locking: every mutation of Cache, SortedCache and TotalSize happens while
//holding the SortedCache monitor. Plain lookups go through the
//ConcurrentDictionary without taking the monitor.
class ATranslatorCache
{
    //Eviction is only attempted while the summed size of all entries is
    //above this value. The size unit is whatever callers pass to AddOrUpdate.
    private const int MaxTotalSize = 2 * 1024 * 256;

    //Entries refreshed less than this many Environment.TickCount units
    //(milliseconds) ago are never evicted, even when over the size budget.
    private const int MaxTimeDelta = 30000;

    //Number of lookups of an entry before its position on the eviction
    //list (and its timestamp) is refreshed.
    private const int MinCallCountForUpdate = 1000;

    //Per-subroutine bookkeeping stored in the dictionary.
    private class CacheBucket
    {
        public ATranslatedSub Subroutine { get; private set; }

        //Node on the eviction list that currently represents this entry.
        public LinkedListNode<long> Node { get; private set; }

        //Lookups since the last refresh. Incremented without synchronization,
        //so concurrent lookups may lose counts; that only delays a refresh.
        public int CallCount { get; set; }

        public int Size { get; private set; }

        //Environment.TickCount value taken when Node was last assigned.
        public int Timestamp { get; private set; }

        public CacheBucket(ATranslatedSub Subroutine, LinkedListNode<long> Node, int Size)
        {
            this.Subroutine = Subroutine;
            this.Size       = Size;

            UpdateNode(Node);
        }

        //Points the bucket at a new list node and marks it as used "now".
        public void UpdateNode(LinkedListNode<long> Node)
        {
            this.Node = Node;

            Timestamp = Environment.TickCount;
        }
    }

    private readonly ConcurrentDictionary<long, CacheBucket> Cache;

    //Positions ordered from least (First) to most (Last) recently refreshed.
    //Also used as the monitor object guarding all mutations.
    private readonly LinkedList<long> SortedCache;

    private int TotalSize;

    public ATranslatorCache()
    {
        Cache = new ConcurrentDictionary<long, CacheBucket>();

        SortedCache = new LinkedList<long>();
    }

    //Inserts the subroutine for Position, replacing any existing entry,
    //after first giving the cache a chance to evict old entries.
    public void AddOrUpdate(long Position, ATranslatedSub Subroutine, int Size)
    {
        //The whole operation runs under the monitor so that the size
        //accounting, the list and the dictionary always change together.
        lock (SortedCache)
        {
            ClearCacheIfNeeded();

            //Explicit lookup + indexer store instead of
            //ConcurrentDictionary.AddOrUpdate: that API may invoke its update
            //delegate more than once, which is unsafe for side effects.
            if (Cache.TryGetValue(Position, out CacheBucket OldBucket))
            {
                TotalSize -= OldBucket.Size;

                RemoveNode(OldBucket.Node);
            }

            TotalSize += Size;

            LinkedListNode<long> Node = SortedCache.AddLast(Position);

            Cache[Position] = new CacheBucket(Subroutine, Node, Size);
        }
    }

    //Returns true when a subroutine is cached for Position.
    public bool HasSubroutine(long Position)
    {
        return Cache.ContainsKey(Position);
    }

    //Looks up the subroutine cached for Position. On a hit the entry's call
    //counter is bumped, and once it passes MinCallCountForUpdate the entry
    //is opportunistically moved to the "most recent" end of the list.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool TryGetSubroutine(long Position, out ATranslatedSub Subroutine)
    {
        if (Cache.TryGetValue(Position, out CacheBucket Bucket))
        {
            if (Bucket.CallCount++ > MinCallCountForUpdate)
            {
                TryRefreshBucket(Position, Bucket);
            }

            Subroutine = Bucket.Subroutine;

            return true;
        }

        Subroutine = default(ATranslatedSub);

        return false;
    }

    //Moves the bucket to the end of the eviction list if the monitor can be
    //taken without blocking; otherwise does nothing and a later lookup
    //retries, since the call counter is only reset under the monitor.
    private void TryRefreshBucket(long Position, CacheBucket Bucket)
    {
        if (!Monitor.TryEnter(SortedCache))
        {
            return;
        }

        try
        {
            Bucket.CallCount = 0;

            //The bucket may have been evicted or replaced between the
            //lock-free lookup and taking the monitor. Touching the list
            //with a stale bucket would either throw (detached node) or
            //leave a list entry that has no matching cache entry.
            bool IsCurrent =
                Cache.TryGetValue(Position, out CacheBucket Current) &&
                Current == Bucket                                    &&
                Bucket.Node.List == SortedCache;

            if (IsCurrent)
            {
                SortedCache.Remove(Bucket.Node);

                Bucket.UpdateNode(SortedCache.AddLast(Position));
            }
        }
        finally
        {
            Monitor.Exit(SortedCache);
        }
    }

    //Evicts entries from the least recently refreshed end of the list while
    //the cache is over MaxTotalSize, stopping at the first entry that was
    //refreshed within the last MaxTimeDelta ticks.
    private void ClearCacheIfNeeded()
    {
        lock (SortedCache)
        {
            //Taken under the monitor: bucket timestamps are only written
            //under the same monitor, so none can be newer than this value.
            int Timestamp = Environment.TickCount;

            while (TotalSize > MaxTotalSize)
            {
                LinkedListNode<long> Node = SortedCache.First;

                if (Node == null)
                {
                    break;
                }

                if (!Cache.TryGetValue(Node.Value, out CacheBucket Bucket) || Bucket.Node != Node)
                {
                    //List entry without a matching cache entry: drop it so
                    //the loop makes progress instead of throwing or spinning.
                    SortedCache.Remove(Node);

                    continue;
                }

                int TimeDelta = RingDelta(Bucket.Timestamp, Timestamp);

                //Unsigned compare so a wrapped (negative) delta counts as old.
                if ((uint)TimeDelta <= (uint)MaxTimeDelta)
                {
                    break;
                }

                if (Cache.TryRemove(Node.Value, out Bucket))
                {
                    TotalSize -= Bucket.Size;
                }

                //Always unlink the node, even if the dictionary removal
                //failed, so the same node is never examined twice.
                SortedCache.Remove(Node);
            }
        }
    }

    //Unlinks Node from the eviction list if it is still attached to it.
    private void RemoveNode(LinkedListNode<long> Node)
    {
        if (Node != null && Node.List == SortedCache)
        {
            SortedCache.Remove(Node);
        }
    }

    //Difference New - Old between two Environment.TickCount samples, with
    //two's complement wrap-around. The previous two-branch form
    //(New + (~Old + 1) versus New - Old) produced this same value in both
    //branches; unchecked keeps the wrap-around explicit.
    private static int RingDelta(int Old, int New)
    {
        return unchecked(New - Old);
    }
}
|
||||
}
|
|
@ -19,11 +19,7 @@ namespace ChocolArm64.Decoder
|
|||
OpActivators = new ConcurrentDictionary<Type, OpActivator>();
|
||||
}
|
||||
|
||||
public static ABlock DecodeBasicBlock(
|
||||
AThreadState State,
|
||||
ATranslator Translator,
|
||||
AMemory Memory,
|
||||
long Start)
|
||||
public static ABlock DecodeBasicBlock(AThreadState State, AMemory Memory, long Start)
|
||||
{
|
||||
ABlock Block = new ABlock(Start);
|
||||
|
||||
|
@ -33,8 +29,8 @@ namespace ChocolArm64.Decoder
|
|||
}
|
||||
|
||||
public static (ABlock[] Graph, ABlock Root) DecodeSubroutine(
|
||||
ATranslatorCache Cache,
|
||||
AThreadState State,
|
||||
ATranslator Translator,
|
||||
AMemory Memory,
|
||||
long Start)
|
||||
{
|
||||
|
@ -79,7 +75,7 @@ namespace ChocolArm64.Decoder
|
|||
{
|
||||
if (Op.Emitter == AInstEmit.Bl)
|
||||
{
|
||||
HasCachedSub = Translator.HasCachedSub(Op.Imm);
|
||||
HasCachedSub = Cache.HasSubroutine(Op.Imm);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -6,12 +6,9 @@ namespace ChocolArm64.Events
|
|||
{
|
||||
public long Position { get; private set; }
|
||||
|
||||
public string SubName { get; private set; }
|
||||
|
||||
public ACpuTraceEventArgs(long Position, string SubName)
|
||||
public ACpuTraceEventArgs(long Position)
|
||||
{
|
||||
this.Position = Position;
|
||||
this.SubName = SubName;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -35,14 +35,6 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp;
|
||||
|
||||
if (AOptimizations.GenerateCallStack)
|
||||
{
|
||||
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
|
||||
Context.EmitLdc_I8(Op.Imm);
|
||||
|
||||
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
|
||||
}
|
||||
|
||||
Context.EmitLdc_I(Op.Position + 4);
|
||||
Context.EmitStint(AThreadState.LRIndex);
|
||||
Context.EmitStoreState();
|
||||
|
@ -80,14 +72,6 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
|
||||
|
||||
if (AOptimizations.GenerateCallStack)
|
||||
{
|
||||
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
|
||||
Context.EmitLdintzr(Op.Rn);
|
||||
|
||||
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
|
||||
}
|
||||
|
||||
Context.EmitLdc_I(Op.Position + 4);
|
||||
Context.EmitStint(AThreadState.LRIndex);
|
||||
Context.EmitStoreState();
|
||||
|
@ -100,14 +84,6 @@ namespace ChocolArm64.Instruction
|
|||
{
|
||||
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
|
||||
|
||||
if (AOptimizations.GenerateCallStack)
|
||||
{
|
||||
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
|
||||
Context.EmitLdintzr(Op.Rn);
|
||||
|
||||
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.JumpMethod));
|
||||
}
|
||||
|
||||
Context.EmitStoreState();
|
||||
Context.EmitLdintzr(Op.Rn);
|
||||
|
||||
|
@ -129,13 +105,6 @@ namespace ChocolArm64.Instruction
|
|||
|
||||
public static void Ret(AILEmitterCtx Context)
|
||||
{
|
||||
if (AOptimizations.GenerateCallStack)
|
||||
{
|
||||
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
|
||||
|
||||
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.ExitMethod));
|
||||
}
|
||||
|
||||
Context.EmitStoreState();
|
||||
Context.EmitLdint(AThreadState.LRIndex);
|
||||
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
using ChocolArm64.Events;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
|
@ -86,17 +85,10 @@ namespace ChocolArm64.State
|
|||
public event EventHandler<AInstExceptionEventArgs> SvcCall;
|
||||
public event EventHandler<AInstUndefinedEventArgs> Undefined;
|
||||
|
||||
private Stack<long> CallStack;
|
||||
|
||||
private static Stopwatch TickCounter;
|
||||
|
||||
private static double HostTickFreq;
|
||||
|
||||
public AThreadState()
|
||||
{
|
||||
CallStack = new Stack<long>();
|
||||
}
|
||||
|
||||
static AThreadState()
|
||||
{
|
||||
HostTickFreq = 1.0 / Stopwatch.Frequency;
|
||||
|
@ -153,27 +145,5 @@ namespace ChocolArm64.State
|
|||
{
|
||||
Undefined?.Invoke(this, new AInstUndefinedEventArgs(Position, RawOpCode));
|
||||
}
|
||||
|
||||
internal void EnterMethod(long Position)
|
||||
{
|
||||
CallStack.Push(Position);
|
||||
}
|
||||
|
||||
internal void ExitMethod()
|
||||
{
|
||||
CallStack.TryPop(out _);
|
||||
}
|
||||
|
||||
internal void JumpMethod(long Position)
|
||||
{
|
||||
CallStack.TryPop(out _);
|
||||
|
||||
CallStack.Push(Position);
|
||||
}
|
||||
|
||||
public long[] GetCallStack()
|
||||
{
|
||||
return CallStack.ToArray();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,7 +10,7 @@ namespace ChocolArm64.Translation
|
|||
{
|
||||
class AILEmitterCtx
|
||||
{
|
||||
private ATranslator Translator;
|
||||
private ATranslatorCache Cache;
|
||||
|
||||
private Dictionary<long, AILLabel> Labels;
|
||||
|
||||
|
@ -40,29 +40,14 @@ namespace ChocolArm64.Translation
|
|||
private const int Tmp5Index = -5;
|
||||
|
||||
public AILEmitterCtx(
|
||||
ATranslator Translator,
|
||||
ATranslatorCache Cache,
|
||||
ABlock[] Graph,
|
||||
ABlock Root,
|
||||
string SubName)
|
||||
{
|
||||
if (Translator == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Translator));
|
||||
}
|
||||
|
||||
if (Graph == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Graph));
|
||||
}
|
||||
|
||||
if (Root == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(Root));
|
||||
}
|
||||
|
||||
this.Translator = Translator;
|
||||
this.Graph = Graph;
|
||||
this.Root = Root;
|
||||
this.Cache = Cache ?? throw new ArgumentNullException(nameof(Cache));
|
||||
this.Graph = Graph ?? throw new ArgumentNullException(nameof(Graph));
|
||||
this.Root = Root ?? throw new ArgumentNullException(nameof(Root));
|
||||
|
||||
Labels = new Dictionary<long, AILLabel>();
|
||||
|
||||
|
@ -147,7 +132,12 @@ namespace ChocolArm64.Translation
|
|||
return false;
|
||||
}
|
||||
|
||||
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
|
||||
if (CurrOp.Emitter != AInstEmit.Bl)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Cache.TryGetSubroutine(((AOpCodeBImmAl)CurrOp).Imm, out ATranslatedSub Subroutine))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -157,7 +147,7 @@ namespace ChocolArm64.Translation
|
|||
EmitLdarg(Index);
|
||||
}
|
||||
|
||||
foreach (ARegister Reg in Sub.Params)
|
||||
foreach (ARegister Reg in Subroutine.Params)
|
||||
{
|
||||
switch (Reg.Type)
|
||||
{
|
||||
|
@ -167,9 +157,9 @@ namespace ChocolArm64.Translation
|
|||
}
|
||||
}
|
||||
|
||||
EmitCall(Sub.Method);
|
||||
EmitCall(Subroutine.Method);
|
||||
|
||||
Sub.AddCaller(Root.Position);
|
||||
Subroutine.AddCaller(Root.Position);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue