Remove cold methods from the CPU cache (#224)

* Remove unused tracing functionality from the CPU

* GetNsoExecutable -> GetExecutable

* Unsigned comparison

* Re-add cpu tracing

* Config change

* Remove cold methods from the translation cache on the cpu

* Replace lock with try lock, pass new ATranslatorCache instead of ATranslator

* Rebase fixups
This commit is contained in:
gdkchan 2018-09-19 17:07:56 -03:00 committed by GitHub
parent c08479877e
commit 0cf462669d
9 changed files with 216 additions and 173 deletions

View file

@ -2,8 +2,6 @@ using System.Runtime.Intrinsics.X86;
public static class AOptimizations
{
public static bool GenerateCallStack = true;
private static bool UseAllSseIfAvailable = true;
private static bool UseSseIfAvailable = true;

View file

@ -13,6 +13,8 @@ namespace ChocolArm64
{
private delegate long AA64Subroutine(AThreadState Register, AMemory Memory);
private const int MinCallCountForReJit = 250;
private AA64Subroutine ExecDelegate;
public static int StateArgIdx { get; private set; }
@ -32,8 +34,6 @@ namespace ChocolArm64
private bool NeedsReJit;
private int MinCallCountForReJit = 250;
public ATranslatedSub(DynamicMethod Method, List<ARegister> Params)
{
if (Method == null)

View file

@ -1,38 +1,24 @@
using ChocolArm64.Decoder;
using ChocolArm64.Events;
using ChocolArm64.Instruction;
using ChocolArm64.Memory;
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Reflection.Emit;
namespace ChocolArm64
{
public class ATranslator
{
private ConcurrentDictionary<long, ATranslatedSub> CachedSubs;
private ConcurrentDictionary<long, string> SymbolTable;
private ATranslatorCache Cache;
public event EventHandler<ACpuTraceEventArgs> CpuTrace;
public bool EnableCpuTrace { get; set; }
public ATranslator(IReadOnlyDictionary<long, string> SymbolTable = null)
public ATranslator()
{
CachedSubs = new ConcurrentDictionary<long, ATranslatedSub>();
if (SymbolTable != null)
{
this.SymbolTable = new ConcurrentDictionary<long, string>(SymbolTable);
}
else
{
this.SymbolTable = new ConcurrentDictionary<long, string>();
}
Cache = new ATranslatorCache();
}
internal void ExecuteSubroutine(AThread Thread, long Position)
@ -70,15 +56,10 @@ namespace ChocolArm64
{
if (EnableCpuTrace)
{
if (!SymbolTable.TryGetValue(Position, out string SubName))
{
SubName = string.Empty;
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position));
}
CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName));
}
if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub))
if (!Cache.TryGetSubroutine(Position, out ATranslatedSub Sub))
{
Sub = TranslateTier0(State, Memory, Position);
}
@ -93,37 +74,20 @@ namespace ChocolArm64
while (Position != 0 && State.Running);
}
internal bool TryGetCachedSub(AOpCode OpCode, out ATranslatedSub Sub)
{
if (OpCode.Emitter != AInstEmit.Bl)
{
Sub = null;
return false;
}
return TryGetCachedSub(((AOpCodeBImmAl)OpCode).Imm, out Sub);
}
internal bool TryGetCachedSub(long Position, out ATranslatedSub Sub)
{
return CachedSubs.TryGetValue(Position, out Sub);
}
internal bool HasCachedSub(long Position)
{
return CachedSubs.ContainsKey(Position);
return Cache.HasSubroutine(Position);
}
private ATranslatedSub TranslateTier0(AThreadState State, AMemory Memory, long Position)
{
ABlock Block = ADecoder.DecodeBasicBlock(State, this, Memory, Position);
ABlock Block = ADecoder.DecodeBasicBlock(State, Memory, Position);
ABlock[] Graph = new ABlock[] { Block };
string SubName = GetSubName(Position);
string SubName = GetSubroutineName(Position);
AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName);
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Block, SubName);
do
{
@ -135,7 +99,7 @@ namespace ChocolArm64
Subroutine.SetType(ATranslatedSubType.SubTier0);
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
Cache.AddOrUpdate(Position, Subroutine, Block.OpCodes.Count);
AOpCode LastOp = Block.GetLastOp();
@ -144,13 +108,11 @@ namespace ChocolArm64
private void TranslateTier1(AThreadState State, AMemory Memory, long Position)
{
(ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(State, this, Memory, Position);
(ABlock[] Graph, ABlock Root) = ADecoder.DecodeSubroutine(Cache, State, Memory, Position);
string SubName = GetSubName(Position);
string SubName = GetSubroutineName(Position);
PropagateName(Cfg.Graph, SubName);
AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName);
AILEmitterCtx Context = new AILEmitterCtx(Cache, Graph, Root, SubName);
if (Context.CurrBlock.Position != Position)
{
@ -165,11 +127,11 @@ namespace ChocolArm64
//Mark all methods that calls this method for ReJiting,
//since we can now call it directly which is faster.
if (CachedSubs.TryGetValue(Position, out ATranslatedSub OldSub))
if (Cache.TryGetSubroutine(Position, out ATranslatedSub OldSub))
{
foreach (long CallerPos in OldSub.GetCallerPositions())
{
if (CachedSubs.TryGetValue(Position, out ATranslatedSub CallerSub))
if (Cache.TryGetSubroutine(Position, out ATranslatedSub CallerSub))
{
CallerSub.MarkForReJit();
}
@ -180,27 +142,24 @@ namespace ChocolArm64
Subroutine.SetType(ATranslatedSubType.SubTier1);
CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine);
Cache.AddOrUpdate(Position, Subroutine, GetGraphInstCount(Graph));
}
private string GetSubName(long Position)
private string GetSubroutineName(long Position)
{
return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}");
return $"Sub{Position:x16}";
}
private void PropagateName(ABlock[] Graph, string Name)
private int GetGraphInstCount(ABlock[] Graph)
{
int Size = 0;
foreach (ABlock Block in Graph)
{
AOpCode LastOp = Block.GetLastOp();
Size += Block.OpCodes.Count;
}
if (LastOp != null &&
(LastOp.Emitter == AInstEmit.Bl ||
LastOp.Emitter == AInstEmit.Blr))
{
SymbolTable.TryAdd(LastOp.Position + 4, Name);
}
}
return Size;
}
}
}

164
ATranslatorCache.cs Normal file
View file

@ -0,0 +1,164 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Threading;
namespace ChocolArm64
{
class ATranslatorCache
{
private const int MaxTotalSize = 2 * 1024 * 256;
private const int MaxTimeDelta = 30000;
private const int MinCallCountForUpdate = 1000;
private class CacheBucket
{
public ATranslatedSub Subroutine { get; private set; }
public LinkedListNode<long> Node { get; private set; }
public int CallCount { get; set; }
public int Size { get; private set; }
public int Timestamp { get; private set; }
public CacheBucket(ATranslatedSub Subroutine, LinkedListNode<long> Node, int Size)
{
this.Subroutine = Subroutine;
this.Size = Size;
UpdateNode(Node);
}
public void UpdateNode(LinkedListNode<long> Node)
{
this.Node = Node;
Timestamp = Environment.TickCount;
}
}
private ConcurrentDictionary<long, CacheBucket> Cache;
private LinkedList<long> SortedCache;
private int TotalSize;
public ATranslatorCache()
{
Cache = new ConcurrentDictionary<long, CacheBucket>();
SortedCache = new LinkedList<long>();
}
public void AddOrUpdate(long Position, ATranslatedSub Subroutine, int Size)
{
ClearCacheIfNeeded();
TotalSize += Size;
lock (SortedCache)
{
LinkedListNode<long> Node = SortedCache.AddLast(Position);
CacheBucket NewBucket = new CacheBucket(Subroutine, Node, Size);
Cache.AddOrUpdate(Position, NewBucket, (Key, Bucket) =>
{
TotalSize -= Bucket.Size;
SortedCache.Remove(Bucket.Node);
return NewBucket;
});
}
}
public bool HasSubroutine(long Position)
{
return Cache.ContainsKey(Position);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public bool TryGetSubroutine(long Position, out ATranslatedSub Subroutine)
{
if (Cache.TryGetValue(Position, out CacheBucket Bucket))
{
if (Bucket.CallCount++ > MinCallCountForUpdate)
{
if (Monitor.TryEnter(SortedCache))
{
try
{
Bucket.CallCount = 0;
SortedCache.Remove(Bucket.Node);
Bucket.UpdateNode(SortedCache.AddLast(Position));
}
finally
{
Monitor.Exit(SortedCache);
}
}
}
Subroutine = Bucket.Subroutine;
return true;
}
Subroutine = default(ATranslatedSub);
return false;
}
private void ClearCacheIfNeeded()
{
int Timestamp = Environment.TickCount;
while (TotalSize > MaxTotalSize)
{
lock (SortedCache)
{
LinkedListNode<long> Node = SortedCache.First;
if (Node == null)
{
break;
}
CacheBucket Bucket = Cache[Node.Value];
int TimeDelta = RingDelta(Bucket.Timestamp, Timestamp);
if ((uint)TimeDelta <= (uint)MaxTimeDelta)
{
break;
}
if (Cache.TryRemove(Node.Value, out Bucket))
{
TotalSize -= Bucket.Size;
SortedCache.Remove(Bucket.Node);
}
}
}
}
private static int RingDelta(int Old, int New)
{
if ((uint)New < (uint)Old)
{
return New + (~Old + 1);
}
else
{
return New - Old;
}
}
}
}

View file

@ -19,11 +19,7 @@ namespace ChocolArm64.Decoder
OpActivators = new ConcurrentDictionary<Type, OpActivator>();
}
public static ABlock DecodeBasicBlock(
AThreadState State,
ATranslator Translator,
AMemory Memory,
long Start)
public static ABlock DecodeBasicBlock(AThreadState State, AMemory Memory, long Start)
{
ABlock Block = new ABlock(Start);
@ -33,8 +29,8 @@ namespace ChocolArm64.Decoder
}
public static (ABlock[] Graph, ABlock Root) DecodeSubroutine(
ATranslatorCache Cache,
AThreadState State,
ATranslator Translator,
AMemory Memory,
long Start)
{
@ -79,7 +75,7 @@ namespace ChocolArm64.Decoder
{
if (Op.Emitter == AInstEmit.Bl)
{
HasCachedSub = Translator.HasCachedSub(Op.Imm);
HasCachedSub = Cache.HasSubroutine(Op.Imm);
}
else
{

View file

@ -6,12 +6,9 @@ namespace ChocolArm64.Events
{
public long Position { get; private set; }
public string SubName { get; private set; }
public ACpuTraceEventArgs(long Position, string SubName)
public ACpuTraceEventArgs(long Position)
{
this.Position = Position;
this.SubName = SubName;
}
}
}

View file

@ -35,14 +35,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdc_I8(Op.Imm);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
}
Context.EmitLdc_I(Op.Position + 4);
Context.EmitStint(AThreadState.LRIndex);
Context.EmitStoreState();
@ -80,14 +72,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdintzr(Op.Rn);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.EnterMethod));
}
Context.EmitLdc_I(Op.Position + 4);
Context.EmitStint(AThreadState.LRIndex);
Context.EmitStoreState();
@ -100,14 +84,6 @@ namespace ChocolArm64.Instruction
{
AOpCodeBReg Op = (AOpCodeBReg)Context.CurrOp;
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitLdintzr(Op.Rn);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.JumpMethod));
}
Context.EmitStoreState();
Context.EmitLdintzr(Op.Rn);
@ -129,13 +105,6 @@ namespace ChocolArm64.Instruction
public static void Ret(AILEmitterCtx Context)
{
if (AOptimizations.GenerateCallStack)
{
Context.EmitLdarg(ATranslatedSub.StateArgIdx);
Context.EmitPrivateCall(typeof(AThreadState), nameof(AThreadState.ExitMethod));
}
Context.EmitStoreState();
Context.EmitLdint(AThreadState.LRIndex);

View file

@ -1,6 +1,5 @@
using ChocolArm64.Events;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
@ -86,17 +85,10 @@ namespace ChocolArm64.State
public event EventHandler<AInstExceptionEventArgs> SvcCall;
public event EventHandler<AInstUndefinedEventArgs> Undefined;
private Stack<long> CallStack;
private static Stopwatch TickCounter;
private static double HostTickFreq;
public AThreadState()
{
CallStack = new Stack<long>();
}
static AThreadState()
{
HostTickFreq = 1.0 / Stopwatch.Frequency;
@ -153,27 +145,5 @@ namespace ChocolArm64.State
{
Undefined?.Invoke(this, new AInstUndefinedEventArgs(Position, RawOpCode));
}
internal void EnterMethod(long Position)
{
CallStack.Push(Position);
}
internal void ExitMethod()
{
CallStack.TryPop(out _);
}
internal void JumpMethod(long Position)
{
CallStack.TryPop(out _);
CallStack.Push(Position);
}
public long[] GetCallStack()
{
return CallStack.ToArray();
}
}
}

View file

@ -10,7 +10,7 @@ namespace ChocolArm64.Translation
{
class AILEmitterCtx
{
private ATranslator Translator;
private ATranslatorCache Cache;
private Dictionary<long, AILLabel> Labels;
@ -40,29 +40,14 @@ namespace ChocolArm64.Translation
private const int Tmp5Index = -5;
public AILEmitterCtx(
ATranslator Translator,
ATranslatorCache Cache,
ABlock[] Graph,
ABlock Root,
string SubName)
{
if (Translator == null)
{
throw new ArgumentNullException(nameof(Translator));
}
if (Graph == null)
{
throw new ArgumentNullException(nameof(Graph));
}
if (Root == null)
{
throw new ArgumentNullException(nameof(Root));
}
this.Translator = Translator;
this.Graph = Graph;
this.Root = Root;
this.Cache = Cache ?? throw new ArgumentNullException(nameof(Cache));
this.Graph = Graph ?? throw new ArgumentNullException(nameof(Graph));
this.Root = Root ?? throw new ArgumentNullException(nameof(Root));
Labels = new Dictionary<long, AILLabel>();
@ -147,7 +132,12 @@ namespace ChocolArm64.Translation
return false;
}
if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub))
if (CurrOp.Emitter != AInstEmit.Bl)
{
return false;
}
if (!Cache.TryGetSubroutine(((AOpCodeBImmAl)CurrOp).Imm, out ATranslatedSub Subroutine))
{
return false;
}
@ -157,7 +147,7 @@ namespace ChocolArm64.Translation
EmitLdarg(Index);
}
foreach (ARegister Reg in Sub.Params)
foreach (ARegister Reg in Subroutine.Params)
{
switch (Reg.Type)
{
@ -167,9 +157,9 @@ namespace ChocolArm64.Translation
}
}
EmitCall(Sub.Method);
EmitCall(Subroutine.Method);
Sub.AddCaller(Root.Position);
Subroutine.AddCaller(Root.Position);
return true;
}