2020-06-16 20:28:02 +02:00
|
|
|
using ARMeilleure.CodeGen;
|
2021-09-14 03:23:37 +04:00
|
|
|
using ARMeilleure.CodeGen.Linking;
|
2020-06-16 20:28:02 +02:00
|
|
|
using ARMeilleure.CodeGen.Unwinding;
|
2021-04-19 01:43:53 +04:00
|
|
|
using ARMeilleure.Common;
|
2020-06-16 20:28:02 +02:00
|
|
|
using ARMeilleure.Memory;
|
2021-02-22 03:23:48 +01:00
|
|
|
using Ryujinx.Common;
|
2020-08-30 22:21:53 +05:30
|
|
|
using Ryujinx.Common.Configuration;
|
2020-06-16 20:28:02 +02:00
|
|
|
using Ryujinx.Common.Logging;
|
2023-03-17 08:14:50 -04:00
|
|
|
using Ryujinx.Common.Memory;
|
2020-06-16 20:28:02 +02:00
|
|
|
using System;
|
|
|
|
using System.Buffers.Binary;
|
2020-12-24 03:58:36 +01:00
|
|
|
using System.Collections.Generic;
|
2020-06-16 20:28:02 +02:00
|
|
|
using System.Diagnostics;
|
|
|
|
using System.IO;
|
|
|
|
using System.IO.Compression;
|
2021-02-22 03:23:48 +01:00
|
|
|
using System.Runtime;
|
|
|
|
using System.Runtime.CompilerServices;
|
2020-06-16 20:28:02 +02:00
|
|
|
using System.Runtime.InteropServices;
|
|
|
|
using System.Threading;
|
|
|
|
|
2021-02-22 03:23:48 +01:00
|
|
|
using static ARMeilleure.Translation.PTC.PtcFormatter;
|
|
|
|
|
2020-06-16 20:28:02 +02:00
|
|
|
namespace ARMeilleure.Translation.PTC
|
|
|
|
{
|
2023-01-12 07:05:18 +00:00
|
|
|
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
|
|
|
|
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
class Ptc : IPtcLoadState
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
2021-04-13 03:24:36 +02:00
|
|
|
// Eight-character tags; the constructor packs each one into a little-endian 64-bit magic value.
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";

private const uint InternalVersion = 4626; //! To be incremented manually for each change to the ARMeilleure project.

// Names of the primary and backup sub-directories created under "<games dir>/<title id>/cache/cpu".
private const string ActualDir = "0";
private const string BackupDir = "1";

// Values used for the title id and display version while no title is configured.
private const string TitleIdTextDefault = "0000000000000000";
private const string DisplayVersionDefault = "0";

// Special symbols, distinguished only by their numeric value.
public static readonly Symbol PageTableSymbol = new Symbol(SymbolType.Special, 1);
public static readonly Symbol CountTableSymbol = new Symbol(SymbolType.Special, 2);
public static readonly Symbol DispatchStubSymbol = new Symbol(SymbolType.Special, 3);

private const byte FillingByte = 0x00;
private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest;

/// <summary>Profiler created by the constructor for this cache instance.</summary>
public PtcProfiler Profiler { get; }

// Carriers.
// In-memory sections of the cache: filled by Load, measured by Save, recreated by ResetCarriersIfNeeded.
private MemoryStream _infosStream;
private List<byte[]> _codesList;
private MemoryStream _relocsStream;
private MemoryStream _unwindInfosStream;

// Packed forms of OuterHeaderMagicString / InnerHeaderMagicString, compared against file headers on load.
private readonly ulong _outerHeaderMagic;
private readonly ulong _innerHeaderMagic;

// Created signaled; PreSave resets it while a save is running and sets it again afterwards.
private readonly ManualResetEvent _waitEvent;

// Allocated in the constructor; its users are outside this part of the file.
private readonly object _lock;

private bool _disposed;

/// <summary>Title id the cache is bound to, or the default value when no title is configured.</summary>
public string TitleIdText { get; private set; }

/// <summary>Display version the cache is bound to, or the default value when none was supplied.</summary>
public string DisplayVersion { get; private set; }

// Memory manager mode passed to Initialize.
private MemoryManagerType _memoryMode;

/// <summary>Path of the primary cache file without its extension; empty when no title is configured.</summary>
public string CachePathActual { get; private set; }

/// <summary>Path of the backup cache file without its extension; empty when no title is configured.</summary>
public string CachePathBackup { get; private set; }

public PtcState State { get; private set; }

// Progress reporting helpers.
private volatile int _translateCount;
private volatile int _translateTotalCount;
public event Action<PtcLoadingState, int, int> PtcStateChanged;
|
2021-03-03 06:09:36 +05:30
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Creates the cache with empty carriers, default title information and empty cache paths,
/// then calls <c>Disable()</c>.
/// </summary>
public Ptc()
{
    // The profiler is handed this instance, so it stays the very first thing that is created.
    Profiler = new PtcProfiler(this);

    InitializeCarriers();

    _outerHeaderMagic = PackMagic(OuterHeaderMagicString);
    _innerHeaderMagic = PackMagic(InnerHeaderMagicString);

    // Starts signaled: no save is in progress yet.
    _waitEvent = new ManualResetEvent(initialState: true);

    _lock = new object();

    _disposed = false;

    TitleIdText = TitleIdTextDefault;
    DisplayVersion = DisplayVersionDefault;

    CachePathActual = string.Empty;
    CachePathBackup = string.Empty;

    Disable();

    // Interprets the UTF-8 bytes of an eight-character tag as one little-endian 64-bit value.
    static ulong PackMagic(string tag)
    {
        byte[] tagBytes = EncodingCache.UTF8NoBOM.GetBytes(tag);

        return BinaryPrimitives.ReadUInt64LittleEndian(tagBytes.AsSpan());
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Binds the cache to a title: waits for pending work, resets the profiler entries, resolves the
/// cache directories and file paths, pre-loads any existing cache and finally enables the cache.
/// </summary>
/// <param name="titleIdText">Title id used as a directory name; null, empty or the default id disables the cache.</param>
/// <param name="displayVersion">Version used as the cache file name; null or empty falls back to the default version.</param>
/// <param name="enabled">When false the cache is reset to its defaults and disabled.</param>
/// <param name="memoryMode">Memory manager mode remembered for this session.</param>
public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode)
{
    Wait();

    Profiler.Wait();
    Profiler.ClearEntries();

    Logger.Info?.Print(LogClass.Ptc, $"Initializing Profiled Persistent Translation Cache (enabled: {enabled}).");

    if (!enabled || string.IsNullOrEmpty(titleIdText) || titleIdText == TitleIdTextDefault)
    {
        // Nothing to bind to: fall back to the same state the constructor sets up.
        TitleIdText = TitleIdTextDefault;
        DisplayVersion = DisplayVersionDefault;

        CachePathActual = string.Empty;
        CachePathBackup = string.Empty;

        Disable();

        return;
    }

    TitleIdText = titleIdText;
    DisplayVersion = !string.IsNullOrEmpty(displayVersion) ? displayVersion : DisplayVersionDefault;

    _memoryMode = memoryMode;

    string workPathActual = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", ActualDir);
    string workPathBackup = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", BackupDir);

    // CreateDirectory does nothing when the directory already exists, so no prior Exists() check is
    // needed; dropping it also removes the window between the check and the creation.
    Directory.CreateDirectory(workPathActual);
    Directory.CreateDirectory(workPathBackup);

    // The paths carry no extension here; PreLoad and PreSave append ".cache" themselves.
    CachePathActual = Path.Combine(workPathActual, DisplayVersion);
    CachePathBackup = Path.Combine(workPathBackup, DisplayVersion);

    PreLoad();
    Profiler.PreLoad();

    Enable();
}
|
2020-06-16 20:28:02 +02:00
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Allocates a fresh, empty set of carriers: three streams obtained from the shared stream
/// manager and a new list for the code blobs.
/// </summary>
private void InitializeCarriers()
{
    _infosStream = MemoryStreamManager.Shared.GetStream();
    _relocsStream = MemoryStreamManager.Shared.GetStream();
    _unwindInfosStream = MemoryStreamManager.Shared.GetStream();

    _codesList = new();
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Releases the current carriers: disposes the three streams and empties the code list.
/// The fields are not reassigned here; <see cref="InitializeCarriers"/> does that.
/// </summary>
private void DisposeCarriers()
{
    _codesList.Clear();

    _infosStream.Dispose();
    _relocsStream.Dispose();
    _unwindInfosStream.Dispose();
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Tells whether none of the carriers holds any data.
/// </summary>
/// <returns>true when all three streams have length zero and the code list has no entries.</returns>
private bool AreCarriersEmpty()
{
    bool infosEmpty = _infosStream.Length == 0L;

    if (!infosEmpty || _codesList.Count != 0)
    {
        return false;
    }

    if (_relocsStream.Length != 0L)
    {
        return false;
    }

    return _unwindInfosStream.Length == 0L;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Replaces the carriers with fresh, empty ones, unless they are already empty,
/// in which case nothing is touched.
/// </summary>
private void ResetCarriersIfNeeded()
{
    if (!AreCarriersEmpty())
    {
        DisposeCarriers();

        InitializeCarriers();
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Loads the primary cache file when it exists and is not empty; when that file is missing,
/// empty or rejected by <c>Load</c>, falls back to the backup file under the same conditions.
/// </summary>
private void PreLoad()
{
    string actualPath = $"{CachePathActual}.cache";
    string backupPath = $"{CachePathBackup}.cache";

    FileInfo actualInfo = new(actualPath);
    FileInfo backupInfo = new(backupPath);

    // Short-circuiting keeps the original order: Load is only attempted on an existing, non-empty file.
    bool actualLoaded = actualInfo.Exists && actualInfo.Length != 0L && Load(actualPath, false);

    if (actualLoaded)
    {
        return;
    }

    // The backup is only inspected once the primary file turned out to be unusable.
    if (backupInfo.Exists && backupInfo.Length != 0L)
    {
        Load(backupPath, true);
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Loads one translation cache file into the in-memory carriers.
/// </summary>
/// <remarks>
/// The file starts with an outer header read directly from the file stream, followed by a deflate
/// payload. The payload is inflated into an unmanaged buffer and consists of an inner header and the
/// infos, codes, relocs and unwind-infos sections, each protected by a hash stored in the inner header.
/// Any header, size or hash mismatch truncates the file to zero length and makes the method return false.
/// </remarks>
/// <param name="fileName">Path of the cache file to open.</param>
/// <param name="isBackup">Only selects the wording of the success log message.</param>
/// <returns>true when the carriers were filled from the file; false when the file was rejected and truncated.</returns>
private unsafe bool Load(string fileName, bool isBackup)
{
    using (FileStream compressedStream = new(fileName, FileMode.Open))
    using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true))
    {
        OuterHeader outerHeader = DeserializeStructure<OuterHeader>(compressedStream);

        // Evaluated left to right with short-circuiting: header integrity first, then compatibility
        // with this build, CPU features, memory manager mode, OS and process architecture.
        bool outerHeaderRejected =
            !outerHeader.IsHeaderValid() ||
            outerHeader.Magic != _outerHeaderMagic ||
            outerHeader.CacheFileVersion != InternalVersion ||
            outerHeader.Endianness != GetEndianness() ||
            outerHeader.FeatureInfo != GetFeatureInfo() ||
            outerHeader.MemoryManagerMode != GetMemoryManagerMode() ||
            outerHeader.OSPlatform != GetOSPlatform() ||
            outerHeader.Architecture != (uint)RuntimeInformation.ProcessArchitecture;

        // The payload must at least hold the inner header; this also rejects negative sizes before
        // they reach the unmanaged allocator.
        if (outerHeaderRejected || outerHeader.UncompressedStreamSize < Unsafe.SizeOf<InnerHeader>())
        {
            InvalidateCompressedStream(compressedStream);

            return false;
        }

        IntPtr intPtr = IntPtr.Zero;

        try
        {
            intPtr = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize));

            using (UnmanagedMemoryStream stream = new((byte*)intPtr.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite))
            {
                try
                {
                    deflateStream.CopyTo(stream);
                }
                catch
                {
                    // Deliberately broad: any failure while inflating means the file is unusable.
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                // A payload shorter than announced would leave the tail of the buffer uninitialized.
                // The debug assertion alone does not protect release builds, hence the explicit check.
                if (stream.Position != stream.Length)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                stream.Seek(0L, SeekOrigin.Begin);

                InnerHeader innerHeader = DeserializeStructure<InnerHeader>(stream);

                if (!innerHeader.IsHeaderValid() || innerHeader.Magic != _innerHeaderMagic)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                // The spans below are built straight from raw pointers, so the section lengths must be
                // proven to lie inside the buffer before they are used.
                long remaining = stream.Length - stream.Position;

                bool sectionsFit =
                    innerHeader.InfosLength >= 0 &&
                    innerHeader.CodesLength >= 0 &&
                    innerHeader.RelocsLength >= 0 &&
                    innerHeader.UnwindInfosLength >= 0 &&
                    innerHeader.CodesLength <= remaining &&
                    (long)innerHeader.InfosLength + innerHeader.CodesLength + innerHeader.RelocsLength + innerHeader.UnwindInfosLength <= remaining;

                if (!sectionsFit)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                // First pass: walk the sections in file order and verify each one against its stored hash.
                ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength);
                stream.Seek(innerHeader.InfosLength, SeekOrigin.Current);

                Hash128 infosHash = XXHash128.ComputeHash(infosBytes);

                if (innerHeader.InfosHash != infosHash)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                // NOTE(review): a codes section whose length does not fit a positive int is hashed as an
                // empty span here, exactly as before.
                ReadOnlySpan<byte> codesBytes = (int)innerHeader.CodesLength > 0 ? new(stream.PositionPointer, (int)innerHeader.CodesLength) : ReadOnlySpan<byte>.Empty;
                stream.Seek(innerHeader.CodesLength, SeekOrigin.Current);

                Hash128 codesHash = XXHash128.ComputeHash(codesBytes);

                if (innerHeader.CodesHash != codesHash)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength);
                stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current);

                Hash128 relocsHash = XXHash128.ComputeHash(relocsBytes);

                if (innerHeader.RelocsHash != relocsHash)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength);
                stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current);

                Hash128 unwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes);

                if (innerHeader.UnwindInfosHash != unwindInfosHash)
                {
                    InvalidateCompressedStream(compressedStream);

                    return false;
                }

                Debug.Assert(stream.Position == stream.Length);

                // Second pass: rewind to just after the inner header and copy the verified sections
                // into the carriers. The codes section is read from the stream itself, which is why
                // the stream position is kept in step with the sections that are copied from spans.
                stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin);

                _infosStream.Write(infosBytes);
                stream.Seek(innerHeader.InfosLength, SeekOrigin.Current);

                _codesList.ReadFrom(stream);

                _relocsStream.Write(relocsBytes);
                stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current);

                _unwindInfosStream.Write(unwindInfosBytes);
                stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current);

                Debug.Assert(stream.Position == stream.Length);
            }
        }
        finally
        {
            // Runs on every exit path, including the early returns above.
            if (intPtr != IntPtr.Zero)
            {
                Marshal.FreeHGlobal(intPtr);
            }
        }
    }

    long fileSize = new FileInfo(fileName).Length;

    Logger.Info?.Print(LogClass.Ptc, $"{(isBackup ? "Loaded Backup Translation Cache" : "Loaded Translation Cache")} (size: {fileSize} bytes, translated functions: {GetEntriesCount()}).");

    return true;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Empties the given cache file stream by truncating it to a length of zero bytes.
private void InvalidateCompressedStream(FileStream compressedStream) => compressedStream.SetLength(0L);
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Backs up the current cache file (when one exists and is non-empty) and then writes a new
// cache file via Save(). The wait event is reset on entry and set again once the save and
// the cleanup have completed without throwing.
private void PreSave()
{
    _waitEvent.Reset();

    try
    {
        string actualPath = $"{CachePathActual}.cache";
        string backupPath = $"{CachePathBackup}.cache";

        FileInfo actualInfo = new(actualPath);

        // Length is only queried when the file exists (short-circuit).
        bool hasNonEmptyCache = actualInfo.Exists && actualInfo.Length != 0L;

        if (hasNonEmptyCache)
        {
            // Overwrite any previous backup with the cache that is about to be replaced.
            File.Copy(actualPath, backupPath, true);
        }

        Save(actualPath);
    }
    finally
    {
        // Runs whether or not the copy/save succeeded.
        ResetCarriersIfNeeded();

        GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
    }

    // Only reached when nothing above threw.
    _waitEvent.Set();
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Serializes the carriers (infos, codes, relocs, unwind infos) into an unmanaged buffer laid out as
// [InnerHeader][infos][codes][relocs][unwind infos], then writes the outer header to `fileName`
// followed by that buffer passed through a DeflateStream.
private unsafe void Save(string fileName)
{
    int translatedFuncsCount;

    // Section hashes and the header hash are filled in later, once the sections have been written.
    InnerHeader innerHeader = new()
    {
        Magic = _innerHeaderMagic,

        InfosLength = (int)_infosStream.Length,
        CodesLength = _codesList.Length(),
        RelocsLength = (int)_relocsStream.Length,
        UnwindInfosLength = (int)_unwindInfosStream.Length,
    };

    OuterHeader outerHeader = new()
    {
        Magic = _outerHeaderMagic,

        CacheFileVersion = InternalVersion,
        Endianness = GetEndianness(),
        FeatureInfo = GetFeatureInfo(),
        MemoryManagerMode = GetMemoryManagerMode(),
        OSPlatform = GetOSPlatform(),
        Architecture = (uint)RuntimeInformation.ProcessArchitecture,

        // Inner header plus the four sections, summed as a 64-bit value.
        UncompressedStreamSize =
            (long)Unsafe.SizeOf<InnerHeader>() +
            innerHeader.InfosLength +
            innerHeader.CodesLength +
            innerHeader.RelocsLength +
            innerHeader.UnwindInfosLength,
    };

    outerHeader.SetHeaderHash();

    IntPtr buffer = IntPtr.Zero;

    try
    {
        buffer = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize));

        using UnmanagedMemoryStream stream = new((byte*)buffer.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite);

        // Leave room for the inner header; it is serialized last, once the hashes are known.
        stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin);

        // Each span is created at the current write position *before* the matching WriteTo call,
        // so it covers exactly the bytes that call is about to produce.
        ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength);
        _infosStream.WriteTo(stream);

        int codesLength = (int)innerHeader.CodesLength;
        ReadOnlySpan<byte> codesBytes = codesLength > 0
            ? new ReadOnlySpan<byte>(stream.PositionPointer, codesLength)
            : ReadOnlySpan<byte>.Empty;
        _codesList.WriteTo(stream);

        ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength);
        _relocsStream.WriteTo(stream);

        ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength);
        _unwindInfosStream.WriteTo(stream);

        Debug.Assert(stream.Position == stream.Length);

        innerHeader.InfosHash = XXHash128.ComputeHash(infosBytes);
        innerHeader.CodesHash = XXHash128.ComputeHash(codesBytes);
        innerHeader.RelocsHash = XXHash128.ComputeHash(relocsBytes);
        innerHeader.UnwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes);

        innerHeader.SetHeaderHash();

        // Go back and fill in the space reserved for the inner header.
        stream.Seek(0L, SeekOrigin.Begin);
        SerializeStructure(stream, innerHeader);

        // Capture the count for the log message before the carriers are (possibly) reset.
        translatedFuncsCount = GetEntriesCount();

        ResetCarriersIfNeeded();

        // Disposal happens in reverse declaration order at the end of the try block:
        // deflateStream, then compressedStream, then stream.
        using FileStream compressedStream = new(fileName, FileMode.OpenOrCreate);
        // Third argument `true` presumably maps to leaveOpen, assuming SaveCompressionLevel is a CompressionLevel.
        using DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true);

        try
        {
            // The outer header is written to the file directly, not through the deflate stream.
            SerializeStructure(compressedStream, outerHeader);

            stream.Seek(0L, SeekOrigin.Begin);
            stream.CopyTo(deflateStream);
        }
        catch
        {
            // Deliberate best effort: on any failure rewind so the truncation below empties the file.
            compressedStream.Position = 0L;
        }

        // FileMode.OpenOrCreate does not truncate an existing file, so drop anything past the write position.
        // NOTE(review): deflateStream is disposed after this check, so data it may still buffer is
        // written after the truncation - confirm this ordering is intended.
        long writtenLength = compressedStream.Position;

        if (writtenLength < compressedStream.Length)
        {
            compressedStream.SetLength(writtenLength);
        }
    }
    finally
    {
        if (buffer != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(buffer);
        }
    }

    long fileSize = new FileInfo(fileName).Length;

    // A zero-length file is not reported as a saved cache.
    if (fileSize != 0L)
    {
        Logger.Info?.Print(LogClass.Ptc, $"Saved Translation Cache (size: {fileSize} bytes, translated functions: {translatedFuncsCount}).");
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Walks every entry held by the carriers and, for each one, either skips it (already stubbed),
// stubs it (guest code hash mismatch, or a low-quality entry the profiler now marks as high-quality),
// or rebuilds a TranslatedFunction from the stored code and registers it with `translator`.
// Throws if, after the walk, any carrier stream changed length or was not consumed exactly to its end.
public void LoadTranslations(Translator translator)
{
    if (AreCarriersEmpty())
    {
        return;
    }

    // Remember the stream lengths so the consistency check at the end can detect changes.
    long expectedInfosLength = _infosStream.Length;
    long expectedRelocsLength = _relocsStream.Length;
    long expectedUnwindInfosLength = _unwindInfosStream.Length;

    _infosStream.Seek(0L, SeekOrigin.Begin);
    _relocsStream.Seek(0L, SeekOrigin.Begin);
    _unwindInfosStream.Seek(0L, SeekOrigin.Begin);

    using (BinaryReader relocsReader = new(_relocsStream, EncodingCache.UTF8NoBOM, true))
    {
        using (BinaryReader unwindInfosReader = new(_unwindInfosStream, EncodingCache.UTF8NoBOM, true))
        {
            for (int index = 0; index < GetEntriesCount(); index++)
            {
                InfoEntry entry = DeserializeStructure<InfoEntry>(_infosStream);

                if (entry.Stubbed)
                {
                    // Already stubbed: just step over this entry's data in each carrier.
                    SkipCode(index, entry.CodeLength);
                    SkipReloc(entry.RelocEntriesCount);
                    SkipUnwindInfo(unwindInfosReader);

                    continue;
                }

                bool isEntryChanged = entry.Hash != ComputeHash(translator.Memory, entry.Address, entry.GuestSize);

                // Only evaluated for unchanged entries: a non-HighCq entry whose address the profiler
                // has recorded with HighCq set.
                bool isOutdatedLowCq =
                    !isEntryChanged &&
                    !entry.HighCq &&
                    Profiler.ProfiledFuncs.TryGetValue(entry.Address, out var profiledFunc) &&
                    profiledFunc.HighCq;

                if (isEntryChanged || isOutdatedLowCq)
                {
                    entry.Stubbed = true;
                    entry.CodeLength = 0;
                    UpdateInfo(entry);

                    StubCode(index);
                    StubReloc(entry.RelocEntriesCount);
                    StubUnwindInfo(unwindInfosReader);

                    if (isEntryChanged)
                    {
                        Logger.Info?.Print(LogClass.Ptc, $"Invalidated translated function (address: 0x{entry.Address:X16})");
                    }

                    continue;
                }

                byte[] code = ReadCode(index, entry.CodeLength);

                // Stays null when the entry has no relocations to apply.
                Counter<uint> callCounter = null;

                if (entry.RelocEntriesCount != 0)
                {
                    RelocEntry[] relocEntries = GetRelocEntries(relocsReader, entry.RelocEntriesCount);

                    PatchCode(translator, code, relocEntries, out callCounter);
                }

                UnwindInfo unwindInfo = ReadUnwindInfo(unwindInfosReader);

                TranslatedFunction func = FastTranslate(code, callCounter, entry.GuestSize, unwindInfo, entry.HighCq);

                translator.RegisterFunction(entry.Address, func);

                // TryAdd is called outside Debug.Assert so that it also runs in non-DEBUG builds.
                bool isAddressUnique = translator.Functions.TryAdd(entry.Address, entry.GuestSize, func);

                Debug.Assert(isAddressUnique, $"The address 0x{entry.Address:X16} is not unique.");
            }
        }
    }

    // Every stream must have kept its length and must have been read/skipped exactly to its end.
    bool streamsConsistent =
        _infosStream.Length == expectedInfosLength && _infosStream.Position == expectedInfosLength &&
        _relocsStream.Length == expectedRelocsLength && _relocsStream.Position == expectedRelocsLength &&
        _unwindInfosStream.Length == expectedUnwindInfosLength && _unwindInfosStream.Position == expectedUnwindInfosLength;

    if (!streamsConsistent)
    {
        throw new Exception("The length of a memory stream has changed, or its position has not reached or has exceeded its end.");
    }

    Logger.Info?.Print(LogClass.Ptc, $"{translator.Functions.Count} translated functions loaded");
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Returns the number of elements currently held in _codesList.
private int GetEntriesCount() => _codesList.Count;
|
|
|
|
|
2021-04-13 03:24:36 +02:00
|
|
|
// Debug-only sanity check for an entry that is being skipped: both the stored code at `index`
// and the recorded code length are expected to be empty/zero. Calls are compiled out unless
// DEBUG is defined.
[Conditional("DEBUG")]
private void SkipCode(int index, int codeLength)
{
    byte[] storedCode = _codesList[index];

    Debug.Assert(storedCode.Length == 0);
    Debug.Assert(codeLength == 0);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Advances _relocsStream past `relocEntriesCount` relocation entries without reading them.
private void SkipReloc(int relocEntriesCount)
{
    // Each entry occupies RelocEntry.Stride bytes in the stream.
    int bytesToSkip = relocEntriesCount * RelocEntry.Stride;

    _relocsStream.Seek(bytesToSkip, SeekOrigin.Current);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Advances _unwindInfosStream past one unwind-info record: reads the push-entry count through
// `unwindInfosReader`, then seeks over that many push entries plus UnwindInfo.Stride bytes.
private void SkipUnwindInfo(BinaryReader unwindInfosReader)
{
    // The leading Int32 is consumed by this read; the seek below is relative to the position after it.
    int pushEntriesLength = unwindInfosReader.ReadInt32();

    int bytesToSkip = pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride;

    _unwindInfosStream.Seek(bytesToSkip, SeekOrigin.Current);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
// Returns the code stored at `index` in _codesList. In DEBUG builds, asserts that its length
// matches `codeLength`.
private byte[] ReadCode(int index, int codeLength)
{
    byte[] code = _codesList[index];

    Debug.Assert(code.Length == codeLength);

    return code;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Reads <paramref name="relocEntriesCount"/> relocation entries from <paramref name="relocsReader"/>.
/// </summary>
/// <param name="relocsReader">Reader the entries are consumed from, starting at its current position.</param>
/// <param name="relocEntriesCount">Number of entries to read.</param>
/// <returns>An array with one <see cref="RelocEntry"/> per entry read, in stream order.</returns>
private RelocEntry[] GetRelocEntries(BinaryReader relocsReader, int relocEntriesCount)
{
    RelocEntry[] entries = new RelocEntry[relocEntriesCount];

    for (int index = 0; index < entries.Length; index++)
    {
        // Each entry is consumed as: Int32 position, one byte symbol type, UInt64 symbol value.
        // The order of these three reads must not change.
        int entryPosition = relocsReader.ReadInt32();
        SymbolType symbolType = (SymbolType)relocsReader.ReadByte();
        ulong symbolValue = relocsReader.ReadUInt64();

        Symbol entrySymbol = new Symbol(symbolType, symbolValue);

        entries[index] = new RelocEntry(entryPosition, entrySymbol);
    }

    return entries;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
private void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
2021-04-19 01:43:53 +04:00
|
|
|
callCounter = null;
|
|
|
|
|
2020-06-16 20:28:02 +02:00
|
|
|
foreach (RelocEntry relocEntry in relocEntries)
|
|
|
|
{
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
IntPtr? imm = null;
|
|
|
|
Symbol symbol = relocEntry.Symbol;
|
2020-06-16 20:28:02 +02:00
|
|
|
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
if (symbol.Type == SymbolType.FunctionTable)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
ulong guestAddress = symbol.Value;
|
|
|
|
|
|
|
|
if (translator.FunctionTable.IsValid(guestAddress))
|
|
|
|
{
|
|
|
|
unsafe { imm = (IntPtr)Unsafe.AsPointer(ref translator.FunctionTable.GetValue(guestAddress)); }
|
|
|
|
}
|
2020-06-16 20:28:02 +02:00
|
|
|
}
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
else if (symbol.Type == SymbolType.DelegateTable)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
int index = (int)symbol.Value;
|
|
|
|
|
|
|
|
if (Delegates.TryGetDelegateFuncPtrByIndex(index, out IntPtr funcPtr))
|
|
|
|
{
|
|
|
|
imm = funcPtr;
|
|
|
|
}
|
2020-06-16 20:28:02 +02:00
|
|
|
}
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
else if (symbol == PageTableSymbol)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
imm = translator.Memory.PageTablePointer;
|
2020-06-16 20:28:02 +02:00
|
|
|
}
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
else if (symbol == CountTableSymbol)
|
2021-04-19 01:43:53 +04:00
|
|
|
{
|
2021-09-29 02:28:34 +04:00
|
|
|
if (callCounter == null)
|
|
|
|
{
|
|
|
|
callCounter = new Counter<uint>(translator.CountTable);
|
|
|
|
}
|
2021-04-19 01:43:53 +04:00
|
|
|
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
unsafe { imm = (IntPtr)Unsafe.AsPointer(ref callCounter.Value); }
|
2021-04-19 01:43:53 +04:00
|
|
|
}
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameter to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
else if (symbol == DispatchStubSymbol)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before everytime a test was ran, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameters to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
imm = translator.Stubs.DispatchStub;
|
2020-06-16 20:28:02 +02:00
|
|
|
}
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before, every time a test was run, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameters to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
|
|
|
|
if (imm == null)
|
2020-06-16 20:28:02 +02:00
|
|
|
{
|
|
|
|
throw new Exception($"Unexpected reloc entry {relocEntry}.");
|
|
|
|
}
|
|
|
|
|
Add multi-level function table (#2228)
* Add AddressTable<T>
* Use AddressTable<T> for dispatch
* Remove JumpTable & co.
* Add fallback for out of range addresses
* Add PPTC support
* Add documentation to `AddressTable<T>`
* Make AddressTable<T> configurable
* Fix table walk
* Fix IsMapped check
* Remove CountTableCapacity
* Add PPTC support for fast path
* Rename IsMapped to IsValid
* Remove stale comment
* Change format of address in exception message
* Add TranslatorStubs
* Split DispatchStub
Avoids recompilation of stubs during tests.
* Add hint for 64bit or 32bit
* Add documentation to `Symbol`
* Add documentation to `TranslatorStubs`
Make `TranslatorStubs` disposable as well.
* Add documentation to `SymbolType`
* Add `AddressTableEventSource` to monitor function table size
Add an EventSource which measures the amount of unmanaged bytes
allocated by AddressTable<T> instances.
dotnet-counters monitor -n Ryujinx --counters ARMeilleure
* Add `AllowLcqInFunctionTable` optimization toggle
This is to reduce the impact this change has on the test duration.
Before, every time a test was run, the FunctionTable would be initialized
and populated so that the newly compiled test would get registered to
it.
* Implement unmanaged dispatcher
Uses the DispatchStub to dispatch into the next translation, which
allows execution to stay in unmanaged for longer and skips a
ConcurrentDictionary look up when the target translation has been
registered to the FunctionTable.
* Remove redundant null check
* Tune levels of FunctionTable
Uses 5 levels instead of 4 and change unit of AddressTableEventSource
from KB to MB.
* Use 64-bit function table
Improves codegen for direct branches:
mov qword [rax+0x408],0x10603560
- mov rcx,sub_10603560_OFFSET
- mov ecx,[rcx]
- mov ecx,ecx
- mov rdx,JIT_CACHE_BASE
- add rdx,rcx
+ mov rcx,sub_10603560
+ mov rdx,[rcx]
mov rcx,rax
Improves codegen for dispatch stub:
and rax,byte +0x1f
- mov eax,[rcx+rax*4]
- mov eax,eax
- mov rcx,JIT_CACHE_BASE
- lea rax,[rcx+rax]
+ mov rax,[rcx+rax*8]
mov rcx,rbx
* Remove `JitCacheSymbol` & `JitCache.Offset`
* Turn `Translator.Translate` into an instance method
We do not have to add more parameters to this method and related ones as
new structures are added & needed for translation.
* Add symbol only when PTC is enabled
Address LDj3SNuD's feedback
* Change `NativeContext.Running` to a 32-bit integer
* Fix PageTable symbol for host mapped
2021-05-30 01:06:28 +04:00
|
|
|
BinaryPrimitives.WriteUInt64LittleEndian(code.Slice(relocEntry.Position, 8), (ulong)imm.Value);
|
2020-06-16 20:28:02 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Deserializes one <see cref="UnwindInfo"/> record from the given reader.
/// </summary>
/// <remarks>
/// Layout consumed, in order: an <see cref="int"/> push-entry count, then for each entry four
/// <see cref="int"/> values (pseudo-op, prolog offset, register index, stack offset or allocation size),
/// and finally one <see cref="int"/> prologue size.
/// </remarks>
/// <param name="unwindInfosReader">Reader positioned at the start of the record</param>
/// <returns>The unwind info built from the values read</returns>
private UnwindInfo ReadUnwindInfo(BinaryReader unwindInfosReader)
{
    int entryCount = unwindInfosReader.ReadInt32();

    var entries = new UnwindPushEntry[entryCount];

    for (int index = 0; index < entries.Length; index++)
    {
        // C# evaluates constructor arguments left to right, so the four fields are
        // consumed from the stream in exactly the order they are listed here.
        entries[index] = new UnwindPushEntry(
            (UnwindPseudoOp)unwindInfosReader.ReadInt32(), // Pseudo-op.
            unwindInfosReader.ReadInt32(),                 // Prolog offset.
            unwindInfosReader.ReadInt32(),                 // Register index.
            unwindInfosReader.ReadInt32());                // Stack offset or allocation size.
    }

    int prologueSize = unwindInfosReader.ReadInt32();

    return new UnwindInfo(entries, prologueSize);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Builds a <see cref="TranslatedFunction"/> directly from already generated host code.
/// </summary>
/// <remarks>
/// The code is wrapped in a <see cref="CompiledFunction"/> with <see cref="RelocInfo.Empty"/>
/// and then mapped to a <see cref="GuestFunction"/> delegate and its pointer.
/// </remarks>
/// <param name="code">Host code of the function</param>
/// <param name="callCounter">Call counter forwarded to the resulting function</param>
/// <param name="guestSize">Guest size forwarded to the resulting function</param>
/// <param name="unwindInfo">Unwind info associated with <paramref name="code"/></param>
/// <param name="highCq">High code quality flag forwarded to the resulting function</param>
/// <returns>The translated function wrapping the mapped code</returns>
private TranslatedFunction FastTranslate(
    byte[] code,
    Counter<uint> callCounter,
    ulong guestSize,
    UnwindInfo unwindInfo,
    bool highCq)
{
    CompiledFunction compiled = new CompiledFunction(code, unwindInfo, RelocInfo.Empty);

    var guestFunc = compiled.MapWithPointer<GuestFunction>(out IntPtr guestFuncPointer);

    return new TranslatedFunction(guestFunc, guestFuncPointer, callCounter, guestSize, highCq);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Moves the infos stream back by the size of one <see cref="InfoEntry"/> from its current
/// position and serializes <paramref name="infoEntry"/> at that location.
/// </summary>
/// <param name="infoEntry">Entry to serialize</param>
private void UpdateInfo(InfoEntry infoEntry)
{
    // NOTE(review): presumably overwrites the entry that was just read; confirm against callers.
    int entrySize = Unsafe.SizeOf<InfoEntry>();

    _infosStream.Seek(-entrySize, SeekOrigin.Current);

    SerializeStructure(_infosStream, infoEntry);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Replaces the code stored at <paramref name="index"/> in the codes list with an empty byte array.
/// </summary>
/// <param name="index">Index of the code entry to replace</param>
private void StubCode(int index) => _codesList[index] = Array.Empty<byte>();
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Writes <paramref name="relocEntriesCount"/> * <see cref="RelocEntry.Stride"/> filler bytes
/// to the relocs stream at its current position.
/// </summary>
/// <param name="relocEntriesCount">Number of relocation entries to fill over</param>
private void StubReloc(int relocEntriesCount)
{
    int bytesToFill = relocEntriesCount * RelocEntry.Stride;

    for (int remaining = bytesToFill; remaining > 0; remaining--)
    {
        _relocsStream.WriteByte(FillingByte);
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Reads the push-entry count of an unwind info record and writes filler bytes over
/// the unwind infos stream for the size of that record.
/// </summary>
/// <remarks>
/// The number of bytes written is the count read times <see cref="UnwindPushEntry.Stride"/>
/// plus <see cref="UnwindInfo.Stride"/>. Only the 4-byte count is consumed from the reader.
/// </remarks>
/// <param name="unwindInfosReader">Reader positioned at the start of the record</param>
private void StubUnwindInfo(BinaryReader unwindInfosReader)
{
    int entryCount = unwindInfosReader.ReadInt32();

    int bytesToFill = entryCount * UnwindPushEntry.Stride + UnwindInfo.Stride;

    for (int remaining = bytesToFill; remaining > 0; remaining--)
    {
        _unwindInfosStream.WriteByte(FillingByte);
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Translates every profiled function that is still pending, using several worker threads,
/// and then starts the pre-save step on a background thread.
/// </summary>
/// <remarks>
/// When nothing is pending, the carriers are reset if needed, a one-time large object heap
/// compaction is requested, and the method returns without starting any thread.
/// Progress is published through <see cref="PtcStateChanged"/>: once at start, periodically by a
/// reporter thread while translating, and once when all workers have finished.
/// </remarks>
/// <param name="translator">Translator used to translate and register the functions</param>
public void MakeAndSaveTranslations(Translator translator)
{
    var pendingFuncs = Profiler.GetProfiledFuncsToTranslate(translator.Functions);

    _translateCount = 0;
    _translateTotalCount = pendingFuncs.Count;

    if (_translateTotalCount == 0)
    {
        ResetCarriersIfNeeded();

        GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;

        return;
    }

    // If there are enough cores lying around, we leave one alone for other tasks.
    int coreCount = Environment.ProcessorCount;
    int workerCount = coreCount > 4 ? coreCount - 1 : coreCount;

    Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {workerCount}");

    PtcStateChanged?.Invoke(PtcLoadingState.Start, _translateCount, _translateTotalCount);

    using AutoResetEvent reporterStopEvent = new AutoResetEvent(false);

    Thread reporterThread = new Thread(ReportProgress)
    {
        Name = "Ptc.ProgressReporter",
        Priority = ThreadPriority.Lowest,
        IsBackground = true
    };

    reporterThread.Start(reporterStopEvent);

    // Worker body: drains the shared queue until it is empty or PTC is no longer enabled.
    void TranslateWorker()
    {
        while (pendingFuncs.TryDequeue(out var entry))
        {
            ulong guestAddress = entry.address;

            Debug.Assert(Profiler.IsAddressInStaticCodeRange(guestAddress));

            TranslatedFunction translated = translator.Translate(guestAddress, entry.funcProfile.Mode, entry.funcProfile.HighCq);

            bool added = translator.Functions.TryAdd(guestAddress, translated.GuestSize, translated);

            Debug.Assert(added, $"The address 0x{guestAddress:X16} is not unique.");

            Interlocked.Increment(ref _translateCount);

            translator.RegisterFunction(guestAddress, translated);

            // The state is checked only after the current function has been fully registered.
            if (State != PtcState.Enabled)
            {
                break;
            }
        }
    }

    List<Thread> workers = new List<Thread>();

    for (int workerIndex = 0; workerIndex < workerCount; workerIndex++)
    {
        workers.Add(new Thread(TranslateWorker) { IsBackground = true });
    }

    Stopwatch stopwatch = Stopwatch.StartNew();

    foreach (Thread worker in workers)
    {
        worker.Start();
    }

    foreach (Thread worker in workers)
    {
        worker.Join();
    }

    workers.Clear();

    // All workers are done: stop the reporter and wait for it before the event is disposed.
    reporterStopEvent.Set();
    reporterThread.Join();

    stopwatch.Stop();

    PtcStateChanged?.Invoke(PtcLoadingState.Loaded, _translateCount, _translateTotalCount);

    Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {workerCount} in {stopwatch.Elapsed.TotalSeconds} s");

    Thread preSaveThread = new Thread(PreSave) { IsBackground = true };

    preSaveThread.Start();
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Polls the translated-function counter and raises <c>PtcStateChanged</c> whenever it has moved,
/// until the supplied event is signalled.
/// </summary>
/// <param name="state">The <see cref="AutoResetEvent"/> whose signalling ends the reporting loop.</param>
private void ReportProgress(object state)
{
    const int pollIntervalMs = 50;

    AutoResetEvent stopSignal = (AutoResetEvent)state;

    int lastReported = 0;

    while (true)
    {
        int current = _translateCount;

        // Only notify listeners when the counter changed since the previous report.
        if (current != lastReported)
        {
            PtcStateChanged?.Invoke(PtcLoadingState.Loading, current, _translateTotalCount);

            lastReported = current;
        }

        // WaitOne returns true once the event is signalled; a timeout means "poll again".
        if (stopSignal.WaitOne(pollIntervalMs))
        {
            break;
        }
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Hashes <paramref name="guestSize"/> bytes of guest memory starting at <paramref name="address"/>.
/// </summary>
/// <param name="memory">Memory manager the span is read from.</param>
/// <param name="address">Start address passed to <c>GetSpan</c>.</param>
/// <param name="guestSize">Number of bytes to hash; must fit in an <see cref="int"/>.</param>
/// <returns>The XXHash128 of the requested span.</returns>
/// <exception cref="OverflowException"><paramref name="guestSize"/> does not fit in an <see cref="int"/>.</exception>
public static Hash128 ComputeHash(IMemoryManager memory, ulong address, ulong guestSize)
{
    // The span API takes an int length; a checked cast turns an oversized request into an exception.
    int length = checked((int)guestSize);

    return XXHash128.ComputeHash(memory.GetSpan(address, length));
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Records one compiled function: its info entry, machine code, relocation entries and unwind info
/// are appended to their respective streams/list while holding <c>_lock</c>.
/// </summary>
/// <param name="address">Value stored in the entry's <c>Address</c> field.</param>
/// <param name="guestSize">Value stored in the entry's <c>GuestSize</c> field.</param>
/// <param name="hash">Value stored in the entry's <c>Hash</c> field.</param>
/// <param name="highCq">Value stored in the entry's <c>HighCq</c> field.</param>
/// <param name="compiledFunc">Source of the code bytes, relocation info and unwind info.</param>
public void WriteCompiledFunction(ulong address, ulong guestSize, Hash128 hash, bool highCq, CompiledFunction compiledFunc)
{
    lock (_lock)
    {
        byte[] codeBytes = compiledFunc.Code;
        RelocInfo relocs = compiledFunc.RelocInfo;
        UnwindInfo unwind = compiledFunc.UnwindInfo;

        InfoEntry entryInfo = new InfoEntry
        {
            Address = address,
            GuestSize = guestSize,
            Hash = hash,
            HighCq = highCq,
            Stubbed = false,
            CodeLength = codeBytes.Length,
            RelocEntriesCount = relocs.Entries.Length,
        };

        SerializeStructure(_infosStream, entryInfo);

        WriteCode(codeBytes.AsSpan());

        // Relocations: one (position, symbol type, symbol value) triple per entry.
        // The writer is created with leaveOpen = true so disposing it does not close the stream.
        using BinaryWriter relocWriter = new BinaryWriter(_relocsStream, EncodingCache.UTF8NoBOM, true);

        foreach (RelocEntry reloc in relocs.Entries)
        {
            relocWriter.Write(reloc.Position);
            relocWriter.Write((byte)reloc.Symbol.Type);
            relocWriter.Write(reloc.Symbol.Value);
        }

        // Unwind info: push-entry count, then each push entry, then the prolog size.
        using BinaryWriter unwindWriter = new BinaryWriter(_unwindInfosStream, EncodingCache.UTF8NoBOM, true);

        unwindWriter.Write(unwind.PushEntries.Length);

        foreach (UnwindPushEntry pushEntry in unwind.PushEntries)
        {
            unwindWriter.Write((int)pushEntry.PseudoOp);
            unwindWriter.Write(pushEntry.PrologOffset);
            unwindWriter.Write(pushEntry.RegIndex);
            unwindWriter.Write(pushEntry.StackOffsetOrAllocSize);
        }

        unwindWriter.Write(unwind.PrologSize);
    }
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Appends a private copy of <paramref name="code"/> to <c>_codesList</c>.
/// </summary>
/// <param name="code">Bytes to copy and store.</param>
private void WriteCode(ReadOnlySpan<byte> code)
{
    // ToArray allocates a new array, so the stored entry does not alias the caller's buffer.
    byte[] copy = code.ToArray();

    _codesList.Add(copy);
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Reports the byte order of the current process.
/// </summary>
/// <returns><c>true</c> when <see cref="BitConverter.IsLittleEndian"/> is <c>true</c>; otherwise <c>false</c>.</returns>
public static bool GetEndianness() => BitConverter.IsLittleEndian;
|
|
|
|
|
2022-08-25 11:12:13 +01:00
|
|
|
/// <summary>
/// Builds the <see cref="FeatureInfo"/> for the current process architecture.
/// </summary>
/// <returns>
/// On Arm64, the Linux HWCAP/HWCAP2 and macOS feature words followed by two zeros;
/// on X64, the four CPUID-derived feature words plus the XCR0 EAX value;
/// on any other architecture, five zeros.
/// </returns>
private static FeatureInfo GetFeatureInfo()
{
    return RuntimeInformation.ProcessArchitecture switch
    {
        Architecture.Arm64 => new FeatureInfo(
            (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
            (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
            (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
            0,
            0),

        Architecture.X64 => new FeatureInfo(
            (ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
            (ulong)X86HardwareCapabilities.FeatureInfo1Edx,
            (ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
            (ulong)X86HardwareCapabilities.FeatureInfo7Ecx,
            (ulong)X86HardwareCapabilities.Xcr0InfoEax),

        _ => new FeatureInfo(0, 0, 0, 0, 0),
    };
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Returns the configured memory manager mode narrowed to a single byte.
/// </summary>
/// <returns><c>_memoryMode</c> cast to <see cref="byte"/>.</returns>
private byte GetMemoryManagerMode() => (byte)_memoryMode;
|
|
|
|
|
2020-12-17 20:32:09 +01:00
|
|
|
/// <summary>
/// Encodes the host operating system as a bit mask.
/// </summary>
/// <returns>
/// A value with bit 0 set on FreeBSD, bit 1 on Linux, bit 2 on macOS and bit 3 on Windows;
/// zero when none of those checks succeeds.
/// </returns>
private static uint GetOSPlatform()
{
    // Yields the bit at the given index when the flag is set, otherwise zero.
    static uint Bit(bool isSet, int index) => (isSet ? 1u : 0u) << index;

    return Bit(OperatingSystem.IsFreeBSD(), 0) |
           Bit(OperatingSystem.IsLinux(), 1) |
           Bit(OperatingSystem.IsMacOS(), 2) |
           Bit(OperatingSystem.IsWindows(), 3);
}
|
|
|
|
|
ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
Does not implement rounding, or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
Avoids an additional allocation
* ARMeilleure: Formatting fixes
* ARMeilleure: Fix EVEX encoding src2 register index
> Just like in VEX prefix, vvvv is provided in inverted form.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`
Passes unit tests, verified instruction utilization
* ARMeilleure: Fix EVEX register operand designations
Operand 2 was being sourced improperly.
EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvvv
Operand 3: ModRM:r/m
Operand 4: Imm
This fixes the improper register designations when emitting vpternlog.
Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`
* ARMeilleure: PTC version bump
* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail
* ARMeilleure: Update EVEX encoding comment capitalization
2023-03-20 12:09:24 -07:00
|
|
|
/// <summary>
/// Byte-packed, sequentially laid out outer header. <see cref="HeaderHash"/> is the last field
/// and is a hash of every header byte that precedes it.
/// </summary>
// NOTE: field order is part of the binary layout (LayoutKind.Sequential, Pack = 1); do not reorder.
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)]
private struct OuterHeader
{
    public ulong Magic;

    public uint CacheFileVersion;

    public bool Endianness;
    public FeatureInfo FeatureInfo;
    public byte MemoryManagerMode;
    public uint OSPlatform;
    public uint Architecture;

    public long UncompressedStreamSize;

    public Hash128 HeaderHash;

    /// <summary>
    /// Recomputes the hash over the leading fields and stores it in <see cref="HeaderHash"/>.
    /// </summary>
    public void SetHeaderHash()
    {
        HeaderHash = HashLeadingBytes();
    }

    /// <summary>
    /// Checks whether <see cref="HeaderHash"/> matches a freshly computed hash of the leading fields.
    /// </summary>
    /// <returns><c>true</c> when the stored and recomputed hashes are equal.</returns>
    public bool IsHeaderValid()
    {
        return HashLeadingBytes() == HeaderHash;
    }

    // Hashes the raw bytes of this struct, excluding the trailing Hash128-sized HeaderHash field.
    private Hash128 HashLeadingBytes()
    {
        Span<byte> rawBytes = MemoryMarshal.AsBytes(MemoryMarshal.CreateSpan(ref this, 1));

        int hashedLength = Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>();

        return XXHash128.ComputeHash(rawBytes.Slice(0, hashedLength));
    }
}
|
|
|
|
|
ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
Does not implement rounding, or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
Avoids an additional allocation
* ARMeilleure: Formatting fixes
* ARMeilleure: Fix EVEX encoding src2 register index
> Just like in VEX prefix, vvvv is provided in inverted form.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`
Passes unit tests, verified instruction utilization
* ARMeilleure: Fix EVEX register operand designations
Operand 2 was being sourced improperly.
EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvvv
Operand 3: ModRM:r/m
Operand 4: Imm
This fixes the improper register designations when emitting vpternlog.
Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`
* ARMeilleure: PTC version bump
* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail
* ARMeilleure: Update EVEX encoding comment capitalization
2023-03-20 12:09:24 -07:00
|
|
|
/// <summary>
/// Five 64-bit feature words describing the host CPU, stored byte-packed in declaration order.
/// Which hardware capability each word carries depends on the architecture (see <c>GetFeatureInfo</c>).
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)]
private record struct FeatureInfo(
    ulong FeatureInfo0,
    ulong FeatureInfo1,
    ulong FeatureInfo2,
    ulong FeatureInfo3,
    ulong FeatureInfo4);
|
2022-08-25 11:12:13 +01:00
|
|
|
|
2021-04-13 03:24:36 +02:00
|
|
|
/// <summary>
/// Byte-packed, sequentially laid out inner header holding the length and hash of each section.
/// <see cref="HeaderHash"/> is the last field and is a hash of every header byte that precedes it.
/// </summary>
// NOTE: field order is part of the binary layout (LayoutKind.Sequential, Pack = 1); do not reorder.
// NOTE(review): the visible fields add up to 108 bytes assuming Hash128 is 16 bytes — confirm.
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 108*/)]
private struct InnerHeader
{
    public ulong Magic;

    public int InfosLength;
    public long CodesLength;
    public int RelocsLength;
    public int UnwindInfosLength;

    public Hash128 InfosHash;
    public Hash128 CodesHash;
    public Hash128 RelocsHash;
    public Hash128 UnwindInfosHash;

    public Hash128 HeaderHash;

    /// <summary>
    /// Recomputes the hash over the leading fields and stores it in <see cref="HeaderHash"/>.
    /// </summary>
    public void SetHeaderHash()
    {
        HeaderHash = HashLeadingBytes();
    }

    /// <summary>
    /// Checks whether <see cref="HeaderHash"/> matches a freshly computed hash of the leading fields.
    /// </summary>
    /// <returns><c>true</c> when the stored and recomputed hashes are equal.</returns>
    public bool IsHeaderValid()
    {
        return HashLeadingBytes() == HeaderHash;
    }

    // Hashes the raw bytes of this struct, excluding the trailing Hash128-sized HeaderHash field.
    private Hash128 HashLeadingBytes()
    {
        Span<byte> rawBytes = MemoryMarshal.AsBytes(MemoryMarshal.CreateSpan(ref this, 1));

        int hashedLength = Unsafe.SizeOf<InnerHeader>() - Unsafe.SizeOf<Hash128>();

        return XXHash128.ComputeHash(rawBytes.Slice(0, hashedLength));
    }
}
|
|
|
|
|
2021-05-13 20:05:15 +02:00
|
|
|
/// <summary>
/// Byte-packed per-function record serialized by <c>WriteCompiledFunction</c>.
/// </summary>
// NOTE: field order is part of the binary layout (LayoutKind.Sequential, Pack = 1); do not reorder.
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 42*/)]
private struct InfoEntry
{
    // Address, guest size and hash as supplied by the caller of WriteCompiledFunction.
    public ulong Address;
    public ulong GuestSize;
    public Hash128 Hash;

    public bool HighCq;

    // WriteCompiledFunction always writes false here.
    public bool Stubbed;

    // Length of the code array and number of relocation entries written for this function.
    public int CodeLength;
    public int RelocEntriesCount;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Unconditionally moves the cache into <c>PtcState.Enabled</c>.
/// </summary>
private void Enable() => State = PtcState.Enabled;
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Moves the cache from <c>PtcState.Enabled</c> to <c>PtcState.Continuing</c>;
/// does nothing in any other state.
/// </summary>
public void Continue()
{
    if (State != PtcState.Enabled)
    {
        return;
    }

    State = PtcState.Continuing;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Moves the cache to <c>PtcState.Closing</c> when it is currently
/// <c>PtcState.Enabled</c> or <c>PtcState.Continuing</c>; does nothing otherwise.
/// </summary>
public void Close()
{
    if (State != PtcState.Enabled &&
        State != PtcState.Continuing)
    {
        return;
    }

    State = PtcState.Closing;
}
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Unconditionally moves the cache into <c>PtcState.Disabled</c>.
/// </summary>
public void Disable() => State = PtcState.Disabled;
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Blocks the calling thread until <c>_waitEvent</c> is signalled.
/// </summary>
private void Wait() => _waitEvent.WaitOne();
|
|
|
|
|
2023-01-04 20:01:44 -03:00
|
|
|
/// <summary>
/// Releases the cache's resources once; subsequent calls are no-ops.
/// </summary>
public void Dispose()
{
    if (_disposed)
    {
        return;
    }

    _disposed = true;

    // Block until the wait event is signalled before tearing anything down.
    Wait();
    _waitEvent.Dispose();

    DisposeCarriers();
}
|
|
|
|
}
|
2021-04-19 01:43:53 +04:00
|
|
|
}
|