diff --git a/Misaki.HighPerformance.Jobs/JobInfo.cs b/Misaki.HighPerformance.Jobs/JobInfo.cs index 8c45a97..ad2d565 100644 --- a/Misaki.HighPerformance.Jobs/JobInfo.cs +++ b/Misaki.HighPerformance.Jobs/JobInfo.cs @@ -135,6 +135,7 @@ internal static class JobUtility // Lock-Free constants: State mask (low 16 bits) and RC unit (1 << 16) public const int STATE_MASK = 0xFFFF; public const int RC_ONE = 0x10000; + public const int RC_SHIFT = 16; public const int JOBSTATE_INVALID = (int)JobState.Invalid & STATE_MASK; public const int JOBSTATE_CREATED = (int)JobState.Created & STATE_MASK; @@ -172,18 +173,18 @@ internal static class JobUtility public static int ReadRefCount(ref JobInfo jobInfo) { var stateVal = Volatile.Read(ref jobInfo.state); - return stateVal >> 16; // RC is stored in the high 16 bits + return stateVal >> RC_SHIFT; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int GetRefCount(int stateValue) { - return stateValue >> 16; + return stateValue >> RC_SHIFT; } [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int ReleaseRC(ref int jobState) { - return GetRefCount(Interlocked.Add(ref jobState, -RC_ONE)); + return (Interlocked.Add(ref jobState, -RC_ONE) & ~STATE_MASK) >> RC_SHIFT; } } \ No newline at end of file diff --git a/Misaki.HighPerformance.Jobs/JobScheduler.cs b/Misaki.HighPerformance.Jobs/JobScheduler.cs index c98a038..e8b8cea 100644 --- a/Misaki.HighPerformance.Jobs/JobScheduler.cs +++ b/Misaki.HighPerformance.Jobs/JobScheduler.cs @@ -383,6 +383,8 @@ public sealed unsafe partial class JobScheduler : IDisposable // Release RC Interlocked.Add(ref depJobInfo.state, -JobUtility.RC_ONE); + registered = true; + break; } diff --git a/Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj b/Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj index 5c98246..72e3173 100644 --- a/Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj +++ b/Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj @@ -6,7 +6,7 @@ enable True True - 3.1.2 + 3.1.3 $(AssemblyVersion) Misaki https://git.personalnas.com/Misaki/Misaki.HighPerformance.git diff --git a/Misaki.HighPerformance.LowLevel/Buffer/AllocationManager.cs b/Misaki.HighPerformance.LowLevel/Buffer/AllocationManager.cs index 89d6b38..172e9c3 100644 --- a/Misaki.HighPerformance.LowLevel/Buffer/AllocationManager.cs +++ b/Misaki.HighPerformance.LowLevel/Buffer/AllocationManager.cs @@ -138,12 +138,30 @@ public static unsafe class AllocationManager } } + private class ThreadLocalStackPool + { + public MemoryPool pool; + + public ThreadLocalStackPool(nuint stackCapacity) + { + pool = new MemoryPool(new VirtualStack.CreationOptions + { + reserveCapacity = stackCapacity + }); + } + + ~ThreadLocalStackPool() + { + pool.Dispose(); + } + } + internal static MemoryPool s_arenaAllocator; internal static MemoryPool s_freeListAllocator; internal static HeapAllocator* s_pHeapAllocator; [ThreadStatic] - private static MemoryPool t_stackAllocator; + private static ThreadLocalStackPool? t_stackAllocator; #if MHP_ENABLE_SAFETY_CHECKS @@ -172,52 +190,6 @@ public static unsafe class AllocationManager private static nuint s_threadLocalStackSize; private static SpinLock s_stackLocker = new SpinLock(false); - private static VirtualStack** s_ppStack; - private static int s_ppStackCount; - private static int s_ppStackCapacity; - - private static void EnsureThreadLocalStackInitialize() - { - if (Unsafe.IsNullRef(ref t_stackAllocator.Allocator)) - { - t_stackAllocator = new MemoryPool(new VirtualStack.CreationOptions - { - reserveCapacity = s_threadLocalStackSize - }); - - var token = false; - try - { - s_stackLocker.Enter(ref token); - if (s_ppStack == null) - { - s_ppStack = (VirtualStack**)Malloc((nuint)(sizeof(VirtualStack*) * Environment.ProcessorCount)); - s_ppStackCapacity = Environment.ProcessorCount; - } - - if (s_ppStackCount >= s_ppStackCapacity) - { - var pOld = s_ppStack; - var newCapacity = s_ppStackCapacity * 2; - var pNew = (VirtualStack**)Realloc(pOld, (nuint)(sizeof(VirtualStack*) * newCapacity)); - - s_ppStack = pNew; - s_ppStackCapacity = newCapacity; - } - - s_ppStack[s_ppStackCount] = (VirtualStack*)Unsafe.AsPointer(ref t_stackAllocator.Allocator); - var test = s_ppStack[s_ppStackCount]; - s_ppStackCount++; - } - finally - { - if (token) - { - s_stackLocker.Exit(); - } - } - } - } public static void Initialize(AllocationManagerDesc opts) { @@ -290,8 +262,8 @@ public static unsafe class AllocationManager { Debug.Assert(s_initialized, "AllocationManager is not initialized."); - EnsureThreadLocalStackInitialize(); - return t_stackAllocator.Allocator.CreateScope(t_stackAllocator.AllocationHandle); + t_stackAllocator ??= new ThreadLocalStackPool(s_threadLocalStackSize); + return t_stackAllocator.pool.Allocator.CreateScope(t_stackAllocator.pool.AllocationHandle); } /// @@ -447,22 +419,6 @@ public static unsafe class AllocationManager s_arenaAllocator.Dispose(); s_freeListAllocator.Dispose(); - if (s_ppStack != null) - { - for (var i = 0; i < s_ppStackCount; i++) - { - var pStack = s_ppStack[i]; - if (pStack != null) - { - pStack->Dispose(); - Free(pStack); - } - } - - Free(s_ppStack); - s_ppStack = null; - } - if (s_pHeapAllocator != null) { Free(s_pHeapAllocator); diff --git a/Misaki.HighPerformance.LowLevel/Buffer/FreeList.cs b/Misaki.HighPerformance.LowLevel/Buffer/FreeList.cs index 1fc7700..bc6f2b0 100644 --- a/Misaki.HighPerformance.LowLevel/Buffer/FreeList.cs +++ b/Misaki.HighPerformance.LowLevel/Buffer/FreeList.cs @@ -19,7 +19,7 @@ public unsafe struct FreeList : IMemoryAllocatorisDisposed) == 0) + { + Volatile.Write(ref _cache->active, 0); + ThreadCache* current; + do + { + current = (ThreadCache*)Volatile.Read(ref *(nint*)&_state->inactiveCacheHead); + _cache->inactiveNext = current; + } + while (Interlocked.CompareExchange(ref *(nint*)&_state->inactiveCacheHead, (nint)_cache, (nint)current) != (nint)current); + } + } + } + private const byte _MAX_BUCKETS = 16; private const int _DEFAULT_MAX_CONCURRENCY_LEVEL = 1; private const int _OVERFLOW_CACHE_INDEX = 0; @@ -88,23 +127,22 @@ public unsafe struct FreeList : IMemoryAllocator /// Gets the alignment requirement for allocations. @@ -116,18 +154,12 @@ public unsafe struct FreeList : IMemoryAllocator public readonly nuint ChunkSize => _chunkSize; - /// - /// Gets the maximum number of dedicated thread caches. - /// - public readonly int MaxConcurrencyLevel => _maxConcurrencyLevel; - /// /// Initializes a new variable-size FreeList allocator with the specified parameters. /// /// Alignment requirement for blocks (must be power of 2). /// Size of memory chunks to allocate (default: 64KB). - /// Maximum number of dedicated thread caches. - public FreeList(nuint alignment, nuint chunkSize = _DEFAULT_CHUNK_SIZE, int maxConcurrencyLevel = _DEFAULT_MAX_CONCURRENCY_LEVEL) + public FreeList(nuint alignment, nuint chunkSize = _DEFAULT_CHUNK_SIZE) { if (alignment == 0 || (alignment & (alignment - 1)) != 0) { @@ -139,56 +171,32 @@ public unsafe struct FreeList : IMemoryAllocatorisDisposed = 0; + state->headCache = null; + state->inactiveCacheHead = null; + + _instanceId = state; _chunks = null; - _cacheCount = 0; _disposed = 0; _chunkCreationLock = 0; _cacheRegistrationLock = 0; - _overflowLock = 0; _chunkArena = new DynamicArena(1024); - _caches = (ThreadCache**)Malloc((nuint)sizeof(ThreadCache*) * (nuint)(maxConcurrencyLevel + 1)); - - for (var i = 0; i <= maxConcurrencyLevel; i++) - { - _caches[i] = null; - } - - var overflowCache = CreateCacheForThread(0); - if (overflowCache == null) - { - throw new OutOfMemoryException("Failed to initialize free list overflow cache."); - } - - _caches[_OVERFLOW_CACHE_INDEX] = overflowCache; - } catch { if (_instanceId != null) { - Free(_instanceId); + MemoryUtility.Free(_instanceId); _instanceId = null; } - if (_caches != null) - { - Free(_caches); - _caches = null; - } - _chunkArena.Dispose(); throw; @@ -268,76 +276,85 @@ public unsafe struct FreeList : IMemoryAllocatorisDisposed) != 0) { - if (t_ownerId == _instanceId && t_cacheIndex > 0 && t_cacheIndex <= _cacheCount) + return null; + } + + var threadId = Environment.CurrentManagedThreadId; + ThreadCache* cacheToUse = null; + + while (true) + { + cacheToUse = (ThreadCache*)Volatile.Read(ref *(nint*)&state->inactiveCacheHead); + if (cacheToUse == null) { - return _caches[t_cacheIndex]; + break; } - if (_cacheCount >= _maxConcurrencyLevel) + var nextInactive = cacheToUse->inactiveNext; + if (Interlocked.CompareExchange(ref *(nint*)&state->inactiveCacheHead, (nint)nextInactive, (nint)cacheToUse) == (nint)cacheToUse) { - t_ownerId = _instanceId; - t_cacheIndex = _OVERFLOW_CACHE_INDEX; - return GetOverflowCache(); + cacheToUse->threadId = threadId; + Volatile.Write(ref cacheToUse->active, 1); + break; + } + } + + if (cacheToUse == null) + { + while (Interlocked.CompareExchange(ref _cacheRegistrationLock, 1, 0) != 0) + { + Thread.SpinWait(1); } - var threadId = Environment.CurrentManagedThreadId; - var cache = CreateCacheForThread(threadId); - if (cache == null) + try { - t_ownerId = _instanceId; - t_cacheIndex = _OVERFLOW_CACHE_INDEX; - return GetOverflowCache(); + cacheToUse = CreateCacheForThread(threadId); + if (cacheToUse != null) + { + cacheToUse->next = state->headCache; + state->headCache = cacheToUse; + } } + finally + { + Interlocked.Exchange(ref _cacheRegistrationLock, 0); + } + } - _cacheCount++; - _caches[_cacheCount] = cache; - + if (cacheToUse != null) + { t_ownerId = _instanceId; - t_cacheIndex = _cacheCount; - return cache; - } - finally - { - Interlocked.Exchange(ref _cacheRegistrationLock, 0); + t_localCache = cacheToUse; + t_cacheReclaimer = new CacheReclaimer(cacheToUse, state); } + + return cacheToUse; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private ThreadCache* GetCurrentCache() { - if (t_ownerId == _instanceId) + if (t_ownerId == _instanceId && t_localCache != null) { - var index = t_cacheIndex; - if (index <= _cacheCount) - { - var cache = _caches[index]; - if (cache != null) - { - return cache; - } - } + return t_localCache; } return RegisterThreadCache(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private readonly void* TryPopFromBucket(ThreadCache* cache, ushort cacheIndex, byte bucketIndex) + private readonly void* TryPopFromBucket(ThreadCache* cache, byte bucketIndex) { var buckets = GetBuckets(cache); var bucket = &buckets[bucketIndex]; @@ -350,7 +367,7 @@ public unsafe struct FreeList : IMemoryAllocatorfreeHead = (nint)head->next; bucket->freeCount--; - AssignBlockHeader((BlockHeader*)head, head->ownerChunk, head->bucketIndex, cacheIndex); + AssignBlockHeader((BlockHeader*)head, head->ownerChunk, head->bucketIndex, cache); return head; } @@ -368,15 +385,15 @@ public unsafe struct FreeList : IMemoryAllocatorownerChunk = ownerChunk; header->bucketIndex = bucketIndex; header->magicNumber = _MAGIC_NUMBER; - header->ownerCacheIndex = ownerCacheIndex; + header->ownerCache = ownerCache; } - private bool TryCreateBlocksForBucket(ThreadCache* cache, ushort cacheIndex, byte bucketIndex) + private bool TryCreateBlocksForBucket(ThreadCache* cache, byte bucketIndex) { var buckets = GetBuckets(cache); var bucket = &buckets[bucketIndex]; @@ -509,31 +526,21 @@ public unsafe struct FreeList : IMemoryAllocatorownerCacheIndex = cacheIndex; + header->ownerCache = cache; var userPtr = (byte*)ptr + sizeof(BlockHeader); if (allocationOption.HasFlag(AllocationOption.Clear)) @@ -566,10 +573,7 @@ public unsafe struct FreeList : IMemoryAllocator [MethodImpl(MethodImplOptions.AggressiveInlining)] - public void Free(void* ptr) + public readonly void Free(void* ptr) { if (_disposed != 0 || ptr == null) { @@ -618,7 +622,7 @@ public unsafe struct FreeList : IMemoryAllocatorownerCacheIndex; + var targetCache = header->ownerCache; var bucketIndex = header->bucketIndex; if (bucketIndex == byte.MaxValue) @@ -630,38 +634,11 @@ public unsafe struct FreeList : IMemoryAllocator= 0 && ownerCacheIndex <= _cacheCount ? _caches[ownerCacheIndex] : null; - if (targetCache == null) - { - targetCache = GetOverflowCache(); - ownerCacheIndex = _OVERFLOW_CACHE_INDEX; - sameThread = t_ownerId == _instanceId && t_cacheIndex == ownerCacheIndex; - } + var sameThread = t_ownerId == _instanceId && t_localCache == targetCache; if (sameThread) { - if (ownerCacheIndex == _OVERFLOW_CACHE_INDEX) - { - while (Interlocked.CompareExchange(ref _overflowLock, 1, 0) != 0) - { - Thread.SpinWait(1); - } - - try - { - PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk); - } - finally - { - Interlocked.Exchange(ref _overflowLock, 0); - } - } - else - { - PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk); - } - + PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk); return; } @@ -684,27 +661,19 @@ public unsafe struct FreeList : IMemoryAllocatoractive = 0; - } - } - } - - if (_caches != null) - { - MemoryUtility.Free(_caches); - _caches = null; - } - if (_instanceId != null) { + var state = (SharedState*)_instanceId; + Volatile.Write(ref state->isDisposed, 1); + + var current = state->headCache; + while (current != null) + { + DrainRemoteFrees(current); + current->active = 0; + current = current->next; + } + MemoryUtility.Free(_instanceId); _instanceId = null; } diff --git a/Misaki.HighPerformance.LowLevel/Buffer/VirtualStack.cs b/Misaki.HighPerformance.LowLevel/Buffer/VirtualStack.cs index e854ff2..af48101 100644 --- a/Misaki.HighPerformance.LowLevel/Buffer/VirtualStack.cs +++ b/Misaki.HighPerformance.LowLevel/Buffer/VirtualStack.cs @@ -24,6 +24,7 @@ public unsafe struct VirtualStack : IMemoryAllocator _handle; + public readonly nuint OriginalOffset => _originalOffset; internal Scope(VirtualStack* allocator, AllocationHandle handle) { diff --git a/Misaki.HighPerformance.Test/UnitTest/Buffer/TestAllocationManager.cs b/Misaki.HighPerformance.Test/UnitTest/Buffer/TestAllocationManager.cs index c9e27ef..68e86b2 100644 --- a/Misaki.HighPerformance.Test/UnitTest/Buffer/TestAllocationManager.cs +++ b/Misaki.HighPerformance.Test/UnitTest/Buffer/TestAllocationManager.cs @@ -57,7 +57,7 @@ public class TestAllocationManager } [TestMethod] - public unsafe void StackAllocationTest() + public void StackAllocationTest() { var thread = new Thread(() => { @@ -66,6 +66,8 @@ public class TestAllocationManager Assert.IsTrue(ptr1.IsCreated); + Thread.Sleep(100); // Simulate some work + ptr1.Dispose(); scope.Dispose(); }); @@ -73,6 +75,8 @@ public class TestAllocationManager thread.Start(); var scope = AllocationManager.CreateStackScope(); + Assert.AreEqual(0u, scope.OriginalOffset); + var ptr2 = new MemoryBlock(1024, 8, scope.AllocationHandle); Assert.IsTrue(ptr2.IsCreated); diff --git a/Misaki.HighPerformance.Test/UnitTest/Buffer/TestFreeList.cs b/Misaki.HighPerformance.Test/UnitTest/Buffer/TestFreeList.cs index 4e642f5..ceda926 100644 --- a/Misaki.HighPerformance.Test/UnitTest/Buffer/TestFreeList.cs +++ b/Misaki.HighPerformance.Test/UnitTest/Buffer/TestFreeList.cs @@ -40,7 +40,7 @@ public unsafe class TestFreeList { const int threadCount = 8; const int iterations = 1000; - using var freeList = new FreeList(8, 64 * 1024, threadCount); + using var freeList = new FreeList(8, 64 * 1024); var threads = new Thread[threadCount]; for (var i = 0; i < threadCount; i++) @@ -68,7 +68,7 @@ public unsafe class TestFreeList const int producerCount = 4; const int consumerCount = 4; const int iterations = 5000; - using var freeList = new FreeList(8, 64 * 1024, producerCount + consumerCount); + using var freeList = new FreeList(8, 64 * 1024); var queue = new System.Collections.Concurrent.ConcurrentQueue(); var producers = new Thread[producerCount]; @@ -124,7 +124,7 @@ public unsafe class TestFreeList { // Set maxConcurrencyLevel to 1, but use more threads const int threadCount = 5; - using var freeList = new FreeList(8, 1024, 1); + using var freeList = new FreeList(8, 1024); var threads = new Thread[threadCount]; for (var i = 0; i < threadCount; i++)