using Misaki.HighPerformance.LowLevel.Utilities;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.LowLevel.Buffer;
/// <summary>
/// A variable-size allocator that uses per-thread caches for the hot path and a remote-free queue for cross-thread deallocation.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public unsafe struct FreeList : IMemoryAllocator
{
/// <summary>
/// Bundles the three arguments that <see cref="FreeList.Create"/> forwards to the <see cref="FreeList"/> constructor.
/// </summary>
public struct CreationOptions
{
/// <summary>Passed as the constructor's <c>alignment</c> argument (the constructor requires a non-zero power of two).</summary>
public nuint alignment;
/// <summary>Passed as the constructor's <c>chunkSize</c> argument (the constructor requires at least 1024).</summary>
public nuint chunkSize;
/// <summary>Passed as the constructor's <c>maxConcurrencyLevel</c> argument (the constructor requires at least 1).</summary>
public int maxConcurrencyLevel;
}
/// <summary>
/// Builds a <see cref="FreeList"/> from an option bundle by forwarding its three fields to the constructor.
/// </summary>
/// <param name="opts">Alignment, chunk size and concurrency level for the new allocator.</param>
/// <returns>The newly constructed allocator.</returns>
public static FreeList Create(in CreationOptions opts)
    => new FreeList(
        alignment: opts.alignment,
        chunkSize: opts.chunkSize,
        maxConcurrencyLevel: opts.maxConcurrencyLevel);
/// <summary>
/// Link record written at the start of a block while the block is free.
/// It is stored in the same leading bytes that hold the <see cref="BlockHeader"/> while the block is handed out:
/// PushToBucket and the remote path of Free write it, TryPopFromBucket replaces it with a header again.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct FreeNode
{
// Next node of the list this block is on (a bucket's free list or a cache's remote-free list).
public FreeNode* next;
// Chunk the block was carved from; copied back into the BlockHeader when the block is popped.
public MemoryChunk* ownerChunk;
// Size of the block in bytes, as passed to PushToBucket / read from the header in Free.
public nuint blockSize;
// Bucket the block belongs to; DrainRemoteFrees uses it to pick the destination bucket.
public int bucketIndex;
}
/// <summary>
/// Bookkeeping record for one region obtained through AlignedAlloc.
/// Records are linked into the allocator's <c>_chunks</c> list so Dispose can release every region.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct MemoryChunk
{
// Next record in the allocator-wide chunk list.
public MemoryChunk* next;
// Base address of the region; this is the pointer handed to AlignedFree in Dispose.
public byte* memory;
// Total size of the region in bytes.
public nuint size;
// Bytes already consumed. TryCreateBlocksForBucket sets this to size (fully carved);
// AllocateFromChunk advances it as it bump-allocates.
public nuint used;
}
/// <summary>
/// One size class of a thread cache: a singly linked list of free blocks of a single size.
/// The explicit 32-byte size matches the per-bucket stride used for <c>ThreadCache.buckets</c>.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 32)]
private struct SizeBucket
{
// Number of nodes currently on the list; incremented by PushToBucket, decremented by TryPopFromBucket.
[FieldOffset(0)]
public long freeCount;
// Head of the free list, stored as an integer-typed FreeNode pointer (0 when empty).
[FieldOffset(8)]
public nint freeHead;
// Size in bytes of every block in this bucket (set by InitializeBuckets).
[FieldOffset(16)]
public nuint blockSize;
// Spin flag (0 = free, 1 = held) taken by TryCreateBlocksForBucket while refilling.
[FieldOffset(24)]
public int creationLock;
}
/// <summary>
/// Per-thread allocation state: the size buckets plus the list other threads push freed blocks onto.
/// The cache stored at index <c>_OVERFLOW_CACHE_INDEX</c> is shared and guarded by <c>_overflowLock</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
private struct ThreadCache
{
// Raw storage for _MAX_BUCKETS SizeBucket entries of 32 bytes each; accessed through GetBuckets.
public fixed byte buckets[_MAX_BUCKETS * 32];
// Head of the remote-free list (FreeNode pointer as nint). Free pushes onto it with a CAS loop,
// DrainRemoteFrees detaches the whole list with Interlocked.Exchange.
public nint remoteFreeHead;
// Value passed to CreateCacheForThread: Environment.CurrentManagedThreadId for registered threads,
// 0 for the overflow cache.
public int threadId;
// Set to 1 on creation and to 0 by Dispose.
public int active;
}
/// <summary>
/// 32-byte header stored at the start of every block that has been handed out.
/// The pointer returned by Allocate is the block start plus <c>sizeof(BlockHeader)</c>;
/// Free steps back by the same amount to find this header.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 32)]
private struct BlockHeader
{
// Chunk the block was carved from; Allocate stores null here for oversized (standalone) allocations.
[FieldOffset(0)]
public MemoryChunk* ownerChunk;
// Block size in bytes; Free feeds it to FindBucket to recover the bucket index.
[FieldOffset(8)]
public nuint blockSize;
// Set to _MAGIC_NUMBER by AssignBlockHeader; Free ignores pointers whose header does not carry it.
[FieldOffset(16)]
public ulong magicNumber;
// Index into _caches of the cache the block was allocated from.
[FieldOffset(24)]
public int ownerCacheIndex;
}
// Number of size classes per cache; InitializeBuckets doubles the block size from one bucket to the next.
private const int _MAX_BUCKETS = 16;
// Default for the constructor's maxConcurrencyLevel parameter.
private const int _DEFAULT_MAX_CONCURRENCY_LEVEL = 1;
// Slot in _caches that holds the shared overflow cache.
private const int _OVERFLOW_CACHE_INDEX = 0;
// Block size of bucket 0 and the minimum payload size Allocate rounds up to.
private const nuint _MIN_BLOCK_SIZE = 16;
// Default for the constructor's chunkSize parameter (64 KB).
private const nuint _DEFAULT_CHUNK_SIZE = 64 * 1024;
// Marker written into every BlockHeader and verified by Free.
private const ulong _MAGIC_NUMBER = 0xDEADBEEFDEADBEEF;
// Calling thread's index into _caches; only meaningful while t_ownerId equals this instance's _instanceId.
[ThreadStatic]
private static int t_cacheIndex;
// _instanceId of the allocator the calling thread last registered with.
[ThreadStatic]
private static void* t_ownerId;
// Small native allocation whose address is compared against t_ownerId to identify this allocator instance.
private void* _instanceId;
// Native array of maxConcurrencyLevel + 1 cache pointers; slot 0 is the overflow cache.
private ThreadCache** _caches;
// Arena that backs the ThreadCache and MemoryChunk records.
private DynamicArena _chunkArena;
// Head of the linked list of chunk records; walked by Dispose to release chunk memory.
private MemoryChunk* _chunks;
private readonly nuint _chunkSize;
private readonly nuint _alignment;
private readonly int _maxConcurrencyLevel;
// Number of dedicated caches registered so far; also the highest occupied index in _caches.
private int _cacheCount;
// 0 while usable, 1 once Dispose has run.
private volatile int _disposed;
// Spin flag (0/1) guarding updates to the _chunks list.
private volatile int _chunkCreationLock;
// Spin flag (0/1) serialising RegisterThreadCache.
private volatile int _cacheRegistrationLock;
// Spin flag (0/1) guarding the shared overflow cache in Allocate and Free.
private volatile int _overflowLock;
/// <summary>
/// Gets the default alignment this allocator was constructed with.
/// </summary>
public readonly nuint Alignment
{
    get { return _alignment; }
}
/// <summary>
/// Gets the chunk size this allocator was constructed with.
/// </summary>
public readonly nuint ChunkSize
{
    get { return _chunkSize; }
}
/// <summary>
/// Gets the maximum number of dedicated thread caches this allocator will register.
/// </summary>
public readonly int MaxConcurrencyLevel
{
    get { return _maxConcurrencyLevel; }
}
/// <summary>
/// Initializes a new variable-size FreeList allocator with the specified parameters.
/// </summary>
/// <param name="alignment">Alignment requirement for blocks (must be power of 2).</param>
/// <param name="chunkSize">Size of memory chunks to allocate (default: 64KB, minimum 1KB).</param>
/// <param name="maxConcurrencyLevel">Maximum number of dedicated thread caches (must be at least 1).</param>
/// <exception cref="ArgumentException">
/// <paramref name="alignment"/> is zero or not a power of two, or <paramref name="chunkSize"/> is below 1024.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="maxConcurrencyLevel"/> is less than 1.</exception>
/// <exception cref="OutOfMemoryException">A native allocation needed for the allocator's own bookkeeping failed.</exception>
public FreeList(nuint alignment, nuint chunkSize = _DEFAULT_CHUNK_SIZE, int maxConcurrencyLevel = _DEFAULT_MAX_CONCURRENCY_LEVEL)
{
    if (alignment == 0 || (alignment & (alignment - 1)) != 0)
    {
        throw new ArgumentException("Alignment must be a power of 2", nameof(alignment));
    }
    if (chunkSize < 1024)
    {
        throw new ArgumentException("Chunk size must be at least 1KB", nameof(chunkSize));
    }
    if (maxConcurrencyLevel < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(maxConcurrencyLevel), "Max concurrency level must be greater than zero.");
    }

    _alignment = alignment;
    _chunkSize = chunkSize;
    _maxConcurrencyLevel = maxConcurrencyLevel;

    // Tracks whether the arena was actually constructed, so the failure path never disposes a default instance.
    var arenaCreated = false;
    try
    {
        // The address of this tiny allocation is the identity token compared against t_ownerId.
        // A null token would compare equal to a thread's default t_ownerId, so it must be rejected.
        _instanceId = Malloc((nuint)sizeof(nint));
        if (_instanceId == null)
        {
            throw new OutOfMemoryException("Failed to allocate free list instance identifier.");
        }

        _chunks = null;
        _cacheCount = 0;
        _disposed = 0;
        _chunkCreationLock = 0;
        _cacheRegistrationLock = 0;
        _overflowLock = 0;

        _chunkArena = new DynamicArena(1024);
        arenaCreated = true;

        // One slot per dedicated cache plus slot 0 for the shared overflow cache.
        _caches = (ThreadCache**)Malloc((nuint)sizeof(ThreadCache*) * (nuint)(maxConcurrencyLevel + 1));
        if (_caches == null)
        {
            throw new OutOfMemoryException("Failed to allocate free list cache table.");
        }
        for (var i = 0; i <= maxConcurrencyLevel; i++)
        {
            _caches[i] = null;
        }

        var overflowCache = CreateCacheForThread(0);
        if (overflowCache == null)
        {
            throw new OutOfMemoryException("Failed to initialize free list overflow cache.");
        }
        _caches[_OVERFLOW_CACHE_INDEX] = overflowCache;
    }
    catch
    {
        // These pointers came from Malloc, so they must go back through MemoryUtility.Free.
        // An unqualified Free(...) here would bind to this struct's own Free(void*) instance method,
        // which expects a BlockHeader in front of the pointer and would not release the memory.
        if (_instanceId != null)
        {
            MemoryUtility.Free(_instanceId);
            _instanceId = null;
        }
        if (_caches != null)
        {
            MemoryUtility.Free(_caches);
            _caches = null;
        }
        if (arenaCreated)
        {
            _chunkArena.Dispose();
        }
        throw;
    }
}
/// <summary>
/// Reinterprets the cache's raw <c>buckets</c> byte storage as an array of <see cref="SizeBucket"/>.
/// </summary>
/// <param name="cache">Cache whose bucket storage is wanted.</param>
/// <returns>Pointer to the first bucket of <paramref name="cache"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static SizeBucket* GetBuckets(ThreadCache* cache)
    => (SizeBucket*)cache->buckets;
/// <summary>
/// Resets every bucket of <paramref name="cache"/> to empty and assigns its block size:
/// bucket 0 gets <c>_MIN_BLOCK_SIZE</c> and each following bucket twice the previous size.
/// Also clears the cache's remote-free list head.
/// </summary>
/// <param name="cache">Cache to initialise.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void InitializeBuckets(ThreadCache* cache)
{
    var bucket = GetBuckets(cache);
    var end = bucket + _MAX_BUCKETS;

    for (var blockSize = _MIN_BLOCK_SIZE; bucket < end; bucket++, blockSize <<= 1)
    {
        bucket->blockSize = blockSize;
        bucket->freeHead = 0;
        bucket->freeCount = 0;
        bucket->creationLock = 0;
    }

    cache->remoteFreeHead = 0;
}
/// <summary>
/// Finds the smallest bucket whose block size can hold <paramref name="size"/> bytes.
/// </summary>
/// <param name="size">Required size in bytes.</param>
/// <returns>
/// The bucket index in the range 0 to <c>_MAX_BUCKETS - 1</c>, or -1 when <paramref name="size"/>
/// exceeds the largest bucket's block size.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int FindBucket(nuint size)
{
    var index = 0;
    var capacity = _MIN_BLOCK_SIZE;

    while (index < _MAX_BUCKETS)
    {
        if (capacity >= size)
        {
            return index;
        }

        // Each bucket holds blocks twice as large as the previous one.
        capacity <<= 1;
        index++;
    }

    return -1;
}
/// <summary>
/// Allocates a cleared <see cref="ThreadCache"/> from the chunk arena, initialises its buckets
/// and marks it active.
/// </summary>
/// <param name="threadId">Identifier stored in the cache's <c>threadId</c> field.</param>
/// <returns>The new cache, or null when the arena allocation fails.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private ThreadCache* CreateCacheForThread(int threadId)
{
    // NOTE(review): SizeOf()/AlignOf() carry no type argument in this view; confirm they resolve to
    // the size and alignment of ThreadCache.
    var created = (ThreadCache*)_chunkArena.Allocate(SizeOf(), AlignOf(), AllocationOption.Clear);
    if (created != null)
    {
        InitializeBuckets(created);
        created->threadId = threadId;
        created->active = 1;
    }

    return created;
}
/// <summary>
/// Atomically detaches the cache's remote-free list and pushes every block on it into the
/// bucket recorded in the block's <see cref="FreeNode"/>.
/// </summary>
/// <param name="cache">Cache whose remote-free list is emptied.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private readonly void DrainRemoteFrees(ThreadCache* cache)
{
    var node = (FreeNode*)Interlocked.Exchange(ref cache->remoteFreeHead, 0);

    for (FreeNode* following; node != null; node = following)
    {
        // Read the link first: PushToBucket rewrites node->next.
        following = node->next;
        PushToBucket(cache, node->bucketIndex, node, node->ownerChunk, node->blockSize);
    }
}
/// <summary>
/// Returns the shared overflow cache stored at <c>_OVERFLOW_CACHE_INDEX</c> in the cache table.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private readonly ThreadCache* GetOverflowCache()
    => _caches[_OVERFLOW_CACHE_INDEX];
/// <summary>
/// Slow path of <see cref="GetCurrentCache"/>: binds the calling thread to a cache of this allocator.
/// A dedicated cache is created while fewer than <c>_maxConcurrencyLevel</c> are registered; otherwise,
/// or when creating one fails, the thread is bound to the shared overflow cache.
/// The result is recorded in the thread-static <c>t_ownerId</c> / <c>t_cacheIndex</c>.
/// </summary>
/// <returns>The cache the calling thread should use.</returns>
private ThreadCache* RegisterThreadCache()
{
    // Registration is serialised by a spin flag.
    while (Interlocked.CompareExchange(ref _cacheRegistrationLock, 1, 0) != 0)
    {
        Thread.SpinWait(1);
    }

    try
    {
        // The thread already owns a dedicated slot of this instance: reuse it.
        var alreadyRegistered = t_ownerId == _instanceId && t_cacheIndex > 0 && t_cacheIndex <= _cacheCount;
        if (alreadyRegistered)
        {
            return _caches[t_cacheIndex];
        }

        ThreadCache* dedicated = null;
        if (_cacheCount < _maxConcurrencyLevel)
        {
            dedicated = CreateCacheForThread(Environment.CurrentManagedThreadId);
        }

        // Every remaining path binds the thread to this instance.
        t_ownerId = _instanceId;

        if (dedicated == null)
        {
            // No free slot, or the cache could not be created: share the overflow cache.
            t_cacheIndex = _OVERFLOW_CACHE_INDEX;
            return GetOverflowCache();
        }

        var slot = ++_cacheCount;
        _caches[slot] = dedicated;
        t_cacheIndex = slot;
        return dedicated;
    }
    finally
    {
        Interlocked.Exchange(ref _cacheRegistrationLock, 0);
    }
}
/// <summary>
/// Returns the cache bound to the calling thread, registering the thread first when it is not yet
/// bound to this allocator instance or its recorded slot is out of range or empty.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private ThreadCache* GetCurrentCache()
{
    if (t_ownerId != _instanceId)
    {
        return RegisterThreadCache();
    }

    var slot = t_cacheIndex;

    // Unsigned compare rejects negative indices and indices above the registered count in one test.
    if ((uint)slot > (uint)_cacheCount)
    {
        return RegisterThreadCache();
    }

    var bound = _caches[slot];
    return bound != null ? bound : RegisterThreadCache();
}
/// <summary>
/// Removes the head block of one bucket's free list and stamps it with a <see cref="BlockHeader"/>.
/// </summary>
/// <param name="cache">Cache that owns the bucket.</param>
/// <param name="cacheIndex">Cache index recorded in the block's header.</param>
/// <param name="bucketIndex">Bucket to pop from.</param>
/// <returns>Pointer to the start of the block (the header), or null when the bucket is empty.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private readonly void* TryPopFromBucket(ThreadCache* cache, int cacheIndex, int bucketIndex)
{
    var bucket = GetBuckets(cache) + bucketIndex;
    var node = (FreeNode*)bucket->freeHead;

    if (node != null)
    {
        // Capture the node's fields before the header is written over the same bytes.
        var owner = node->ownerChunk;
        var size = node->blockSize;

        bucket->freeHead = (nint)node->next;
        bucket->freeCount--;

        AssignBlockHeader((BlockHeader*)node, owner, size, cacheIndex);
    }

    return node;
}
/// <summary>
/// Writes a <see cref="FreeNode"/> at <paramref name="ptr"/> and makes it the new head of one bucket's free list.
/// </summary>
/// <param name="cache">Cache that owns the bucket.</param>
/// <param name="bucketIndex">Bucket that receives the block.</param>
/// <param name="ptr">Start of the block being returned.</param>
/// <param name="ownerChunk">Chunk recorded in the node.</param>
/// <param name="blockSize">Block size recorded in the node.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private readonly void PushToBucket(ThreadCache* cache, int bucketIndex, void* ptr, MemoryChunk* ownerChunk, nuint blockSize)
{
    var bucket = GetBuckets(cache) + bucketIndex;
    var freed = (FreeNode*)ptr;

    // Fill in the node's bookkeeping ...
    freed->bucketIndex = bucketIndex;
    freed->blockSize = blockSize;
    freed->ownerChunk = ownerChunk;

    // ... then link it in front of the current head.
    freed->next = (FreeNode*)bucket->freeHead;
    bucket->freeHead = (nint)freed;
    bucket->freeCount++;
}
/// <summary>
/// Fills in all four fields of a block header, stamping it with <c>_MAGIC_NUMBER</c>.
/// </summary>
/// <param name="header">Header to write.</param>
/// <param name="ownerChunk">Chunk the block belongs to (callers pass null for standalone blocks).</param>
/// <param name="blockSize">Size of the block in bytes.</param>
/// <param name="ownerCacheIndex">Index of the cache the block is handed out from.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void AssignBlockHeader(BlockHeader* header, MemoryChunk* ownerChunk, nuint blockSize, int ownerCacheIndex)
{
    header->magicNumber = _MAGIC_NUMBER;
    header->ownerCacheIndex = ownerCacheIndex;
    header->blockSize = blockSize;
    header->ownerChunk = ownerChunk;
}
/// <summary>
/// Refills one bucket. Remote frees are drained first; if that leaves the bucket non-empty nothing
/// is allocated. Otherwise a new region of between 1 and 256 blocks (as many as fit in
/// <c>_chunkSize</c>) is obtained, recorded in the chunk list and carved into the bucket.
/// </summary>
/// <param name="cache">Cache that owns the bucket.</param>
/// <param name="cacheIndex">Index of <paramref name="cache"/>; not used by this method.</param>
/// <param name="bucketIndex">Bucket to refill.</param>
/// <returns>True when the bucket has at least one free block afterwards; false when memory could not be obtained.</returns>
private bool TryCreateBlocksForBucket(ThreadCache* cache, int cacheIndex, int bucketIndex)
{
    var bucket = GetBuckets(cache) + bucketIndex;

    // Spin until this bucket's refill flag is ours.
    while (Interlocked.CompareExchange(ref bucket->creationLock, 1, 0) != 0)
    {
        Thread.SpinWait(1);
    }

    try
    {
        DrainRemoteFrees(cache);
        if (bucket->freeHead != 0)
        {
            // Remote frees already replenished the bucket.
            return true;
        }

        var perBlock = bucket->blockSize;
        var blockCount = Math.Min(Math.Max(1u, _chunkSize / perBlock), 256);
        if (blockCount == 0)
        {
            return false;
        }

        var regionSize = blockCount * perBlock;
        var region = (byte*)AlignedAlloc(regionSize, _alignment);
        if (region == null)
        {
            return false;
        }

        var record = (MemoryChunk*)_chunkArena.Allocate(SizeOf(), AlignOf(), AllocationOption.None);
        if (record == null)
        {
            AlignedFree(region);
            return false;
        }

        // Publish the region on the allocator-wide chunk list under the chunk flag.
        while (Interlocked.CompareExchange(ref _chunkCreationLock, 1, 0) != 0)
        {
            Thread.SpinWait(1);
        }

        try
        {
            record->memory = region;
            record->size = regionSize;
            // Marked fully used: the whole region is carved into blocks below.
            record->used = regionSize;
            record->next = _chunks;
            _chunks = record;
        }
        finally
        {
            Interlocked.Exchange(ref _chunkCreationLock, 0);
        }

        for (nuint index = 0; index < blockCount; index++)
        {
            PushToBucket(cache, bucketIndex, region + (index * perBlock), record, perBlock);
        }

        return true;
    }
    finally
    {
        Interlocked.Exchange(ref bucket->creationLock, 0);
    }
}
/// <summary>
/// Bump-allocates <paramref name="size"/> bytes from the first chunk with enough room after aligning
/// its used offset, creating a new chunk when none fits. The block is stamped with a
/// <see cref="BlockHeader"/> before it is returned. The whole operation runs under the chunk spin flag.
/// </summary>
/// <param name="cacheIndex">Cache index recorded in the block's header.</param>
/// <param name="size">Number of bytes to reserve, header included.</param>
/// <param name="alignment">Power-of-two alignment applied to the offset inside the chunk.</param>
/// <returns>Pointer to the start of the block, or null when memory could not be obtained.</returns>
private void* AllocateFromChunk(int cacheIndex, nuint size, nuint alignment)
{
    while (Interlocked.CompareExchange(ref _chunkCreationLock, 1, 0) != 0)
    {
        Thread.SpinWait(1);
    }

    try
    {
        // First fit over the existing chunks.
        for (var candidate = _chunks; candidate != null; candidate = candidate->next)
        {
            var start = (candidate->used + alignment - 1) & ~(alignment - 1);
            var needed = start - candidate->used + size;
            var remaining = candidate->size - candidate->used;
            if (needed > remaining)
            {
                continue;
            }

            var block = candidate->memory + start;
            candidate->used = start + size;
            AssignBlockHeader((BlockHeader*)block, candidate, size, cacheIndex);
            return block;
        }

        // Nothing fits: open a fresh chunk with the block at its start.
        var freshSize = Math.Max(_chunkSize, size + alignment);
        var freshMemory = (byte*)AlignedAlloc(freshSize, alignment);
        if (freshMemory == null)
        {
            return null;
        }

        var record = (MemoryChunk*)_chunkArena.Allocate(SizeOf(), AlignOf(), AllocationOption.None);
        if (record == null)
        {
            AlignedFree(freshMemory);
            return null;
        }

        record->memory = freshMemory;
        record->size = freshSize;
        record->used = size;
        record->next = _chunks;
        _chunks = record;

        AssignBlockHeader((BlockHeader*)freshMemory, record, size, cacheIndex);
        return freshMemory;
    }
    finally
    {
        Interlocked.Exchange(ref _chunkCreationLock, 0);
    }
}
/// <summary>
/// Allocates a memory block of the specified size.
/// </summary>
/// <remarks>
/// This is thread safe.
/// Requests that fit a size bucket are served from the calling thread's cache (the shared overflow
/// cache is used under a spin flag); larger requests go straight to AlignedAlloc.
/// </remarks>
/// <param name="size">Number of bytes requested.</param>
/// <param name="alignment">Power-of-two alignment; 0 selects the allocator's default alignment.</param>
/// <param name="allocationOption">When the <c>Clear</c> flag is set the returned payload is zeroed.</param>
/// <returns>
/// Pointer to the payload, or null when the allocator is disposed, <paramref name="size"/> is zero,
/// the padded size does not fit in <c>nuint</c>, or memory could not be obtained.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="alignment"/> is not a power of two.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void* Allocate(nuint size, nuint alignment, AllocationOption allocationOption = AllocationOption.None)
{
    if (_disposed != 0)
    {
        return null;
    }
    if (size == 0)
    {
        return null;
    }
    if (alignment == 0)
    {
        alignment = _alignment;
    }
    if ((alignment & (alignment - 1)) != 0)
    {
        throw new ArgumentException("Alignment must be a power of two.", nameof(alignment));
    }

    // Round the payload up to the alignment. For sizes close to nuint.MaxValue the addition wraps
    // around; treat that as an unsatisfiable request instead of handing out a tiny block.
    var alignedSize = (size + alignment - 1) & ~(alignment - 1);
    if (alignedSize < size)
    {
        return null;
    }
    alignedSize = Math.Max(alignedSize, _MIN_BLOCK_SIZE);

    var totalSize = alignedSize + (nuint)sizeof(BlockHeader);
    if (totalSize < alignedSize)
    {
        // Adding the header wrapped around.
        return null;
    }

    // NOTE(review): the returned pointer is the block start plus sizeof(BlockHeader); nothing here
    // adjusts it further for `alignment`, so large alignments may not be honoured. Confirm with callers.
    var bucketIndex = FindBucket(totalSize);
    var cache = GetCurrentCache();
    var cacheIndex = t_cacheIndex;

    // The overflow cache is shared between threads, so it is used under its spin flag.
    var requiresOverflowLock = cacheIndex == _OVERFLOW_CACHE_INDEX;
    if (requiresOverflowLock)
    {
        while (Interlocked.CompareExchange(ref _overflowLock, 1, 0) != 0)
        {
            Thread.SpinWait(1);
        }
    }

    try
    {
        DrainRemoteFrees(cache);

        void* ptr = null;
        if (bucketIndex >= 0)
        {
            ptr = TryPopFromBucket(cache, cacheIndex, bucketIndex);
            if (ptr == null && TryCreateBlocksForBucket(cache, cacheIndex, bucketIndex))
            {
                ptr = TryPopFromBucket(cache, cacheIndex, bucketIndex);
            }
        }
        else
        {
            // Oversized block: Bypass chunk linking entirely and go straight to the OS
            ptr = AlignedAlloc(totalSize, alignment);
            if (ptr != null)
            {
                // Pass null for ownerChunk so 'Free' knows this is a standalone allocation
                AssignBlockHeader((BlockHeader*)ptr, null, totalSize, cacheIndex);
            }
        }

        if (ptr == null)
        {
            return null;
        }

        // Both paths above already stamped the header (including the owning cache index).
        var userPtr = (byte*)ptr + sizeof(BlockHeader);
        if (allocationOption.HasFlag(AllocationOption.Clear))
        {
            MemClear(userPtr, alignedSize);
        }

        return userPtr;
    }
    finally
    {
        if (requiresOverflowLock)
        {
            Interlocked.Exchange(ref _overflowLock, 0);
        }
    }
}
/// <summary>
/// Allocates a new block, copies the smaller of <paramref name="oldSize"/> and <paramref name="newSize"/>
/// bytes from the old block into it and frees the old block.
/// </summary>
/// <param name="ptr">Existing block, or null to perform a plain allocation.</param>
/// <param name="oldSize">Payload size of the existing block in bytes.</param>
/// <param name="newSize">Requested payload size in bytes.</param>
/// <param name="alignment">Alignment forwarded to <see cref="Allocate"/>.</param>
/// <param name="allocationOption">Option forwarded to <see cref="Allocate"/>.</param>
/// <returns>
/// The new block, or null when the allocator is disposed or the allocation fails; in that case
/// <paramref name="ptr"/> is neither copied nor freed.
/// </returns>
public void* Reallocate(void* ptr, nuint oldSize, nuint newSize, nuint alignment, AllocationOption allocationOption = AllocationOption.None)
{
    if (_disposed != 0)
    {
        return null;
    }

    var replacement = Allocate(newSize, alignment, allocationOption);
    if (replacement == null || ptr == null)
    {
        // Nothing to migrate: either the allocation failed or there was no old block.
        return replacement;
    }

    MemCpy(replacement, ptr, Math.Min(oldSize, newSize));
    Free(ptr);
    return replacement;
}
/// <summary>
/// Frees a previously allocated memory block.
/// </summary>
/// <remarks>
/// This is thread safe.
/// The call is ignored when the allocator is disposed, <paramref name="ptr"/> is null, or the header
/// in front of <paramref name="ptr"/> does not carry the expected magic number.
/// Oversized blocks are released with AlignedFree. Bucketed blocks go back to the owning cache:
/// directly when the caller is bound to that cache, otherwise through its remote-free list.
/// </remarks>
/// <param name="ptr">Pointer previously returned by <see cref="Allocate"/> or <see cref="Reallocate"/>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Free(void* ptr)
{
    if (_disposed != 0 || ptr == null)
    {
        return;
    }

    var blockStartPtr = (byte*)ptr - sizeof(BlockHeader);
    var header = (BlockHeader*)blockStartPtr;
    if (header->magicNumber != _MAGIC_NUMBER)
    {
        return;
    }

    var chunk = header->ownerChunk;
    var blockSize = header->blockSize;
    var ownerCacheIndex = header->ownerCacheIndex;
    var bucketIndex = FindBucket(blockSize);

    // The oversized check must come before the null-chunk check: Allocate stamps oversized blocks
    // with a null ownerChunk, so testing the chunk first would return early and leak them.
    if (bucketIndex < 0)
    {
        // This is an oversized allocation. It doesn't belong to a bucket or a chunk.
        // Erase the magic number for safety and instantly yield it back to the OS.
        header->magicNumber = 0;
        AlignedFree(blockStartPtr);
        return;
    }

    if (chunk == null)
    {
        // A bucket-sized block without an owning chunk is not something Allocate produces; ignore it.
        return;
    }

    var sameThread = t_ownerId == _instanceId && t_cacheIndex == ownerCacheIndex;
    var targetCache = ownerCacheIndex >= 0 && ownerCacheIndex <= _cacheCount ? _caches[ownerCacheIndex] : null;
    if (targetCache == null)
    {
        // Unknown owner: fall back to the shared overflow cache.
        targetCache = GetOverflowCache();
        ownerCacheIndex = _OVERFLOW_CACHE_INDEX;
        sameThread = t_ownerId == _instanceId && t_cacheIndex == ownerCacheIndex;
    }

    if (sameThread)
    {
        if (ownerCacheIndex == _OVERFLOW_CACHE_INDEX)
        {
            // The overflow cache is shared, so its buckets are only touched under the overflow flag.
            while (Interlocked.CompareExchange(ref _overflowLock, 1, 0) != 0)
            {
                Thread.SpinWait(1);
            }
            try
            {
                PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk, blockSize);
            }
            finally
            {
                Interlocked.Exchange(ref _overflowLock, 0);
            }
        }
        else
        {
            PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk, blockSize);
        }
        return;
    }

    // Cross-thread free: push the block onto the owner's remote-free list with a CAS loop.
    // The owner moves it into a bucket the next time it drains the list.
    var remoteNode = (FreeNode*)blockStartPtr;
    remoteNode->ownerChunk = chunk;
    remoteNode->blockSize = blockSize;
    remoteNode->bucketIndex = bucketIndex;

    nint head;
    do
    {
        head = targetCache->remoteFreeHead;
        remoteNode->next = (FreeNode*)head;
    } while (Interlocked.CompareExchange(ref targetCache->remoteFreeHead, (nint)remoteNode, head) != head);
}
/// <summary>
/// Releases everything the allocator owns: the memory of every recorded chunk, the chunk arena,
/// the cache table and the instance identifier. Only the first call does any work.
/// </summary>
public void Dispose()
{
    // Flip the disposed flag atomically; later callers see 1 and leave.
    if (Interlocked.CompareExchange(ref _disposed, 1, 0) != 0)
    {
        return;
    }

    if (_caches != null)
    {
        for (var slot = 0; slot <= _cacheCount; slot++)
        {
            var registered = _caches[slot];
            if (registered == null)
            {
                continue;
            }

            DrainRemoteFrees(registered);
            registered->active = 0;
        }
    }

    // Release chunk memory. The link is read before the memory is freed.
    for (var current = _chunks; current != null;)
    {
        var following = current->next;
        AlignedFree(current->memory);
        current = following;
    }

    _chunkArena.Dispose();

    if (_caches != null)
    {
        MemoryUtility.Free(_caches);
        _caches = null;
    }

    if (_instanceId != null)
    {
        MemoryUtility.Free(_instanceId);
        _instanceId = null;
    }

    _chunks = null;
    _cacheCount = 0;
}
}