Improve FreeList alignment, error handling, and GGX SPMD
- Increased BlockHeader size, added blockStart, and improved alignment logic in FreeList allocator.
- Changed _MIN_BLOCK_SIZE to 32 and consolidated to a single implementation.
- Updated allocation and free logic for correct pointer alignment and header management.
- MemoryUtility now throws OutOfMemoryException on allocation failure.
- Optimized GGXMipGenerationBenchmark SPMD output with MaskScatter and minor math/cleanup improvements.
- Cleaned up Program.cs and enabled global/test initialization.
- Bumped assembly version to 1.6.19.
This commit is contained in:
@@ -1,4 +1,3 @@
|
|||||||
#if true
|
|
||||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
@@ -73,7 +72,7 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
public ThreadCache* inactiveNext;
|
public ThreadCache* inactiveNext;
|
||||||
}
|
}
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Explicit, Size = 24)]
|
[StructLayout(LayoutKind.Explicit, Size = 32)]
|
||||||
private struct BlockHeader
|
private struct BlockHeader
|
||||||
{
|
{
|
||||||
[FieldOffset(0)]
|
[FieldOffset(0)]
|
||||||
@@ -81,8 +80,10 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
[FieldOffset(8)]
|
[FieldOffset(8)]
|
||||||
public ThreadCache* ownerCache;
|
public ThreadCache* ownerCache;
|
||||||
[FieldOffset(16)]
|
[FieldOffset(16)]
|
||||||
|
public void* blockStart;
|
||||||
|
[FieldOffset(24)]
|
||||||
public uint magicNumber;
|
public uint magicNumber;
|
||||||
[FieldOffset(20)]
|
[FieldOffset(28)]
|
||||||
public byte bucketIndex;
|
public byte bucketIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,7 +125,7 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
private const byte _MAX_BUCKETS = 16;
|
private const byte _MAX_BUCKETS = 16;
|
||||||
private const int _DEFAULT_MAX_CONCURRENCY_LEVEL = 1;
|
private const int _DEFAULT_MAX_CONCURRENCY_LEVEL = 1;
|
||||||
private const int _OVERFLOW_CACHE_INDEX = 0;
|
private const int _OVERFLOW_CACHE_INDEX = 0;
|
||||||
private const nuint _MIN_BLOCK_SIZE = 16;
|
private const nuint _MIN_BLOCK_SIZE = 32;
|
||||||
private const nuint _DEFAULT_CHUNK_SIZE = 64 * 1024;
|
private const nuint _DEFAULT_CHUNK_SIZE = 64 * 1024;
|
||||||
private const uint _MAGIC_NUMBER = 0xDEADBEEF;
|
private const uint _MAGIC_NUMBER = 0xDEADBEEF;
|
||||||
|
|
||||||
@@ -357,7 +358,7 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private readonly void* TryPopFromBucket(ThreadCache* cache, byte bucketIndex)
|
private readonly void* TryPopFromBucket(ThreadCache* cache, byte bucketIndex, nuint alignment)
|
||||||
{
|
{
|
||||||
var buckets = GetBuckets(cache);
|
var buckets = GetBuckets(cache);
|
||||||
var bucket = &buckets[bucketIndex];
|
var bucket = &buckets[bucketIndex];
|
||||||
@@ -370,8 +371,12 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
bucket->freeHead = (nint)head->next;
|
bucket->freeHead = (nint)head->next;
|
||||||
bucket->freeCount--;
|
bucket->freeCount--;
|
||||||
|
|
||||||
AssignBlockHeader((BlockHeader*)head, head->ownerChunk, head->bucketIndex, cache);
|
var blockSize = bucket->blockSize;
|
||||||
return head;
|
var userPtr = (byte*)(((nuint)head + (nuint)sizeof(BlockHeader) + alignment - 1) & ~(alignment - 1));
|
||||||
|
var header = (BlockHeader*)userPtr - 1;
|
||||||
|
|
||||||
|
AssignBlockHeader(header, head, head->ownerChunk, bucketIndex, cache);
|
||||||
|
return userPtr;
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
@@ -388,12 +393,13 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
private static void AssignBlockHeader(BlockHeader* header, MemoryChunk* ownerChunk, byte bucketIndex, ThreadCache* ownerCache)
|
private static void AssignBlockHeader(BlockHeader* header, void* blockStart, MemoryChunk* ownerChunk, byte bucketIndex, ThreadCache* ownerCache)
|
||||||
{
|
{
|
||||||
header->ownerChunk = ownerChunk;
|
header->ownerChunk = ownerChunk;
|
||||||
header->bucketIndex = bucketIndex;
|
header->bucketIndex = bucketIndex;
|
||||||
header->magicNumber = _MAGIC_NUMBER;
|
header->magicNumber = _MAGIC_NUMBER;
|
||||||
header->ownerCache = ownerCache;
|
header->ownerCache = ownerCache;
|
||||||
|
header->blockStart = blockStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
private bool TryCreateBlocksForBucket(ThreadCache* cache, byte bucketIndex)
|
private bool TryCreateBlocksForBucket(ThreadCache* cache, byte bucketIndex)
|
||||||
@@ -425,7 +431,7 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
}
|
}
|
||||||
|
|
||||||
var totalSize = blocksToCreate * blockSize;
|
var totalSize = blocksToCreate * blockSize;
|
||||||
var memory = AllocateFromChunk(totalSize, _alignment, out var chunk);
|
var memory = AllocateFromChunk(totalSize, blockSize, out var chunk);
|
||||||
if (memory == null)
|
if (memory == null)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
@@ -525,49 +531,45 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
}
|
}
|
||||||
|
|
||||||
var alignedSize = (size + alignment - 1) & ~(alignment - 1);
|
var alignedSize = (size + alignment - 1) & ~(alignment - 1);
|
||||||
alignedSize = Math.Max(alignedSize, _MIN_BLOCK_SIZE);
|
var totalSize = alignedSize + (nuint)sizeof(BlockHeader) + alignment;
|
||||||
|
|
||||||
var totalSize = alignedSize + (nuint)sizeof(BlockHeader);
|
|
||||||
var bucketIndex = FindBucket(totalSize);
|
var bucketIndex = FindBucket(totalSize);
|
||||||
var cache = GetCurrentCache();
|
var cache = GetCurrentCache();
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
void* ptr = null;
|
void* userPtr = null;
|
||||||
if (bucketIndex != byte.MaxValue)
|
if (bucketIndex != byte.MaxValue)
|
||||||
{
|
{
|
||||||
ptr = TryPopFromBucket(cache, bucketIndex);
|
userPtr = TryPopFromBucket(cache, bucketIndex, alignment);
|
||||||
if (ptr == null)
|
if (userPtr == null)
|
||||||
{
|
{
|
||||||
DrainRemoteFrees(cache);
|
DrainRemoteFrees(cache);
|
||||||
|
|
||||||
ptr = TryPopFromBucket(cache, bucketIndex);
|
userPtr = TryPopFromBucket(cache, bucketIndex, alignment);
|
||||||
if (ptr == null && TryCreateBlocksForBucket(cache, bucketIndex))
|
if (userPtr == null && TryCreateBlocksForBucket(cache, bucketIndex))
|
||||||
{
|
{
|
||||||
ptr = TryPopFromBucket(cache, bucketIndex);
|
userPtr = TryPopFromBucket(cache, bucketIndex, alignment);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Oversized block: Bypass chunk linking entirely and go straight to the OS
|
// Oversized block: Bypass chunk linking entirely and go straight to the OS
|
||||||
ptr = AlignedAlloc(totalSize, alignment);
|
void* ptr = AlignedAlloc(totalSize, alignment);
|
||||||
if (ptr != null)
|
if (ptr != null)
|
||||||
{
|
{
|
||||||
|
userPtr = (byte*)(((nuint)ptr + (nuint)sizeof(BlockHeader) + alignment - 1) & ~(alignment - 1));
|
||||||
|
var header = (BlockHeader*)userPtr - 1;
|
||||||
// Pass null for ownerChunk so 'Free' knows this is a standalone allocation
|
// Pass null for ownerChunk so 'Free' knows this is a standalone allocation
|
||||||
AssignBlockHeader((BlockHeader*)ptr, null, bucketIndex, cache);
|
AssignBlockHeader(header, ptr, null, bucketIndex, cache);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ptr == null)
|
if (userPtr == null)
|
||||||
{
|
{
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
var header = (BlockHeader*)ptr;
|
|
||||||
header->ownerCache = cache;
|
|
||||||
|
|
||||||
var userPtr = (byte*)ptr + sizeof(BlockHeader);
|
|
||||||
if (allocationOption.HasFlag(AllocationOption.Clear))
|
if (allocationOption.HasFlag(AllocationOption.Clear))
|
||||||
{
|
{
|
||||||
MemClear(userPtr, alignedSize);
|
MemClear(userPtr, alignedSize);
|
||||||
@@ -612,13 +614,13 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
var blockStartPtr = (byte*)ptr - sizeof(BlockHeader);
|
var header = (BlockHeader*)ptr - 1;
|
||||||
var header = (BlockHeader*)blockStartPtr;
|
|
||||||
if (header->magicNumber != _MAGIC_NUMBER)
|
if (header->magicNumber != _MAGIC_NUMBER)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var blockStartPtr = header->blockStart;
|
||||||
var chunk = header->ownerChunk;
|
var chunk = header->ownerChunk;
|
||||||
if (chunk == null)
|
if (chunk == null)
|
||||||
{
|
{
|
||||||
@@ -695,750 +697,3 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
|
|||||||
arena.Dispose();
|
arena.Dispose();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
|
||||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
|
||||||
using System.Runtime.CompilerServices;
|
|
||||||
using System.Runtime.InteropServices;
|
|
||||||
|
|
||||||
namespace Misaki.HighPerformance.LowLevel.Buffer;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// A variable-size allocator that uses per-thread caches for the hot path and a remote-free queue for cross-thread deallocation.
|
|
||||||
/// </summary>
|
|
||||||
[StructLayout(LayoutKind.Sequential)]
|
|
||||||
public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOptions>
|
|
||||||
{
|
|
||||||
public struct CreationOptions
|
|
||||||
{
|
|
||||||
public nuint alignment;
|
|
||||||
public nuint chunkSize;
|
|
||||||
public int maxConcurrencyLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static FreeList Create(in CreationOptions opts)
|
|
||||||
{
|
|
||||||
return new FreeList(opts.alignment, opts.chunkSize, opts.maxConcurrencyLevel);
|
|
||||||
}
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential)]
|
|
||||||
private struct FreeNode
|
|
||||||
{
|
|
||||||
public FreeNode* next;
|
|
||||||
public MemoryChunk* ownerChunk;
|
|
||||||
public nuint blockSize;
|
|
||||||
public int bucketIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential)]
|
|
||||||
private struct MemoryChunk
|
|
||||||
{
|
|
||||||
public MemoryChunk* next;
|
|
||||||
public byte* memory;
|
|
||||||
public nuint size;
|
|
||||||
public nuint used;
|
|
||||||
}
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Explicit, Size = 32)]
|
|
||||||
private struct SizeBucket
|
|
||||||
{
|
|
||||||
[FieldOffset(0)]
|
|
||||||
public long freeCount;
|
|
||||||
[FieldOffset(8)]
|
|
||||||
public nint freeHead;
|
|
||||||
[FieldOffset(16)]
|
|
||||||
public nuint blockSize;
|
|
||||||
[FieldOffset(24)]
|
|
||||||
public int creationLock;
|
|
||||||
}
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential)]
|
|
||||||
private struct ThreadCache
|
|
||||||
{
|
|
||||||
public fixed byte buckets[_MAX_BUCKETS * 32];
|
|
||||||
public nint remoteFreeHead;
|
|
||||||
public int threadId;
|
|
||||||
public int active;
|
|
||||||
}
|
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Explicit, Size = 32)]
|
|
||||||
private struct BlockHeader
|
|
||||||
{
|
|
||||||
[FieldOffset(0)]
|
|
||||||
public MemoryChunk* ownerChunk;
|
|
||||||
[FieldOffset(8)]
|
|
||||||
public nuint blockSize;
|
|
||||||
[FieldOffset(16)]
|
|
||||||
public ulong magicNumber;
|
|
||||||
[FieldOffset(24)]
|
|
||||||
public int ownerCacheIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
private const int _MAX_BUCKETS = 16;
|
|
||||||
private const int _DEFAULT_MAX_CONCURRENCY_LEVEL = 1;
|
|
||||||
private const int _OVERFLOW_CACHE_INDEX = 0;
|
|
||||||
private const nuint _MIN_BLOCK_SIZE = 16;
|
|
||||||
private const nuint _DEFAULT_CHUNK_SIZE = 64 * 1024;
|
|
||||||
private const ulong _MAGIC_NUMBER = 0xDEADBEEFDEADBEEF;
|
|
||||||
|
|
||||||
[ThreadStatic]
|
|
||||||
private static int t_cacheIndex;
|
|
||||||
|
|
||||||
[ThreadStatic]
|
|
||||||
private static void* t_ownerId;
|
|
||||||
|
|
||||||
private void* _instanceId;
|
|
||||||
private ThreadCache** _caches;
|
|
||||||
private DynamicArena _chunkArena;
|
|
||||||
private MemoryChunk* _chunks;
|
|
||||||
private readonly nuint _chunkSize;
|
|
||||||
private readonly nuint _alignment;
|
|
||||||
private readonly int _maxConcurrencyLevel;
|
|
||||||
private int _cacheCount;
|
|
||||||
private volatile int _disposed;
|
|
||||||
private volatile int _chunkCreationLock;
|
|
||||||
private volatile int _cacheRegistrationLock;
|
|
||||||
private volatile int _overflowLock;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the alignment requirement for allocations.
|
|
||||||
/// </summary>
|
|
||||||
public readonly nuint Alignment => _alignment;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the chunk size used by this allocator.
|
|
||||||
/// </summary>
|
|
||||||
public readonly nuint ChunkSize => _chunkSize;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Gets the maximum number of dedicated thread caches.
|
|
||||||
/// </summary>
|
|
||||||
public readonly int MaxConcurrencyLevel => _maxConcurrencyLevel;
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Initializes a new variable-size FreeList allocator with the specified parameters.
|
|
||||||
/// </summary>
|
|
||||||
/// <param name="alignment">Alignment requirement for blocks (must be power of 2).</param>
|
|
||||||
/// <param name="chunkSize">Size of memory chunks to allocate (default: 64KB).</param>
|
|
||||||
/// <param name="maxConcurrencyLevel">Maximum number of dedicated thread caches.</param>
|
|
||||||
public FreeList(nuint alignment, nuint chunkSize = _DEFAULT_CHUNK_SIZE, int maxConcurrencyLevel = _DEFAULT_MAX_CONCURRENCY_LEVEL)
|
|
||||||
{
|
|
||||||
if (alignment == 0 || (alignment & (alignment - 1)) != 0)
|
|
||||||
{
|
|
||||||
throw new ArgumentException("Alignment must be a power of 2", nameof(alignment));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (chunkSize < 1024)
|
|
||||||
{
|
|
||||||
throw new ArgumentException("Chunk size must be at least 1KB", nameof(chunkSize));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (maxConcurrencyLevel < 1)
|
|
||||||
{
|
|
||||||
throw new ArgumentOutOfRangeException(nameof(maxConcurrencyLevel), "Max concurrency level must be greater than zero.");
|
|
||||||
}
|
|
||||||
|
|
||||||
_alignment = alignment;
|
|
||||||
_chunkSize = chunkSize;
|
|
||||||
_maxConcurrencyLevel = maxConcurrencyLevel;
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
_instanceId = Malloc((nuint)sizeof(nint));
|
|
||||||
|
|
||||||
_chunks = null;
|
|
||||||
_cacheCount = 0;
|
|
||||||
_disposed = 0;
|
|
||||||
_chunkCreationLock = 0;
|
|
||||||
_cacheRegistrationLock = 0;
|
|
||||||
_overflowLock = 0;
|
|
||||||
_chunkArena = new DynamicArena(1024);
|
|
||||||
_caches = (ThreadCache**)Malloc((nuint)sizeof(ThreadCache*) * (nuint)(maxConcurrencyLevel + 1));
|
|
||||||
|
|
||||||
for (var i = 0; i <= maxConcurrencyLevel; i++)
|
|
||||||
{
|
|
||||||
_caches[i] = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
var overflowCache = CreateCacheForThread(0);
|
|
||||||
if (overflowCache == null)
|
|
||||||
{
|
|
||||||
throw new OutOfMemoryException("Failed to initialize free list overflow cache.");
|
|
||||||
}
|
|
||||||
|
|
||||||
_caches[_OVERFLOW_CACHE_INDEX] = overflowCache;
|
|
||||||
|
|
||||||
}
|
|
||||||
catch
|
|
||||||
{
|
|
||||||
if (_instanceId != null)
|
|
||||||
{
|
|
||||||
Free(_instanceId);
|
|
||||||
_instanceId = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_caches != null)
|
|
||||||
{
|
|
||||||
Free(_caches);
|
|
||||||
_caches = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
_chunkArena.Dispose();
|
|
||||||
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private static SizeBucket* GetBuckets(ThreadCache* cache)
|
|
||||||
{
|
|
||||||
return (SizeBucket*)cache->buckets;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private static void InitializeBuckets(ThreadCache* cache)
|
|
||||||
{
|
|
||||||
var buckets = GetBuckets(cache);
|
|
||||||
var size = _MIN_BLOCK_SIZE;
|
|
||||||
|
|
||||||
for (var i = 0; i < _MAX_BUCKETS; i++)
|
|
||||||
{
|
|
||||||
buckets[i].blockSize = size;
|
|
||||||
buckets[i].freeHead = 0;
|
|
||||||
buckets[i].freeCount = 0;
|
|
||||||
buckets[i].creationLock = 0;
|
|
||||||
size *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
cache->remoteFreeHead = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private static int FindBucket(nuint size)
|
|
||||||
{
|
|
||||||
var blockSize = _MIN_BLOCK_SIZE;
|
|
||||||
for (var i = 0; i < _MAX_BUCKETS; i++)
|
|
||||||
{
|
|
||||||
if (size <= blockSize)
|
|
||||||
{
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
|
||||||
blockSize <<= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private ThreadCache* CreateCacheForThread(int threadId)
|
|
||||||
{
|
|
||||||
var cache = (ThreadCache*)_chunkArena.Allocate(SizeOf<ThreadCache>(), AlignOf<ThreadCache>(), AllocationOption.Clear);
|
|
||||||
if (cache == null)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
InitializeBuckets(cache);
|
|
||||||
cache->threadId = threadId;
|
|
||||||
cache->active = 1;
|
|
||||||
return cache;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private readonly void DrainRemoteFrees(ThreadCache* cache)
|
|
||||||
{
|
|
||||||
var head = (FreeNode*)Interlocked.Exchange(ref cache->remoteFreeHead, 0);
|
|
||||||
while (head != null)
|
|
||||||
{
|
|
||||||
var next = head->next;
|
|
||||||
PushToBucket(cache, head->bucketIndex, head, head->ownerChunk, head->blockSize);
|
|
||||||
head = next;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private readonly ThreadCache* GetOverflowCache()
|
|
||||||
{
|
|
||||||
return _caches[_OVERFLOW_CACHE_INDEX];
|
|
||||||
}
|
|
||||||
|
|
||||||
private ThreadCache* RegisterThreadCache()
|
|
||||||
{
|
|
||||||
while (Interlocked.CompareExchange(ref _cacheRegistrationLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (t_ownerId == _instanceId && t_cacheIndex > 0 && t_cacheIndex <= _cacheCount)
|
|
||||||
{
|
|
||||||
return _caches[t_cacheIndex];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_cacheCount >= _maxConcurrencyLevel)
|
|
||||||
{
|
|
||||||
t_ownerId = _instanceId;
|
|
||||||
t_cacheIndex = _OVERFLOW_CACHE_INDEX;
|
|
||||||
return GetOverflowCache();
|
|
||||||
}
|
|
||||||
|
|
||||||
var threadId = Environment.CurrentManagedThreadId;
|
|
||||||
var cache = CreateCacheForThread(threadId);
|
|
||||||
if (cache == null)
|
|
||||||
{
|
|
||||||
t_ownerId = _instanceId;
|
|
||||||
t_cacheIndex = _OVERFLOW_CACHE_INDEX;
|
|
||||||
return GetOverflowCache();
|
|
||||||
}
|
|
||||||
|
|
||||||
_cacheCount++;
|
|
||||||
_caches[_cacheCount] = cache;
|
|
||||||
|
|
||||||
t_ownerId = _instanceId;
|
|
||||||
t_cacheIndex = _cacheCount;
|
|
||||||
return cache;
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref _cacheRegistrationLock, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private ThreadCache* GetCurrentCache()
|
|
||||||
{
|
|
||||||
if (t_ownerId == _instanceId)
|
|
||||||
{
|
|
||||||
var index = t_cacheIndex;
|
|
||||||
if ((uint)index <= (uint)_cacheCount)
|
|
||||||
{
|
|
||||||
var cache = _caches[index];
|
|
||||||
if (cache != null)
|
|
||||||
{
|
|
||||||
return cache;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return RegisterThreadCache();
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private readonly void* TryPopFromBucket(ThreadCache* cache, int cacheIndex, int bucketIndex)
|
|
||||||
{
|
|
||||||
var buckets = GetBuckets(cache);
|
|
||||||
var bucket = &buckets[bucketIndex];
|
|
||||||
var head = (FreeNode*)bucket->freeHead;
|
|
||||||
if (head == null)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
bucket->freeHead = (nint)head->next;
|
|
||||||
bucket->freeCount--;
|
|
||||||
|
|
||||||
AssignBlockHeader((BlockHeader*)head, head->ownerChunk, head->blockSize, cacheIndex);
|
|
||||||
return head;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private readonly void PushToBucket(ThreadCache* cache, int bucketIndex, void* ptr, MemoryChunk* ownerChunk, nuint blockSize)
|
|
||||||
{
|
|
||||||
var buckets = GetBuckets(cache);
|
|
||||||
var bucket = &buckets[bucketIndex];
|
|
||||||
var node = (FreeNode*)ptr;
|
|
||||||
node->ownerChunk = ownerChunk;
|
|
||||||
node->blockSize = blockSize;
|
|
||||||
node->bucketIndex = bucketIndex;
|
|
||||||
node->next = (FreeNode*)bucket->freeHead;
|
|
||||||
bucket->freeHead = (nint)node;
|
|
||||||
bucket->freeCount++;
|
|
||||||
}
|
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
private static void AssignBlockHeader(BlockHeader* header, MemoryChunk* ownerChunk, nuint blockSize, int ownerCacheIndex)
|
|
||||||
{
|
|
||||||
header->ownerChunk = ownerChunk;
|
|
||||||
header->blockSize = blockSize;
|
|
||||||
header->magicNumber = _MAGIC_NUMBER;
|
|
||||||
header->ownerCacheIndex = ownerCacheIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
private bool TryCreateBlocksForBucket(ThreadCache* cache, int cacheIndex, int bucketIndex)
|
|
||||||
{
|
|
||||||
var buckets = GetBuckets(cache);
|
|
||||||
var bucket = &buckets[bucketIndex];
|
|
||||||
|
|
||||||
while (Interlocked.CompareExchange(ref bucket->creationLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
DrainRemoteFrees(cache);
|
|
||||||
if (bucket->freeHead != 0)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
var blockSize = bucket->blockSize;
|
|
||||||
var blocksToCreate = Math.Max(1u, _chunkSize / blockSize);
|
|
||||||
blocksToCreate = Math.Min(blocksToCreate, 256);
|
|
||||||
|
|
||||||
if (blocksToCreate == 0)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
var totalSize = blocksToCreate * blockSize;
|
|
||||||
var memory = (byte*)AlignedAlloc(totalSize, _alignment);
|
|
||||||
if (memory == null)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
var chunk = (MemoryChunk*)_chunkArena.Allocate(SizeOf<MemoryChunk>(), AlignOf<MemoryChunk>(), AllocationOption.None);
|
|
||||||
if (chunk == null)
|
|
||||||
{
|
|
||||||
AlignedFree(memory);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (Interlocked.CompareExchange(ref _chunkCreationLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
chunk->memory = memory;
|
|
||||||
chunk->size = totalSize;
|
|
||||||
chunk->used = totalSize;
|
|
||||||
chunk->next = _chunks;
|
|
||||||
_chunks = chunk;
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref _chunkCreationLock, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (nuint i = 0; i < blocksToCreate; i++)
|
|
||||||
{
|
|
||||||
var blockStartPtr = memory + (i * blockSize);
|
|
||||||
PushToBucket(cache, bucketIndex, blockStartPtr, chunk, blockSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref bucket->creationLock, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void* AllocateFromChunk(int cacheIndex, nuint size, nuint alignment)
|
|
||||||
{
|
|
||||||
while (Interlocked.CompareExchange(ref _chunkCreationLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
var chunk = _chunks;
|
|
||||||
while (chunk != null)
|
|
||||||
{
|
|
||||||
var alignedOffset = (chunk->used + alignment - 1) & ~(alignment - 1);
|
|
||||||
var totalNeeded = alignedOffset - chunk->used + size;
|
|
||||||
var available = chunk->size - chunk->used;
|
|
||||||
|
|
||||||
if (totalNeeded <= available)
|
|
||||||
{
|
|
||||||
var blockStartPtr = chunk->memory + alignedOffset;
|
|
||||||
chunk->used = alignedOffset + size;
|
|
||||||
AssignBlockHeader((BlockHeader*)blockStartPtr, chunk, size, cacheIndex);
|
|
||||||
return blockStartPtr;
|
|
||||||
}
|
|
||||||
|
|
||||||
chunk = chunk->next;
|
|
||||||
}
|
|
||||||
|
|
||||||
var newChunkSize = Math.Max(_chunkSize, size + alignment);
|
|
||||||
var newMemory = (byte*)AlignedAlloc(newChunkSize, alignment);
|
|
||||||
if (newMemory == null)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
var newChunk = (MemoryChunk*)_chunkArena.Allocate(SizeOf<MemoryChunk>(), AlignOf<MemoryChunk>(), AllocationOption.None);
|
|
||||||
if (newChunk == null)
|
|
||||||
{
|
|
||||||
AlignedFree(newMemory);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
newChunk->memory = newMemory;
|
|
||||||
newChunk->size = newChunkSize;
|
|
||||||
newChunk->used = size;
|
|
||||||
newChunk->next = _chunks;
|
|
||||||
_chunks = newChunk;
|
|
||||||
|
|
||||||
AssignBlockHeader((BlockHeader*)newMemory, newChunk, size, cacheIndex);
|
|
||||||
return newMemory;
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref _chunkCreationLock, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Allocates a memory block of the specified size.
|
|
||||||
/// </summary>
|
|
||||||
/// <remarks>
|
|
||||||
/// This is thread safe.
|
|
||||||
/// </remarks>
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
public void* Allocate(nuint size, nuint alignment, AllocationOption allocationOption = AllocationOption.None)
|
|
||||||
{
|
|
||||||
if (_disposed != 0)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (size == 0)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (alignment == 0)
|
|
||||||
{
|
|
||||||
alignment = _alignment;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((alignment & (alignment - 1)) != 0)
|
|
||||||
{
|
|
||||||
throw new ArgumentException("Alignment must be a power of two.", nameof(alignment));
|
|
||||||
}
|
|
||||||
|
|
||||||
var alignedSize = (size + alignment - 1) & ~(alignment - 1);
|
|
||||||
alignedSize = Math.Max(alignedSize, _MIN_BLOCK_SIZE);
|
|
||||||
|
|
||||||
var totalSize = alignedSize + (nuint)sizeof(BlockHeader);
|
|
||||||
var bucketIndex = FindBucket(totalSize);
|
|
||||||
var cache = GetCurrentCache();
|
|
||||||
var cacheIndex = t_cacheIndex;
|
|
||||||
var requiresOverflowLock = cacheIndex == _OVERFLOW_CACHE_INDEX;
|
|
||||||
|
|
||||||
if (requiresOverflowLock)
|
|
||||||
{
|
|
||||||
while (Interlocked.CompareExchange(ref _overflowLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
DrainRemoteFrees(cache);
|
|
||||||
|
|
||||||
void* ptr = null;
|
|
||||||
if (bucketIndex >= 0)
|
|
||||||
{
|
|
||||||
ptr = TryPopFromBucket(cache, cacheIndex, bucketIndex);
|
|
||||||
if (ptr == null && TryCreateBlocksForBucket(cache, cacheIndex, bucketIndex))
|
|
||||||
{
|
|
||||||
ptr = TryPopFromBucket(cache, cacheIndex, bucketIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Oversized block: Bypass chunk linking entirely and go straight to the OS
|
|
||||||
ptr = AlignedAlloc(totalSize, alignment);
|
|
||||||
if (ptr != null)
|
|
||||||
{
|
|
||||||
// Pass null for ownerChunk so 'Free' knows this is a standalone allocation
|
|
||||||
AssignBlockHeader((BlockHeader*)ptr, null, totalSize, cacheIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ptr == null)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
var header = (BlockHeader*)ptr;
|
|
||||||
header->ownerCacheIndex = cacheIndex;
|
|
||||||
|
|
||||||
var userPtr = (byte*)ptr + sizeof(BlockHeader);
|
|
||||||
if (allocationOption.HasFlag(AllocationOption.Clear))
|
|
||||||
{
|
|
||||||
MemClear(userPtr, alignedSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
return userPtr;
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
if (requiresOverflowLock)
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref _overflowLock, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void* Reallocate(void* ptr, nuint oldSize, nuint newSize, nuint alignment, AllocationOption allocationOption = AllocationOption.None)
|
|
||||||
{
|
|
||||||
if (_disposed != 0)
|
|
||||||
{
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
var newPtr = Allocate(newSize, alignment, allocationOption);
|
|
||||||
if (newPtr != null && ptr != null)
|
|
||||||
{
|
|
||||||
var copySize = Math.Min(oldSize, newSize);
|
|
||||||
MemCpy(newPtr, ptr, copySize);
|
|
||||||
Free(ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
return newPtr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
|
|
||||||
/// Frees a previously allocated memory block.
|
|
||||||
/// </summary>
|
|
||||||
/// <remarks>
|
|
||||||
/// This is thread safe.
|
|
||||||
/// </remarks>
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
||||||
public void Free(void* ptr)
|
|
||||||
{
|
|
||||||
if (_disposed != 0 || ptr == null)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var blockStartPtr = (byte*)ptr - sizeof(BlockHeader);
|
|
||||||
var header = (BlockHeader*)blockStartPtr;
|
|
||||||
if (header->magicNumber != _MAGIC_NUMBER)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var chunk = header->ownerChunk;
|
|
||||||
if (chunk == null)
|
|
||||||
{
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var blockSize = header->blockSize;
|
|
||||||
var ownerCacheIndex = header->ownerCacheIndex;
|
|
||||||
var bucketIndex = FindBucket(blockSize);
|
|
||||||
|
|
||||||
if (bucketIndex < 0)
|
|
||||||
{
|
|
||||||
// This is an oversized allocation. It doesn't belong to a bucket or a chunk.
|
|
||||||
// Erase the magic number for safety and instantly yield it back to the OS.
|
|
||||||
header->magicNumber = 0;
|
|
||||||
AlignedFree(blockStartPtr);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var sameThread = t_ownerId == _instanceId && t_cacheIndex == ownerCacheIndex;
|
|
||||||
var targetCache = ownerCacheIndex >= 0 && ownerCacheIndex <= _cacheCount ? _caches[ownerCacheIndex] : null;
|
|
||||||
if (targetCache == null)
|
|
||||||
{
|
|
||||||
targetCache = GetOverflowCache();
|
|
||||||
ownerCacheIndex = _OVERFLOW_CACHE_INDEX;
|
|
||||||
sameThread = t_ownerId == _instanceId && t_cacheIndex == ownerCacheIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sameThread)
|
|
||||||
{
|
|
||||||
if (ownerCacheIndex == _OVERFLOW_CACHE_INDEX)
|
|
||||||
{
|
|
||||||
while (Interlocked.CompareExchange(ref _overflowLock, 1, 0) != 0)
|
|
||||||
{
|
|
||||||
Thread.SpinWait(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk, blockSize);
|
|
||||||
}
|
|
||||||
finally
|
|
||||||
{
|
|
||||||
Interlocked.Exchange(ref _overflowLock, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
PushToBucket(targetCache, bucketIndex, blockStartPtr, chunk, blockSize);
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var remoteNode = (FreeNode*)blockStartPtr;
|
|
||||||
remoteNode->ownerChunk = chunk;
|
|
||||||
remoteNode->blockSize = blockSize;
|
|
||||||
remoteNode->bucketIndex = bucketIndex;
|
|
||||||
|
|
||||||
nint head;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
head = targetCache->remoteFreeHead;
|
|
||||||
remoteNode->next = (FreeNode*)head;
|
|
||||||
} while (Interlocked.CompareExchange(ref targetCache->remoteFreeHead, (nint)remoteNode, head) != head);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// <summary>
/// Tears down the allocator: drains and deactivates every thread cache, frees the
/// cache table and the instance id, releases each chunk's memory, and finally
/// disposes the chunk arena. Only the first call performs any work.
/// </summary>
public void Dispose()
{
    // Atomically flip _disposed from 0 to 1; any later (or racing) caller sees a
    // non-zero previous value and returns without touching anything.
    var alreadyDisposed = Interlocked.CompareExchange(ref _disposed, 1, 0) != 0;
    if (alreadyDisposed)
    {
        return;
    }

    if (_caches != null)
    {
        // NOTE(review): the bound is inclusive, which implies the table holds
        // _cacheCount + 1 slots — confirm against where _caches is allocated.
        for (var slot = 0; slot <= _cacheCount; slot++)
        {
            var threadCache = _caches[slot];
            if (threadCache == null)
            {
                continue;
            }

            // Process the cache's pending remote frees, then mark it inactive.
            DrainRemoteFrees(threadCache);
            threadCache->active = 0;
        }

        // Release the cache table itself and clear the field.
        MemoryUtility.Free(_caches);
        _caches = null;
    }

    if (_instanceId != null)
    {
        MemoryUtility.Free(_instanceId);
        _instanceId = null;
    }

    // Snapshot the arena and the chunk list head, and detach the list from the
    // instance before walking it.
    var arena = _chunkArena;
    var current = _chunks;
    _chunks = null;

    while (current != null)
    {
        // Read the link first, then release this chunk's backing memory.
        var following = current->next;
        AlignedFree(current->memory);
        current = following;
    }

    arena.Dispose();
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
@@ -7,7 +7,7 @@
|
|||||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
||||||
<Authors>Misaki</Authors>
|
<Authors>Misaki</Authors>
|
||||||
<AssemblyVersion>1.6.18</AssemblyVersion>
|
<AssemblyVersion>1.6.19</AssemblyVersion>
|
||||||
<Version>$(AssemblyVersion)</Version>
|
<Version>$(AssemblyVersion)</Version>
|
||||||
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
||||||
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
||||||
|
|||||||
@@ -70,7 +70,13 @@ public static unsafe partial class MemoryUtility
|
|||||||
public static void* Malloc(nuint size)
|
public static void* Malloc(nuint size)
|
||||||
{
|
{
|
||||||
#if MHP_ENABLE_MIMALLOC
|
#if MHP_ENABLE_MIMALLOC
|
||||||
return Mimalloc.mi_malloc(size);
|
var ptr = Mimalloc.mi_malloc(size);
|
||||||
|
if (ptr == null)
|
||||||
|
{
|
||||||
|
throw new OutOfMemoryException("Failed to allocate memory using Malloc.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr;
|
||||||
#elif NET6_0_OR_GREATER
|
#elif NET6_0_OR_GREATER
|
||||||
return NativeMemory.Alloc(size);
|
return NativeMemory.Alloc(size);
|
||||||
#else
|
#else
|
||||||
@@ -87,7 +93,13 @@ public static unsafe partial class MemoryUtility
|
|||||||
public static void* Calloc(nuint size)
|
public static void* Calloc(nuint size)
|
||||||
{
|
{
|
||||||
#if MHP_ENABLE_MIMALLOC
|
#if MHP_ENABLE_MIMALLOC
|
||||||
return Mimalloc.mi_zalloc(size);
|
var ptr = Mimalloc.mi_zalloc(size);
|
||||||
|
if (ptr == null)
|
||||||
|
{
|
||||||
|
throw new OutOfMemoryException("Failed to allocate zero-initialized memory using Calloc.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr;
|
||||||
#elif NET6_0_OR_GREATER
|
#elif NET6_0_OR_GREATER
|
||||||
return NativeMemory.AllocZeroed(size);
|
return NativeMemory.AllocZeroed(size);
|
||||||
#else
|
#else
|
||||||
@@ -107,7 +119,13 @@ public static unsafe partial class MemoryUtility
|
|||||||
public static void* AlignedAlloc(nuint size, nuint alignment)
|
public static void* AlignedAlloc(nuint size, nuint alignment)
|
||||||
{
|
{
|
||||||
#if MHP_ENABLE_MIMALLOC
|
#if MHP_ENABLE_MIMALLOC
|
||||||
return Mimalloc.mi_aligned_alloc(alignment, size);
|
var ptr = Mimalloc.mi_aligned_alloc(alignment, size);
|
||||||
|
if (ptr == null)
|
||||||
|
{
|
||||||
|
throw new OutOfMemoryException("Failed to allocate aligned memory using AlignedAlloc.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ptr;
|
||||||
#elif NET6_0_OR_GREATER
|
#elif NET6_0_OR_GREATER
|
||||||
return NativeMemory.AlignedAlloc(size, alignment);
|
return NativeMemory.AlignedAlloc(size, alignment);
|
||||||
#else
|
#else
|
||||||
@@ -125,7 +143,13 @@ public static unsafe partial class MemoryUtility
|
|||||||
public static void* Realloc(void* ptr, nuint size)
|
public static void* Realloc(void* ptr, nuint size)
|
||||||
{
|
{
|
||||||
#if MHP_ENABLE_MIMALLOC
|
#if MHP_ENABLE_MIMALLOC
|
||||||
return Mimalloc.mi_realloc(ptr, size);
|
var result = Mimalloc.mi_realloc(ptr, size);
|
||||||
|
if (result == null)
|
||||||
|
{
|
||||||
|
throw new OutOfMemoryException("Failed to reallocate memory using Realloc.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
#elif NET6_0_OR_GREATER
|
#elif NET6_0_OR_GREATER
|
||||||
return NativeMemory.Realloc(ptr, size);
|
return NativeMemory.Realloc(ptr, size);
|
||||||
#else
|
#else
|
||||||
@@ -145,7 +169,13 @@ public static unsafe partial class MemoryUtility
|
|||||||
public static void* AlignedRealloc(void* ptr, nuint size, nuint alignment)
|
public static void* AlignedRealloc(void* ptr, nuint size, nuint alignment)
|
||||||
{
|
{
|
||||||
#if MHP_ENABLE_MIMALLOC
|
#if MHP_ENABLE_MIMALLOC
|
||||||
return Mimalloc.mi_realloc_aligned(ptr, size, alignment);
|
var result = Mimalloc.mi_realloc_aligned(ptr, size, alignment);
|
||||||
|
if (result == null)
|
||||||
|
{
|
||||||
|
throw new OutOfMemoryException("Failed to reallocate aligned memory using AlignedRealloc.");
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
#elif NET6_0_OR_GREATER
|
#elif NET6_0_OR_GREATER
|
||||||
return NativeMemory.AlignedRealloc(ptr, size, alignment);
|
return NativeMemory.AlignedRealloc(ptr, size, alignment);
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ using BenchmarkDotNet.Attributes;
|
|||||||
using BenchmarkDotNet.Engines;
|
using BenchmarkDotNet.Engines;
|
||||||
using Misaki.HighPerformance.Image;
|
using Misaki.HighPerformance.Image;
|
||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
|
using Misaki.HighPerformance.Mathematics;
|
||||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
using SkiaSharp;
|
using SkiaSharp;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
@@ -134,7 +135,6 @@ internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int>
|
|||||||
// 3. Monte Carlo Integration Loop
|
// 3. Monte Carlo Integration Loop
|
||||||
|
|
||||||
var dynamicSampleCount = (uint)max(1.0f, GGXMipGenerationBenchmark.SAMPLE_COUNT * mipLevel.roughness);
|
var dynamicSampleCount = (uint)max(1.0f, GGXMipGenerationBenchmark.SAMPLE_COUNT * mipLevel.roughness);
|
||||||
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
|
||||||
var lumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
var lumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
||||||
|
|
||||||
for (var i = 0; i < dynamicSampleCount; i++)
|
for (var i = 0; i < dynamicSampleCount; i++)
|
||||||
@@ -169,21 +169,7 @@ internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int>
|
|||||||
|
|
||||||
// Write to output mip array
|
// Write to output mip array
|
||||||
var out_idx = (y * w + x) * 3;
|
var out_idx = (y * w + x) * 3;
|
||||||
|
prefilteredColor.MaskScatter(pData, out_idx.GetUnsafePtr(), mask);
|
||||||
// TODO: Optimize this
|
|
||||||
for (var i = 0; i < TFloat.LaneWidth; i++)
|
|
||||||
{
|
|
||||||
if (mask[i] == 0.0f)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
var idx = out_idx[i];
|
|
||||||
|
|
||||||
pData[idx] = prefilteredColor.x[i];
|
|
||||||
pData[idx + 1] = prefilteredColor.y[i];
|
|
||||||
pData[idx + 2] = prefilteredColor.z[i];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -364,7 +350,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
// 4. Average the result
|
// 4. Average the result
|
||||||
if (totalWeight > 0.0f)
|
if (totalWeight > 0.0f)
|
||||||
{
|
{
|
||||||
prefilteredColor *= 1.0f / totalWeight;
|
prefilteredColor *= rcp(totalWeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write to output mip array
|
// Write to output mip array
|
||||||
|
|||||||
@@ -1,12 +1,6 @@
|
|||||||
using BenchmarkDotNet.Running;
|
|
||||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
|
||||||
using Misaki.HighPerformance.LowLevel.Collections;
|
|
||||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
|
||||||
using Misaki.HighPerformance.Test.Benchmark;
|
using Misaki.HighPerformance.Test.Benchmark;
|
||||||
using Misaki.HighPerformance.Test.UnitTest;
|
using Misaki.HighPerformance.Test.UnitTest;
|
||||||
using Misaki.HighPerformance.Test.UnitTest.Jobs;
|
using Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||||
using System.Runtime.CompilerServices;
|
|
||||||
using System.Runtime.InteropServices;
|
|
||||||
|
|
||||||
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
||||||
|
|
||||||
@@ -15,14 +9,14 @@ const int count = 16;
|
|||||||
var bench = new GGXMipGenerationBenchmark();
|
var bench = new GGXMipGenerationBenchmark();
|
||||||
bench.Setup();
|
bench.Setup();
|
||||||
|
|
||||||
for (int i = 0; i < count; i++)
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
bench.JobGGX();
|
bench.JobGGX();
|
||||||
}
|
}
|
||||||
|
|
||||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
|
|
||||||
for (int i = 0; i < count; i++)
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
bench.JobGGX();
|
bench.JobGGX();
|
||||||
}
|
}
|
||||||
@@ -32,8 +26,8 @@ var avgTime = sw.Elapsed.TotalMilliseconds / count;
|
|||||||
Console.WriteLine($"GGX Mip Generation (Inline): {avgTime} ms");
|
Console.WriteLine($"GGX Mip Generation (Inline): {avgTime} ms");
|
||||||
bench.Cleanup();
|
bench.Cleanup();
|
||||||
|
|
||||||
//GlobalSetup.GlobalInitialize(null!);
|
GlobalSetup.GlobalInitialize(null!);
|
||||||
//TestJobSystem.Initialize(null!);
|
TestJobSystem.Initialize(null!);
|
||||||
|
|
||||||
//var test = new TestJobSystem();
|
//var test = new TestJobSystem();
|
||||||
//for (int i = 0; i < 10000; i++)
|
//for (int i = 0; i < 10000; i++)
|
||||||
|
|||||||
Reference in New Issue
Block a user