SPMD API overhaul: gather/scatter, job & packaging updates

- ISPMDLane: add MaskGather, MaskStore, Scatter, MaskScatter; update MaskLoad/Gather signatures for hardware parity
- WideLane/ScalarLane: implement new methods with HW/fallback logic
- MathV: gather/mask-gather now delegate to lane methods
- Vector2/3/4: add CompressStore, Scatter, MaskScatter
- SPMD jobs/tests/README: migrate to new APIs for correctness
- Use Unsafe.BitCast instead of Unsafe.As/AsRef
- Add SPMDUtility for gather index extraction
- Job system: add ICustomJob<TSelf>, ScheduleCustom overload
- FreeList: mark the concurrency-level options as obsolete (ignored); the allocator is now always thread-safe
- NuGet: include LICENSE/README, set license/readme in .csproj
- Docs: update SPMD usage, clarify safety notes
- Minor: doc fixes, CompressStore test improvements
This commit is contained in:
2026-05-04 13:56:49 +09:00
parent 99fcbec753
commit 155d7b0fbd
32 changed files with 1463 additions and 2028 deletions

View File

@@ -60,7 +60,8 @@ public readonly struct AllocationManagerDesc
get; init;
}
public required int FreeListConcurrencyLevel
[Obsolete("FreeList concurrency level is no longer used and will be ignored. FreeList is now designed to be thread-safe without a fixed concurrency level.")]
public int FreeListConcurrencyLevel
{
get; init;
}
@@ -71,7 +72,6 @@ public readonly struct AllocationManagerDesc
StackCapacity = 16 * 1024 * 1024, // 16 MB per thread
FreeListChunkSize = 64 * 1024 * 1024,
FreeListDefaultAlignment = 8,
FreeListConcurrencyLevel = Environment.ProcessorCount
};
}
@@ -210,8 +210,7 @@ public static unsafe class AllocationManager
s_freeListAllocator = new MemoryPool<FreeList, FreeList.CreationOptions>(new FreeList.CreationOptions
{
alignment = opts.FreeListDefaultAlignment,
chunkSize = opts.FreeListChunkSize,
maxConcurrencyLevel = opts.FreeListConcurrencyLevel
chunkSize = opts.FreeListChunkSize
});
s_pHeapAllocator = (HeapAllocator*)Malloc((nuint)(sizeof(HeapAllocator)));

View File

@@ -15,6 +15,7 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
{
public nuint alignment;
public nuint chunkSize;
[Obsolete("Max concurrency level is no longer used and will be ignored. FreeList is now designed to be thread-safe without a fixed concurrency level.")]
public int maxConcurrencyLevel;
}
@@ -501,7 +502,6 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
/// <remarks>
/// This is thread safe.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void* Allocate(nuint size, nuint alignment, AllocationOption allocationOption = AllocationOption.None)
{
if (_disposed != 0)
@@ -605,7 +605,6 @@ public unsafe struct FreeList : IMemoryAllocator<FreeList, FreeList.CreationOpti
/// <remarks>
/// This is thread safe.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Free(void* ptr)
{
if (_disposed != 0 || ptr == null)

View File

@@ -89,9 +89,15 @@ public unsafe struct TLSF : IMemoryAllocator<TLSF, TLSF.CreationOptions>
private nuint _alignment;
private nuint _chunkSize;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static TLSF Create(in CreationOptions opts)
{
var alignment = opts.alignment == 0 ? 16 : opts.alignment;
return new TLSF(opts.alignment, opts.initialChunkSize);
}
public TLSF (nuint alignment, nuint chunkSize)
{
alignment = alignment == 0 ? 16 : alignment;
if (alignment < 16)
{
alignment = 16;
@@ -102,20 +108,15 @@ public unsafe struct TLSF : IMemoryAllocator<TLSF, TLSF.CreationOptions>
throw new ArgumentException("Alignment must be a power of 2");
}
TLSF allocator = default;
allocator._alignment = alignment;
allocator._chunkSize = opts.initialChunkSize == 0 ? 64 * 1024 : opts.initialChunkSize;
_alignment = alignment;
_chunkSize = chunkSize == 0 ? 64 * 1024 : chunkSize;
var slSize = 64 * (nuint)sizeof(uint);
var blocksSize = 64 * 32 * (nuint)sizeof(BlockHeader*);
allocator._slBitmaps = (uint*)Malloc(slSize);
allocator._blocks = (BlockHeader**)Malloc(blocksSize);
MemClear(allocator._slBitmaps, slSize);
MemClear(allocator._blocks, blocksSize);
_slBitmaps = (uint*)Calloc(slSize);
_blocks = (BlockHeader**)Calloc(blocksSize);
allocator.AddChunk(allocator._chunkSize);
return allocator;
AddChunk(_chunkSize);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]