SPMD SIMD math library & lock-free job system integration

- Add new SPMD SIMD math project with scalar/vector lanes
- Integrate SPMD jobs and scheduling into job system
- Implement lock-free job dependency management
- Update math functions for .NET 10 and SIMD performance
- Add SPMD benchmarks, compress-store tests, and race tests
- Introduce generic Result<T> error handling utilities
- Solution/project file updates and code cleanup
This commit is contained in:
2026-02-11 22:44:30 +09:00
parent c36405645b
commit a9c143c2a2
22 changed files with 3433 additions and 221 deletions

View File

@@ -0,0 +1,114 @@
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
public static class CompressStoreTest
{
public static void Run()
{
Console.WriteLine("--- Testing CompressStore (Double) ---");
// Test 1: Simple Pattern (True, False, True, False...)
TestPattern_Double(
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
// Mask: Keep only even numbers (values > 0)
// We simulate a mask by comparing against 0 or -1
keepPattern: new bool[] { true, false, true, false, true, false, true, false }
);
// Test 2: All True
TestPattern_Double(
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
keepPattern: new bool[] { true, true, true, true, true, true, true, true }
);
// Test 3: All False
TestPattern_Double(
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
keepPattern: new bool[] { false, false, false, false, false, false, false, false }
);
// Test 4: Sparse (First and Last only)
TestPattern_Double(
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
keepPattern: new bool[] { true, false, false, false, false, false, false, true }
);
}
private unsafe static void TestPattern_Double(double[] input, bool[] keepPattern)
{
// 1. Setup Input Vector
// Handle case where Vector<T> is smaller than 8 (e.g. 2 or 4)
var vecSize = Vector<double>.Count;
var safeInput = new double[vecSize];
var safeMaskVal = new double[vecSize];
// Expected Output Calculation
var expected = new double[vecSize];
var expectedCount = 0;
for (var i = 0; i < vecSize; i++)
{
safeInput[i] = input[i];
// If we want to keep it, make mask "GreaterThan" true
// We'll compare X > 0.
// If keep=true, val=1. If keep=false, val=-1.
safeMaskVal[i] = keepPattern[i] ? 1 : -1;
if (keepPattern[i])
{
expected[expectedCount++] = input[i];
}
}
// 2. Create WideLanes
var vInput = WideLane<double>.Load(ref safeInput.AsSpan().GetPinnableReference());
// Create Mask: greater than 0
var vMaskVal = WideLane<double>.Load(ref safeMaskVal.AsSpan().GetPinnableReference());
var vZero = WideLane<double>.Create(0);
var vMask = WideLane<double>.GreaterThan(vMaskVal, vZero);
// 3. Run CompressStore
var outputBuffer = new double[vecSize];
var actualCount = 0;
fixed (double* ptr = outputBuffer)
{
actualCount = vInput.CompressStore(vMask, ptr);
}
// 4. Verify
var pass = actualCount == expectedCount;
for (var i = 0; i < expectedCount; i++)
{
if (outputBuffer[i] != expected[i])
pass = false;
}
// 5. Report
var hardware = (vecSize == 4) ? "AVX2 (256-bit)" : (vecSize == 2) ? "SSE/NEON (128-bit)" : "Scalar";
Console.Write($"[{hardware}] Pattern: ");
for (var i = 0; i < vecSize; i++)
Console.Write(keepPattern[i] ? "1" : "0");
if (pass)
{
Console.WriteLine($" -> PASS (Count: {actualCount})");
}
else
{
Console.WriteLine($" -> FAIL!");
Console.WriteLine($" Expected Count: {expectedCount}, Actual: {actualCount}");
Console.Write(" Expected Data: ");
foreach (var d in expected)
Console.Write($"{d} ");
Console.WriteLine();
Console.Write(" Actual Data: ");
foreach (var d in outputBuffer)
Console.Write($"{d} ");
Console.WriteLine();
}
}
}

View File

@@ -1,19 +1,28 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
[TestClass]
[DoNotParallelize]
public unsafe class TestJobSystem
{
private JobScheduler _jobScheduler = null!;
public TestContext TestContext
{
get;
set;
}
[TestInitialize]
public void Initialize()
{
_jobScheduler = new JobScheduler(Environment.ProcessorCount);
_jobScheduler = new JobScheduler(3);
}
[TestCleanup]
@@ -251,4 +260,102 @@ public unsafe class TestJobSystem
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
}
[TestMethod]
public void RaceConditionTest()
{
const int jobCount = 20000;
var pExecutedCount = (int*)NativeMemory.Alloc(sizeof(int));
*pExecutedCount = 0;
var startSignal = false;
// 1. Create a "Gatekeeper" vectorJob that spins/blocks a worker thread until signaled.
// This allows us to control exactly when the dependency completes.
var rootJob = new WaitJob { pSignal = &startSignal };
var rootHandle = _jobScheduler.Schedule(ref rootJob);
// 2. Start a background task to flood the scheduler with dependencies on the Gatekeeper.
using var barrier = new Barrier(2);
var scheduleTask = Task.Run(() =>
{
var depJob = new IncrementJob { pCounter = pExecutedCount };
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Synchronize start with main thread
for (var i = 0; i < jobCount; i++)
{
// CONTENTION POINT:
// Trying to add a dependency to 'rootHandle'.
// Eventually, this will happen exactly while 'rootHandle' is transitioning to Completed.
_jobScheduler.Schedule(ref depJob, rootHandle);
}
}, TestContext.CancellationTokenSource.Token);
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Wait for scheduler task to be ready
// Allow the scheduling loop to get a head start and queue some readers
Thread.Sleep(5);
// 3. Open the gate.
// This triggers the Gatekeeper to complete. It will change its State and iterate its dependency list.
// This happens CONCURRENTLY with the loop above adding more items to that same list.
startSignal = true;
scheduleTask.Wait(TestContext.CancellationTokenSource.Token);
// 4. Validate results
// If the lock-free logic works, every single dependent vectorJob must eventually execute.
// If there is a race (e.g., missed notification), pExecutedCount will stick below jobCount.
var spin = new SpinWait();
var timeout = DateTime.Now.AddSeconds(10);
while (Volatile.Read(ref *pExecutedCount) < jobCount)
{
if (DateTime.Now > timeout)
{
break;
}
spin.SpinOnce();
}
// Ensure the root vectorJob is officially cleaned up
_jobScheduler.WaitComplete(rootHandle);
Assert.AreEqual(jobCount, *pExecutedCount, "Race condition detected: Some dependent jobs failed to execute (Wait timeout).");
NativeMemory.Free(pExecutedCount);
}
[TestMethod]
public void SPMDCorrectness()
{
const int size = 8;
var vectorBuf = stackalloc float[size * size];
var vs = new Span<float>(vectorBuf, size * size);
var vectorJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
{
buffers = vectorBuf,
width = size,
height = size,
};
vectorJob.Run(size * size, -1);
var spmdBuf = stackalloc float[size * size];
var ss = new Span<float>(spmdBuf, size * size);
var spmdJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
{
buffers = spmdBuf,
width = size,
height = size,
};
spmdJob.Run(size * size, -1);
var eq = vs.SequenceCompareTo(ss);
Assert.AreEqual(0, eq);
}
}

View File

@@ -1,4 +1,4 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Jobs;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
@@ -70,4 +70,28 @@ internal unsafe struct ParallelMultiplyJob : IJobParallelFor
{
inout[loopIndex] *= multiplier;
}
}
public unsafe struct WaitJob : IJob
{
public bool* pSignal;
public void Execute(int loopIndex)
{
var spin = new SpinWait();
while (!Volatile.Read(ref *pSignal))
{
spin.SpinOnce();
}
}
}
public unsafe struct IncrementJob : IJob
{
public int* pCounter;
public void Execute(int loopIndex)
{
Interlocked.Increment(ref *pCounter);
}
}