SPMD SIMD math library & lock-free job system integration
- Add new SPMD SIMD math project with scalar/vector lanes - Integrate SPMD jobs and scheduling into job system - Implement lock-free job dependency management - Update math functions for .NET 10 and SIMD performance - Add SPMD benchmarks, compress-store tests, and race tests - Introduce generic Result<T> error handling utilities - Solution/project file updates and code cleanup
This commit is contained in:
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
@@ -0,0 +1,114 @@
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
public static class CompressStoreTest
|
||||
{
|
||||
public static void Run()
|
||||
{
|
||||
Console.WriteLine("--- Testing CompressStore (Double) ---");
|
||||
|
||||
// Test 1: Simple Pattern (True, False, True, False...)
|
||||
TestPattern_Double(
|
||||
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
|
||||
// Mask: Keep only even numbers (values > 0)
|
||||
// We simulate a mask by comparing against 0 or -1
|
||||
keepPattern: new bool[] { true, false, true, false, true, false, true, false }
|
||||
);
|
||||
|
||||
// Test 2: All True
|
||||
TestPattern_Double(
|
||||
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
|
||||
keepPattern: new bool[] { true, true, true, true, true, true, true, true }
|
||||
);
|
||||
|
||||
// Test 3: All False
|
||||
TestPattern_Double(
|
||||
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
|
||||
keepPattern: new bool[] { false, false, false, false, false, false, false, false }
|
||||
);
|
||||
|
||||
// Test 4: Sparse (First and Last only)
|
||||
TestPattern_Double(
|
||||
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
|
||||
keepPattern: new bool[] { true, false, false, false, false, false, false, true }
|
||||
);
|
||||
}
|
||||
|
||||
private unsafe static void TestPattern_Double(double[] input, bool[] keepPattern)
|
||||
{
|
||||
// 1. Setup Input Vector
|
||||
// Handle case where Vector<T> is smaller than 8 (e.g. 2 or 4)
|
||||
var vecSize = Vector<double>.Count;
|
||||
var safeInput = new double[vecSize];
|
||||
var safeMaskVal = new double[vecSize];
|
||||
|
||||
// Expected Output Calculation
|
||||
var expected = new double[vecSize];
|
||||
var expectedCount = 0;
|
||||
|
||||
for (var i = 0; i < vecSize; i++)
|
||||
{
|
||||
safeInput[i] = input[i];
|
||||
// If we want to keep it, make mask "GreaterThan" true
|
||||
// We'll compare X > 0.
|
||||
// If keep=true, val=1. If keep=false, val=-1.
|
||||
safeMaskVal[i] = keepPattern[i] ? 1 : -1;
|
||||
|
||||
if (keepPattern[i])
|
||||
{
|
||||
expected[expectedCount++] = input[i];
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Create WideLanes
|
||||
var vInput = WideLane<double>.Load(ref safeInput.AsSpan().GetPinnableReference());
|
||||
|
||||
// Create Mask: greater than 0
|
||||
var vMaskVal = WideLane<double>.Load(ref safeMaskVal.AsSpan().GetPinnableReference());
|
||||
var vZero = WideLane<double>.Create(0);
|
||||
var vMask = WideLane<double>.GreaterThan(vMaskVal, vZero);
|
||||
|
||||
// 3. Run CompressStore
|
||||
var outputBuffer = new double[vecSize];
|
||||
var actualCount = 0;
|
||||
|
||||
fixed (double* ptr = outputBuffer)
|
||||
{
|
||||
actualCount = vInput.CompressStore(vMask, ptr);
|
||||
}
|
||||
|
||||
// 4. Verify
|
||||
var pass = actualCount == expectedCount;
|
||||
for (var i = 0; i < expectedCount; i++)
|
||||
{
|
||||
if (outputBuffer[i] != expected[i])
|
||||
pass = false;
|
||||
}
|
||||
|
||||
// 5. Report
|
||||
var hardware = (vecSize == 4) ? "AVX2 (256-bit)" : (vecSize == 2) ? "SSE/NEON (128-bit)" : "Scalar";
|
||||
Console.Write($"[{hardware}] Pattern: ");
|
||||
for (var i = 0; i < vecSize; i++)
|
||||
Console.Write(keepPattern[i] ? "1" : "0");
|
||||
|
||||
if (pass)
|
||||
{
|
||||
Console.WriteLine($" -> PASS (Count: {actualCount})");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($" -> FAIL!");
|
||||
Console.WriteLine($" Expected Count: {expectedCount}, Actual: {actualCount}");
|
||||
Console.Write(" Expected Data: ");
|
||||
foreach (var d in expected)
|
||||
Console.Write($"{d} ");
|
||||
Console.WriteLine();
|
||||
Console.Write(" Actual Data: ");
|
||||
foreach (var d in outputBuffer)
|
||||
Console.Write($"{d} ");
|
||||
Console.WriteLine();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,19 +1,28 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
[TestClass]
|
||||
[DoNotParallelize]
|
||||
public unsafe class TestJobSystem
|
||||
{
|
||||
private JobScheduler _jobScheduler = null!;
|
||||
|
||||
public TestContext TestContext
|
||||
{
|
||||
get;
|
||||
set;
|
||||
}
|
||||
|
||||
[TestInitialize]
|
||||
public void Initialize()
|
||||
{
|
||||
_jobScheduler = new JobScheduler(Environment.ProcessorCount);
|
||||
_jobScheduler = new JobScheduler(3);
|
||||
}
|
||||
|
||||
[TestCleanup]
|
||||
@@ -251,4 +260,102 @@ public unsafe class TestJobSystem
|
||||
|
||||
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void RaceConditionTest()
|
||||
{
|
||||
const int jobCount = 20000;
|
||||
|
||||
var pExecutedCount = (int*)NativeMemory.Alloc(sizeof(int));
|
||||
*pExecutedCount = 0;
|
||||
|
||||
var startSignal = false;
|
||||
|
||||
// 1. Create a "Gatekeeper" vectorJob that spins/blocks a worker thread until signaled.
|
||||
// This allows us to control exactly when the dependency completes.
|
||||
var rootJob = new WaitJob { pSignal = &startSignal };
|
||||
var rootHandle = _jobScheduler.Schedule(ref rootJob);
|
||||
|
||||
// 2. Start a background task to flood the scheduler with dependencies on the Gatekeeper.
|
||||
using var barrier = new Barrier(2);
|
||||
var scheduleTask = Task.Run(() =>
|
||||
{
|
||||
var depJob = new IncrementJob { pCounter = pExecutedCount };
|
||||
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Synchronize start with main thread
|
||||
|
||||
for (var i = 0; i < jobCount; i++)
|
||||
{
|
||||
// CONTENTION POINT:
|
||||
// Trying to add a dependency to 'rootHandle'.
|
||||
// Eventually, this will happen exactly while 'rootHandle' is transitioning to Completed.
|
||||
_jobScheduler.Schedule(ref depJob, rootHandle);
|
||||
}
|
||||
}, TestContext.CancellationTokenSource.Token);
|
||||
|
||||
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Wait for scheduler task to be ready
|
||||
|
||||
// Allow the scheduling loop to get a head start and queue some readers
|
||||
Thread.Sleep(5);
|
||||
|
||||
// 3. Open the gate.
|
||||
// This triggers the Gatekeeper to complete. It will change its State and iterate its dependency list.
|
||||
// This happens CONCURRENTLY with the loop above adding more items to that same list.
|
||||
startSignal = true;
|
||||
|
||||
scheduleTask.Wait(TestContext.CancellationTokenSource.Token);
|
||||
|
||||
// 4. Validate results
|
||||
// If the lock-free logic works, every single dependent vectorJob must eventually execute.
|
||||
// If there is a race (e.g., missed notification), pExecutedCount will stick below jobCount.
|
||||
var spin = new SpinWait();
|
||||
var timeout = DateTime.Now.AddSeconds(10);
|
||||
|
||||
while (Volatile.Read(ref *pExecutedCount) < jobCount)
|
||||
{
|
||||
if (DateTime.Now > timeout)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
spin.SpinOnce();
|
||||
}
|
||||
|
||||
// Ensure the root vectorJob is officially cleaned up
|
||||
_jobScheduler.WaitComplete(rootHandle);
|
||||
|
||||
Assert.AreEqual(jobCount, *pExecutedCount, "Race condition detected: Some dependent jobs failed to execute (Wait timeout).");
|
||||
|
||||
NativeMemory.Free(pExecutedCount);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public void SPMDCorrectness()
|
||||
{
|
||||
const int size = 8;
|
||||
|
||||
var vectorBuf = stackalloc float[size * size];
|
||||
var vs = new Span<float>(vectorBuf, size * size);
|
||||
var vectorJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
|
||||
{
|
||||
buffers = vectorBuf,
|
||||
width = size,
|
||||
height = size,
|
||||
};
|
||||
|
||||
vectorJob.Run(size * size, -1);
|
||||
|
||||
var spmdBuf = stackalloc float[size * size];
|
||||
var ss = new Span<float>(spmdBuf, size * size);
|
||||
var spmdJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
|
||||
{
|
||||
buffers = spmdBuf,
|
||||
width = size,
|
||||
height = size,
|
||||
};
|
||||
|
||||
spmdJob.Run(size * size, -1);
|
||||
|
||||
var eq = vs.SequenceCompareTo(ss);
|
||||
Assert.AreEqual(0, eq);
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
@@ -70,4 +70,28 @@ internal unsafe struct ParallelMultiplyJob : IJobParallelFor
|
||||
{
|
||||
inout[loopIndex] *= multiplier;
|
||||
}
|
||||
}
|
||||
|
||||
public unsafe struct WaitJob : IJob
|
||||
{
|
||||
public bool* pSignal;
|
||||
|
||||
public void Execute(int loopIndex)
|
||||
{
|
||||
var spin = new SpinWait();
|
||||
while (!Volatile.Read(ref *pSignal))
|
||||
{
|
||||
spin.SpinOnce();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public unsafe struct IncrementJob : IJob
|
||||
{
|
||||
public int* pCounter;
|
||||
|
||||
public void Execute(int loopIndex)
|
||||
{
|
||||
Interlocked.Increment(ref *pCounter);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user