SPMD SIMD math library & lock-free job system integration
- Add new SPMD SIMD math project with scalar/vector lanes
- Integrate SPMD jobs and scheduling into the job system
- Implement lock-free job dependency management
- Update math functions for .NET 10 and SIMD performance
- Add SPMD benchmarks, compress-store tests, and race tests
- Introduce generic Result<T> error handling utilities
- Solution/project file updates and code cleanup
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
#define NOISE_BENCHMARK
|
||||
#define ADD_BENCHMARK
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Numerics;
|
||||
using System.Runtime.Intrinsics;
|
||||
|
||||
@@ -9,7 +10,7 @@ namespace Misaki.HighPerformance.Test.Benchmark;
|
||||
|
||||
public class MathematicsBenchmark
|
||||
{
|
||||
#if VECTOR_BENCHMARK
|
||||
#if ADD_BENCHMARK
|
||||
private Vector4 _va = new Vector4(1, 2, 1, 2);
|
||||
private Vector4 _vb = new Vector4(3, 4, 3, 4);
|
||||
|
||||
@@ -39,61 +40,52 @@ public class MathematicsBenchmark
|
||||
}
|
||||
#endif
|
||||
|
||||
#if NOISE_BENCHMARK
|
||||
#if FMA_BENCHMARK
|
||||
private Vector4 _va = new Vector4(1, 2, 1, 2);
|
||||
private Vector4 _vb = new Vector4(3, 4, 3, 4);
|
||||
private Vector4 _vc = new Vector4(5, 6, 5, 6);
|
||||
|
||||
private const int _SIZE = 32;
|
||||
private Vector128<float> _va128 = Vector128.Create(1f, 2f, 1f, 2f);
|
||||
private Vector128<float> _vb128 = Vector128.Create(3f, 4f, 3f, 4f);
|
||||
private Vector128<float> _vc128 = Vector128.Create(5f, 6f, 5f, 6f);
|
||||
|
||||
private float4 _fa = new float4(1, 2, 1, 2);
|
||||
private float4 _fb = new float4(3, 4, 3, 4);
|
||||
private float4 _fc = new float4(5, 6, 5, 6);
|
||||
|
||||
[Benchmark]
|
||||
public unsafe void VectorNoise()
|
||||
public Vector4 Vector4()
|
||||
{
|
||||
var buf = stackalloc float[_SIZE * _SIZE];
|
||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
|
||||
for (var i = 0; i < 10; i++)
|
||||
{
|
||||
buffers = buf,
|
||||
width = _SIZE,
|
||||
height = _SIZE,
|
||||
};
|
||||
|
||||
for (var i = 0; i < _SIZE * _SIZE; i++)
|
||||
{
|
||||
job.Execute(i, 0);
|
||||
_va = _vb * _vc + _va;
|
||||
}
|
||||
|
||||
return _va;
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public unsafe void MathNoise()
|
||||
public Vector128<float> VectorFMA()
|
||||
{
|
||||
var buf = stackalloc float[_SIZE * _SIZE];
|
||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
|
||||
for (var i = 0; i < 10; i++)
|
||||
{
|
||||
buffers = buf,
|
||||
width = _SIZE,
|
||||
height = _SIZE,
|
||||
};
|
||||
|
||||
for (var i = 0; i < _SIZE * _SIZE; i++)
|
||||
{
|
||||
job.Execute(i, 0);
|
||||
_va128 = System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(_vb128, _vc128, _va128);
|
||||
}
|
||||
|
||||
return _va128;
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
// This is 10x faster than VectorNoise and MathNoise, but writing a Burst-like compiler that compiles MathNoise into this form is incredibly hard.
|
||||
public unsafe void MathVNoise()
|
||||
public float4 floatFMA()
|
||||
{
|
||||
var buf = stackalloc float[_SIZE * _SIZE];
|
||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMathV
|
||||
for (var i = 0; i < 10; i++)
|
||||
{
|
||||
buffers = buf,
|
||||
width = _SIZE,
|
||||
height = _SIZE,
|
||||
};
|
||||
|
||||
for (var i = 0; i < _SIZE * _SIZE / 8; i++)
|
||||
{
|
||||
job.Execute(i, 0);
|
||||
_fa = _fb * _fc + _fa;
|
||||
}
|
||||
|
||||
return _fa;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if MATRIX_BENCHMARK
|
||||
|
||||
98
Misaki.HighPerformance.Test/Benchmark/SPMDBenchmark.cs
Normal file
98
Misaki.HighPerformance.Test/Benchmark/SPMDBenchmark.cs
Normal file
@@ -0,0 +1,98 @@
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.Benchmark;
|
||||
|
||||
/// <summary>
/// BenchmarkDotNet suite that runs the test project's noise jobs against a shared
/// <c>_SIZE</c> x <c>_SIZE</c> native float buffer.
/// </summary>
/// <remarks>
/// Only <see cref="VectorNoiseSingleThread"/> and <see cref="SPMDNoise"/> (the baseline) are
/// currently enabled; the other variants keep their <c>[Benchmark]</c> attributes commented out.
/// </remarks>
public unsafe class SPMDBenchmark
{
    // Edge length of the square buffer, in elements.
    private const int _SIZE = 512;

    // Total number of floats in the buffer.
    private const int _ELEMENT_COUNT = _SIZE * _SIZE;

    private JobScheduler _scheduler = null!;
    private float* _buf;

    /// <summary>
    /// Creates the scheduler (one worker per logical processor) and allocates the native buffer.
    /// The buffer contents are left uninitialized.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _scheduler = new JobScheduler(Environment.ProcessorCount);
        _buf = (float*)NativeMemory.Alloc((nuint)_ELEMENT_COUNT, sizeof(float));
    }

    /// <summary>
    /// Disposes the scheduler first, then releases the native buffer.
    /// </summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        _scheduler.Dispose();
        NativeMemory.Free(_buf);
    }

    /// <summary>
    /// Runs <c>NoiseJobVector</c> through its <c>Run</c> helper instead of the scheduler.
    /// </summary>
    [Benchmark]
    public void VectorNoiseSingleThread()
    {
        Jobs.NoiseJobVector job = new()
        {
            buffers = _buf,
            width = _SIZE,
            height = _SIZE,
        };

        job.Run(_ELEMENT_COUNT, 0);
    }

    /// <summary>
    /// Schedules <c>NoiseJobVector</c> as a parallel job and waits for it. Currently disabled.
    /// </summary>
    //[Benchmark]
    public void VectorNoise()
    {
        Jobs.NoiseJobVector job = new()
        {
            buffers = _buf,
            width = _SIZE,
            height = _SIZE,
        };

        _scheduler.WaitComplete(_scheduler.ScheduleParallel(ref job, _ELEMENT_COUNT, 64));
    }

    /// <summary>
    /// Schedules <c>NoiseJobMath</c> as a parallel job and waits for it. Currently disabled.
    /// </summary>
    //[Benchmark]
    public void MathNoise()
    {
        Jobs.NoiseJobMath job = new()
        {
            buffers = _buf,
            width = _SIZE,
            height = _SIZE,
        };

        _scheduler.WaitComplete(_scheduler.ScheduleParallel(ref job, _ELEMENT_COUNT, 64));
    }

    /// <summary>
    /// Schedules the hand-vectorized <c>NoiseJobMathV</c> and waits for it. Currently disabled.
    /// </summary>
    //[Benchmark(Baseline = true)]
    public void ManualSPMDNoise()
    {
        // NoiseJobMathV handles 8 elements per loop index, so round the iteration count up.
        const int elementsPerIteration = 8;

        Jobs.NoiseJobMathV job = new()
        {
            buffers = _buf,
            width = _SIZE,
            height = _SIZE,
        };

        var iterations = (_ELEMENT_COUNT + elementsPerIteration - 1) / elementsPerIteration;
        _scheduler.WaitComplete(_scheduler.ScheduleParallel(ref job, iterations, 64));
    }

    /// <summary>
    /// Schedules the generic <c>NoiseJobMathSPMD</c> through the SPMD scheduling entry point
    /// and waits for it. This is the baseline benchmark.
    /// </summary>
    [Benchmark(Baseline = true)]
    public void SPMDNoise()
    {
        Jobs.NoiseJobMathSPMD job = new()
        {
            buffers = _buf,
            width = _SIZE,
            height = _SIZE,
        };

        var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(
            ref job,
            _ELEMENT_COUNT,
            64,
            -1,
            JobHandle.Invalid);

        _scheduler.WaitComplete(handle);
    }
}
|
||||
@@ -1,9 +1,9 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.Jobs;
|
||||
|
||||
@@ -16,7 +16,7 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static float Frac(float x)
|
||||
{
|
||||
return x - MathF.Truncate(x);
|
||||
return x - MathF.Floor(x);
|
||||
}
|
||||
|
||||
private static Vector2 GradientNoiseDirect(Vector2 uv)
|
||||
@@ -101,7 +101,7 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
||||
private static Vector256<float> Mod289(Vector256<float> x)
|
||||
{
|
||||
var div = x / Vector256.Create(289.0f);
|
||||
var flr = Vector256.Floor(div);
|
||||
var flr = Vector256.Truncate(div);
|
||||
return x - (flr * Vector256.Create(289.0f));
|
||||
}
|
||||
|
||||
@@ -119,12 +119,13 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
||||
var hy = Mod289(iy);
|
||||
|
||||
var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
||||
p = Mod289(p * hx + hy);
|
||||
p = Mod289(p * hx) + hy;
|
||||
var pPrev = p;
|
||||
p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
||||
p = Mod289(p * hx);
|
||||
p = Mod289(p * pPrev);
|
||||
|
||||
var r = (p / 41.0f);
|
||||
r = (r - Vector256.Floor(r)) * 2.0f - Vector256<float>.One;
|
||||
r = (r - Vector256.Truncate(r)) * 2.0f - Vector256<float>.One;
|
||||
|
||||
var gx = r - Vector256.Floor(r + Vector256.Create(0.5f));
|
||||
var gy = Vector256.Abs(r) - Vector256.Create(0.5f);
|
||||
@@ -153,10 +154,10 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
||||
var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY);
|
||||
var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One);
|
||||
|
||||
var lerpX1 = d00 + (d10 - d00) * uX;
|
||||
var lerpX2 = d01 + (d11 - d01) * uX;
|
||||
var lerpY1 = d00 + (d10 - d00) * uY;
|
||||
var lerpY2 = d01 + (d11 - d01) * uY;
|
||||
|
||||
return lerpX1 + (lerpX2 - lerpX1) * uY;
|
||||
return lerpY1 + (lerpY2 - lerpY1) * uX;
|
||||
}
|
||||
|
||||
public void Execute(int loopIndex, int threadIndex)
|
||||
@@ -164,15 +165,17 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
||||
// ---------------------------------------------------------
|
||||
// IMPORTANT: Loop Stride is now 8!
|
||||
// ---------------------------------------------------------
|
||||
int baseIndex = loopIndex * 8;
|
||||
var baseIndex = loopIndex * 8;
|
||||
|
||||
// Safety check
|
||||
if (baseIndex + 7 >= width * height)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Calculate Coords
|
||||
int y = baseIndex / width;
|
||||
int x = baseIndex % width;
|
||||
var y = baseIndex / width;
|
||||
var x = baseIndex % width;
|
||||
|
||||
// Sequence: 0, 1, 2, 3, 4, 5, 6, 7
|
||||
var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f);
|
||||
@@ -185,6 +188,81 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
||||
var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight);
|
||||
|
||||
// Store 8 floats (32 bytes)
|
||||
Avx.Store(buffers + baseIndex, result);
|
||||
result.Store(buffers + baseIndex);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Gradient-noise job written once against the generic <c>ISPMD&lt;T, float&gt;</c> lane
/// abstraction, so the same code can be instantiated for whichever lane type the caller supplies.
/// </summary>
internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
{
    /// <summary>Destination buffer; results are stored starting at <c>buffers + baseIndex</c>.</summary>
    public float* buffers;

    /// <summary>Row length used to turn a linear index into x/y coordinates.</summary>
    public int width;

    /// <summary>Divisor used to normalize the y coordinate.</summary>
    public int height;

    /// <summary>
    /// Hashes the lattice cell (<paramref name="ix"/>, <paramref name="iy"/>) into a normalized
    /// 2D gradient and returns its dot product with the offset
    /// (<paramref name="fx"/>, <paramref name="fy"/>).
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static T GradDot<T>(T ix, T iy, T fx, T fy)
        where T : ISPMD<T, float>
    {
        var wrap = T.Create(289f);
        var scale = T.Create(34f);
        var one = T.Create(1f);
        var divisor = T.Create(41f);
        var two = T.Create(2f);
        var half = T.Create(0.5f);

        // Wrap both cell coordinates before hashing.
        var cellX = ix % wrap;
        var cellY = iy % wrap;

        // Two rounds of the (34 * v + 1) * v mod 289 permutation, then map into [-1, 1).
        var hash = (scale * cellX + one) * cellX % wrap + cellY;
        hash = (scale * hash + one) * hash % wrap;
        hash = T.Frac(hash / divisor) * two - one;

        var gradX = hash - T.Floor(hash + half);
        var gradY = T.Abs(hash) - half;

        // Normalize the gradient to unit length.
        var length = T.Sqrt(gradX * gradX + gradY * gradY);
        gradX = gradX / length;
        gradY = gradY / length;

        return gradX * fx + gradY * fy;
    }

    /// <summary>
    /// Evaluates gradient noise at (<paramref name="uvX"/>, <paramref name="uvY"/>) for every lane.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static T Noise<T>(T uvX, T uvY)
        where T : ISPMD<T, float>
    {
        var one = T.Create(1f);
        var six = T.Create(6f);
        var ten = T.Create(10f);
        var fifteen = T.Create(15f);

        // Split the coordinate into its lattice cell and the offset inside that cell.
        var cellX = T.Floor(uvX);
        var cellY = T.Floor(uvY);
        var offX = uvX - cellX;
        var offY = uvY - cellY;

        // Gradient contribution of each of the four cell corners.
        var d00 = GradDot(cellX, cellY, offX, offY);
        var d01 = GradDot(cellX, cellY + one, offX, offY - one);
        var d10 = GradDot(cellX + one, cellY, offX - one, offY);
        var d11 = GradDot(cellX + one, cellY + one, offX - one, offY - one);

        // Quintic fade curve: t^3 * (t * (6t - 15) + 10).
        var fadeX = offX * offX * offX * (offX * (offX * six - fifteen) + ten);
        var fadeY = offY * offY * offY * (offY * (offY * six - fifteen) + ten);

        // NOTE(review): corners that differ in x (d00/d10, d01/d11) are blended with the y fade
        // and the outer blend uses the x fade. This mirrors the updated NoiseJobMathV in the same
        // change; confirm it is intentional before "fixing" it, since it changes the output.
        var lower = T.Lerp(d00, d10, fadeY);
        var upper = T.Lerp(d01, d11, fadeY);

        return T.Lerp(lower, upper, fadeX);
    }

    /// <summary>
    /// Computes noise for the lanes starting at linear index <paramref name="baseIndex"/> and
    /// stores the result at <c>buffers + baseIndex</c>.
    /// </summary>
    /// <param name="baseIndex">Linear index of the first lane; presumably subsequent lanes step by 1 (see the <c>Sequence</c> call).</param>
    /// <param name="threadIndex">Unused by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        var linear = TLane.Sequence(baseIndex, 1f);
        var rowLength = TLane.Create(width);
        var rowCount = TLane.Create(height);

        // x = index mod width, y = floor(index / width); both normalized by their extent.
        var u = (linear % rowLength) / rowLength;
        var v = TLane.Floor(linear / rowLength) / rowCount;

        var noise = Noise(u, v);
        noise.Store(buffers + baseIndex);
    }
}
|
||||
@@ -24,6 +24,7 @@
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" />
|
||||
<ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
using Misaki.HighPerformance;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.LowLevel;
|
||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
using System.Numerics;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Text;
|
||||
|
||||
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>();
|
||||
|
||||
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.SPMDBenchmark>();
|
||||
//return;
|
||||
//using Misaki.HighPerformance.Collections;
|
||||
//using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
//using Misaki.HighPerformance.LowLevel.Collections;
|
||||
|
||||
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
@@ -0,0 +1,114 @@
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Numerics;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
/// <summary>
/// Console-driven smoke test for <c>WideLane&lt;double&gt;.CompressStore</c>: each case builds a
/// lane mask from a keep pattern, compress-stores the input, and prints PASS/FAIL.
/// </summary>
public static class CompressStoreTest
{
    /// <summary>
    /// Runs all keep-pattern cases and writes the results to the console.
    /// Failures are reported as text only; no exception is thrown for a failed case.
    /// </summary>
    public static void Run()
    {
        Console.WriteLine("--- Testing CompressStore (Double) ---");

        // Test 1: alternating pattern - keeps the elements at even indices (values 1, 3, 5, 7).
        TestPattern_Double(
            input: [1, 2, 3, 4, 5, 6, 7, 8],
            keepPattern: [true, false, true, false, true, false, true, false]
        );

        // Test 2: All True
        TestPattern_Double(
            input: [10, 20, 30, 40, 50, 60, 70, 80],
            keepPattern: [true, true, true, true, true, true, true, true]
        );

        // Test 3: All False
        TestPattern_Double(
            input: [10, 20, 30, 40, 50, 60, 70, 80],
            keepPattern: [false, false, false, false, false, false, false, false]
        );

        // Test 4: Sparse (First and Last only)
        TestPattern_Double(
            input: [1, 2, 3, 4, 5, 6, 7, 8],
            keepPattern: [true, false, false, false, false, false, false, true]
        );
    }

    /// <summary>
    /// Compress-stores the first <c>Vector&lt;double&gt;.Count</c> elements of
    /// <paramref name="input"/> using <paramref name="keepPattern"/> as the mask, then compares
    /// the count and the leading elements against a scalar reference.
    /// </summary>
    /// <param name="input">Source values; must hold at least <c>Vector&lt;double&gt;.Count</c> elements.</param>
    /// <param name="keepPattern">Per-element keep flags; must hold at least <c>Vector&lt;double&gt;.Count</c> elements.</param>
    /// <exception cref="ArgumentException">Either array is shorter than the vector width.</exception>
    private static unsafe void TestPattern_Double(double[] input, bool[] keepPattern)
    {
        // 1. Setup input. Only the first vecSize elements are used, so the same 8-element
        //    cases work when Vector<double> holds 2 or 4 elements.
        // NOTE(review): assumes WideLane<double> has exactly Vector<double>.Count lanes - confirm.
        var vecSize = Vector<double>.Count;

        if (input.Length < vecSize || keepPattern.Length < vecSize)
        {
            throw new ArgumentException(
                $"Both arrays must contain at least {vecSize} elements.");
        }

        var safeInput = new double[vecSize];
        var safeMaskVal = new double[vecSize];

        // Scalar reference: kept elements packed to the front, in order.
        var expected = new double[vecSize];
        var expectedCount = 0;

        for (var i = 0; i < vecSize; i++)
        {
            safeInput[i] = input[i];

            // The mask is produced by comparing against zero below:
            // keep -> 1 (greater than 0), drop -> -1 (not greater than 0).
            safeMaskVal[i] = keepPattern[i] ? 1 : -1;

            if (keepPattern[i])
            {
                expected[expectedCount++] = input[i];
            }
        }

        // 2. Create WideLanes
        var vInput = WideLane<double>.Load(ref safeInput.AsSpan().GetPinnableReference());

        // Create Mask: greater than 0
        var vMaskVal = WideLane<double>.Load(ref safeMaskVal.AsSpan().GetPinnableReference());
        var vZero = WideLane<double>.Create(0);
        var vMask = WideLane<double>.GreaterThan(vMaskVal, vZero);

        // 3. Run CompressStore
        var outputBuffer = new double[vecSize];
        int actualCount;

        fixed (double* ptr = outputBuffer)
        {
            actualCount = vInput.CompressStore(vMask, ptr);
        }

        // 4. Verify: the count must match, and so must the first expectedCount elements.
        //    Elements past the packed prefix are not checked.
        var pass = actualCount == expectedCount;
        for (var i = 0; i < expectedCount; i++)
        {
            if (outputBuffer[i] != expected[i])
            {
                pass = false;
            }
        }

        // 5. Report. The label is derived from the vector width only; 8 lanes of double is a
        //    512-bit vector and must not be reported as "Scalar".
        var hardware = vecSize switch
        {
            8 => "AVX-512 (512-bit)",
            4 => "AVX2 (256-bit)",
            2 => "SSE/NEON (128-bit)",
            _ => "Scalar",
        };

        Console.Write($"[{hardware}] Pattern: ");
        for (var i = 0; i < vecSize; i++)
        {
            Console.Write(keepPattern[i] ? "1" : "0");
        }

        if (pass)
        {
            Console.WriteLine($" -> PASS (Count: {actualCount})");
        }
        else
        {
            Console.WriteLine($" -> FAIL!");
            Console.WriteLine($" Expected Count: {expectedCount}, Actual: {actualCount}");

            Console.Write(" Expected Data: ");
            foreach (var d in expected)
            {
                Console.Write($"{d} ");
            }

            Console.WriteLine();

            Console.Write(" Actual Data: ");
            foreach (var d in outputBuffer)
            {
                Console.Write($"{d} ");
            }

            Console.WriteLine();
        }
    }
}
|
||||
@@ -1,19 +1,28 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
[TestClass]
|
||||
[DoNotParallelize]
|
||||
public unsafe class TestJobSystem
|
||||
{
|
||||
private JobScheduler _jobScheduler = null!;
|
||||
|
||||
public TestContext TestContext
|
||||
{
|
||||
get;
|
||||
set;
|
||||
}
|
||||
|
||||
[TestInitialize]
|
||||
public void Initialize()
|
||||
{
|
||||
_jobScheduler = new JobScheduler(Environment.ProcessorCount);
|
||||
_jobScheduler = new JobScheduler(3);
|
||||
}
|
||||
|
||||
[TestCleanup]
|
||||
@@ -251,4 +260,102 @@ public unsafe class TestJobSystem
|
||||
|
||||
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
|
||||
}
|
||||
|
||||
/// <summary>
/// Stress test for dependency registration racing with dependency completion: a root job is
/// held open while another thread schedules <c>jobCount</c> dependents on it, then the root is
/// released mid-flood. Every dependent must still run exactly once.
/// </summary>
[TestMethod]
public void RaceConditionTest()
{
    const int jobCount = 20000;
    const long timeoutMilliseconds = 10_000;

    var token = TestContext.CancellationTokenSource.Token;

    var pExecutedCount = (int*)NativeMemory.Alloc(sizeof(int));
    *pExecutedCount = 0;

    // Lives on this method's stack; the root job polls it through a raw pointer, so it must be
    // set before this frame is left (see the finally block below).
    var startSignal = false;

    // 1. Create a "gatekeeper" job that spins on a worker thread until signaled.
    //    This lets the test control exactly when the dependency completes.
    var rootJob = new WaitJob { pSignal = &startSignal };
    var rootHandle = _jobScheduler.Schedule(ref rootJob);

    // 2. Start a background task that floods the scheduler with jobs depending on the gatekeeper.
    using var barrier = new Barrier(2);
    var scheduleTask = Task.Run(() =>
    {
        var depJob = new IncrementJob { pCounter = pExecutedCount };
        barrier.SignalAndWait(token); // Synchronize start with the main thread

        for (var i = 0; i < jobCount; i++)
        {
            // CONTENTION POINT:
            // Adding a dependency on 'rootHandle'. At some point this happens exactly while
            // 'rootHandle' is transitioning to Completed.
            _jobScheduler.Schedule(ref depJob, rootHandle);
        }
    }, token);

    try
    {
        barrier.SignalAndWait(token); // Wait for the scheduling task to be ready

        // Give the scheduling loop a head start so some dependents are queued first.
        Thread.Sleep(5);
    }
    finally
    {
        // 3. Open the gate, even if the wait above was cancelled; otherwise the gatekeeper would
        //    keep spinning on a pointer into this (soon dead) stack frame.
        //    Completing the gatekeeper runs CONCURRENTLY with the loop above adding dependents.
        //    Volatile.Write pairs with the Volatile.Read in WaitJob.Execute.
        Volatile.Write(ref startSignal, true);
    }

    scheduleTask.Wait(token);

    // 4. Validate results.
    //    If the lock-free logic works, every dependent job eventually executes.
    //    If there is a race (e.g. a missed notification), the counter sticks below jobCount.
    //    The deadline uses a monotonic tick count so wall-clock adjustments cannot affect it.
    var spin = new SpinWait();
    var deadline = Environment.TickCount64 + timeoutMilliseconds;

    while (Volatile.Read(ref *pExecutedCount) < jobCount && Environment.TickCount64 < deadline)
    {
        spin.SpinOnce();
    }

    // Ensure the root job is officially cleaned up
    _jobScheduler.WaitComplete(rootHandle);

    var executed = Volatile.Read(ref *pExecutedCount);
    Assert.AreEqual(jobCount, executed, "Race condition detected: Some dependent jobs failed to execute (Wait timeout).");

    // Freed only after the assertion passes: on failure, late dependents could still
    // increment the counter, so releasing it there would risk a write to freed memory.
    NativeMemory.Free(pExecutedCount);
}
|
||||
|
||||
/// <summary>
/// Runs <c>NoiseJobVector</c> and <c>NoiseJobMath</c> over the same 8x8 grid and requires the
/// two output buffers to compare equal element by element.
/// </summary>
/// <remarks>
/// NOTE(review): despite the test name, the second job is <c>NoiseJobMath</c>, not
/// <c>NoiseJobMathSPMD</c> - confirm which implementation this test is meant to cover.
/// </remarks>
[TestMethod]
public void SPMDCorrectness()
{
    const int size = 8;
    const int elementCount = size * size;

    // First implementation: NoiseJobVector.
    float* vectorBuf = stackalloc float[elementCount];
    var vectorJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
    {
        buffers = vectorBuf,
        width = size,
        height = size,
    };
    vectorJob.Run(elementCount, -1);

    // Second implementation: NoiseJobMath.
    float* mathBuf = stackalloc float[elementCount];
    var mathJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
    {
        buffers = mathBuf,
        width = size,
        height = size,
    };
    mathJob.Run(elementCount, -1);

    var vectorResult = new Span<float>(vectorBuf, elementCount);
    var mathResult = new Span<float>(mathBuf, elementCount);

    // SequenceCompareTo returns 0 only when both sequences have equal length and contents.
    var comparison = vectorResult.SequenceCompareTo(mathResult);
    Assert.AreEqual(0, comparison);
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||
|
||||
@@ -70,4 +70,28 @@ internal unsafe struct ParallelMultiplyJob : IJobParallelFor
|
||||
{
|
||||
inout[loopIndex] *= multiplier;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Job that spin-waits until the flag behind <see cref="pSignal"/> reads <c>true</c>.
/// </summary>
public unsafe struct WaitJob : IJob
{
    /// <summary>Flag polled by <see cref="Execute"/>; it must stay valid while the job runs.</summary>
    public bool* pSignal;

    /// <summary>Blocks the executing thread, spinning until <c>*pSignal</c> is observed as true.</summary>
    /// <param name="loopIndex">Unused.</param>
    public void Execute(int loopIndex)
    {
        var spinner = new SpinWait();

        for (;;)
        {
            // Volatile read so a store made by another thread is observed.
            if (Volatile.Read(ref *pSignal))
            {
                return;
            }

            spinner.SpinOnce();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Job that atomically adds one to the integer behind <see cref="pCounter"/>.
/// </summary>
public unsafe struct IncrementJob : IJob
{
    /// <summary>Counter updated by <see cref="Execute"/>; it must stay valid while the job runs.</summary>
    public int* pCounter;

    /// <summary>Atomically increments <c>*pCounter</c> by one.</summary>
    /// <param name="loopIndex">Unused.</param>
    public void Execute(int loopIndex)
    {
        Interlocked.Add(ref *pCounter, 1);
    }
}
|
||||
Reference in New Issue
Block a user