SPMD SIMD math library & lock-free job system integration

- Add new SPMD SIMD math project with scalar/vector lanes
- Integrate SPMD jobs and scheduling into job system
- Implement lock-free job dependency management
- Update math functions for .NET 10 and SIMD performance
- Add SPMD benchmarks, compress-store tests, and race tests
- Introduce generic Result<T> error handling utilities
- Solution/project file updates and code cleanup
This commit is contained in:
2026-02-11 22:44:30 +09:00
parent c36405645b
commit a9c143c2a2
22 changed files with 3433 additions and 221 deletions

View File

@@ -1,9 +1,9 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Test.Jobs;
@@ -16,7 +16,7 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
/// <summary>
/// Returns the fractional part of <paramref name="x"/>, computed as <c>x - floor(x)</c>.
/// </summary>
/// <param name="x">Input value.</param>
/// <returns>
/// <paramref name="x"/> minus the largest integer not greater than it; e.g. 1.25 gives 0.25 and -1.25 gives 0.75.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Frac(float x)
{
    // Floor (not Truncate) so negative inputs wrap upward instead of yielding a negative fraction.
    return x - MathF.Floor(x);
}
private static Vector2 GradientNoiseDirect(Vector2 uv)
@@ -101,7 +101,7 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
/// <summary>
/// Reduces every lane of <paramref name="x"/> modulo 289 as <c>x - trunc(x / 289) * 289</c>.
/// </summary>
/// <param name="x">Eight single-precision lanes to reduce.</param>
/// <returns>Per-lane remainder; because the quotient is truncated, the remainder keeps the sign of the input lane.</returns>
private static Vector256<float> Mod289(Vector256<float> x)
{
    var modulus = Vector256.Create(289.0f);

    // Round the quotient toward zero so the sign behaviour matches a truncated
    // remainder (the convention of the C# % operator) rather than a floored modulo.
    var quotient = Vector256.Truncate(x / modulus);

    return x - (quotient * modulus);
}
@@ -119,12 +119,13 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var hy = Mod289(iy);
var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f);
p = Mod289(p * hx + hy);
p = Mod289(p * hx) + hy;
var pPrev = p;
p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f);
p = Mod289(p * hx);
p = Mod289(p * pPrev);
var r = (p / 41.0f);
r = (r - Vector256.Floor(r)) * 2.0f - Vector256<float>.One;
r = (r - Vector256.Truncate(r)) * 2.0f - Vector256<float>.One;
var gx = r - Vector256.Floor(r + Vector256.Create(0.5f));
var gy = Vector256.Abs(r) - Vector256.Create(0.5f);
@@ -153,10 +154,10 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY);
var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One);
var lerpX1 = d00 + (d10 - d00) * uX;
var lerpX2 = d01 + (d11 - d01) * uX;
var lerpY1 = d00 + (d10 - d00) * uY;
var lerpY2 = d01 + (d11 - d01) * uY;
return lerpX1 + (lerpX2 - lerpX1) * uY;
return lerpY1 + (lerpY2 - lerpY1) * uX;
}
public void Execute(int loopIndex, int threadIndex)
@@ -164,15 +165,17 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
// ---------------------------------------------------------
// IMPORTANT: Loop Stride is now 8!
// ---------------------------------------------------------
int baseIndex = loopIndex * 8;
var baseIndex = loopIndex * 8;
// Safety check
if (baseIndex + 7 >= width * height)
{
return;
}
// Calculate Coords
int y = baseIndex / width;
int x = baseIndex % width;
var y = baseIndex / width;
var x = baseIndex % width;
// Sequence: 0, 1, 2, 3, 4, 5, 6, 7
var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f);
@@ -185,6 +188,81 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight);
// Store 8 floats (32 bytes)
Avx.Store(buffers + baseIndex, result);
result.Store(buffers + baseIndex);
}
}
/// <summary>
/// SPMD job that evaluates 2D gradient noise for a run of consecutive linear indices
/// and stores the results into <see cref="buffers"/>.
/// </summary>
internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
{
    /// <summary>Destination memory; <c>Execute</c> stores its lanes starting at <c>buffers + baseIndex</c>.</summary>
    public float* buffers;

    /// <summary>Row length used to split a linear index into a column and a row.</summary>
    public int width;

    /// <summary>Divisor used to normalise the row coordinate.</summary>
    public int height;

    /// <summary>
    /// Derives a unit-length gradient from the lattice coordinates (<paramref name="ix"/>, <paramref name="iy"/>)
    /// and returns its dot product with the offset (<paramref name="fx"/>, <paramref name="fy"/>).
    /// </summary>
    /// <param name="ix">Lattice X coordinate of the corner.</param>
    /// <param name="iy">Lattice Y coordinate of the corner.</param>
    /// <param name="fx">X offset from the corner to the sample point.</param>
    /// <param name="fy">Y offset from the corner to the sample point.</param>
    /// <returns><c>gx * fx + gy * fy</c>, where (gx, gy) is the normalised gradient.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static T GradDot<T>(T ix, T iy, T fx, T fy)
        where T : ISPMD<T, float>
    {
        var c289 = T.Create(289f);
        var c34 = T.Create(34f);
        var c1 = T.Create(1f);
        var c41 = T.Create(41f);
        var c2 = T.Create(2f);
        var half = T.Create(0.5f);

        // Keep the coordinates small before the polynomial mixing below.
        ix %= c289;
        iy %= c289;

        // Two rounds of (34 * v + 1) * v mod 289; iy is added after the first round.
        var x = (c34 * ix + c1) * ix % c289 + iy;
        x = (c34 * x + c1) * x % c289;

        // Rescale frac(x / 41) from [0, 1) to [-1, 1).
        x = T.Frac(x / c41) * c2 - c1;

        var gx = x - T.Floor(x + half);
        var gy = T.Abs(x) - half;

        // normalize
        var len = T.Sqrt(gx * gx + gy * gy);
        gx /= len;
        gy /= len;

        return gx * fx + gy * fy;
    }

    /// <summary>
    /// Evaluates gradient noise at (<paramref name="uvX"/>, <paramref name="uvY"/>): the four corner
    /// contributions of the enclosing unit cell are blended with a quintic fade curve.
    /// </summary>
    /// <param name="uvX">Sample X coordinate.</param>
    /// <param name="uvY">Sample Y coordinate.</param>
    /// <returns>The bilinearly blended corner contributions.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static T Noise<T>(T uvX, T uvY)
        where T : ISPMD<T, float>
    {
        var c1 = T.Create(1f);
        var c6 = T.Create(6f);
        var c10 = T.Create(10f);
        var c15 = T.Create(15f);

        // Split the sample position into its lattice cell and the offset inside that cell.
        var ipX = T.Floor(uvX);
        var ipY = T.Floor(uvY);
        var fpX = uvX - ipX;
        var fpY = uvY - ipY;

        // Corner naming: first digit is the X offset, second digit is the Y offset.
        var d00 = GradDot(ipX, ipY, fpX, fpY);
        var d01 = GradDot(ipX, ipY + c1, fpX, fpY - c1);
        var d10 = GradDot(ipX + c1, ipY, fpX - c1, fpY);
        var d11 = GradDot(ipX + c1, ipY + c1, fpX - c1, fpY - c1);

        // fade: t^3 * (t * (6t - 15) + 10)
        var uX = fpX * fpX * fpX * (fpX * (fpX * c6 - c15) + c10);
        var uY = fpY * fpY * fpY * (fpY * (fpY * c6 - c15) + c10);

        // Blend along Y between corners that share an X offset (d00/d01 and d10/d11),
        // then along X between the two results, so each weight is applied on its own axis.
        // NOTE(review): assumes T.Lerp(a, b, t) yields a at t = 0 and b at t = 1 - confirm against ISPMD.
        return T.Lerp(T.Lerp(d00, d01, uY), T.Lerp(d10, d11, uY), uX);
    }

    /// <summary>
    /// Computes noise for the lanes beginning at <paramref name="baseIndex"/> and stores them at
    /// <c>buffers + baseIndex</c>.
    /// </summary>
    /// <param name="baseIndex">Linear index of the first lane; also the store offset into <see cref="buffers"/>.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // NOTE(review): presumably yields baseIndex, baseIndex + 1, ... per lane - confirm against ISPMD.Sequence.
        var indices = TLane.Sequence(baseIndex, 1f);
        var w = TLane.Create(width);
        var h = TLane.Create(height);

        // Column / width and row / height.
        var uvX = (indices % w) / w;
        var uvY = TLane.Floor(indices / w) / h;

        var result = Noise(uvX, uvY);

        // NOTE(review): no bounds check here; assumes the scheduler only passes base indices
        // whose full lane width fits inside the buffer - verify against the caller.
        result.Store(buffers + baseIndex);
    }
}