Add vector types to SPMD so that (vector size × lane width) values can be loaded into memory at once for SIMD calculation.

This commit is contained in:
2026-02-13 21:47:05 +09:00
parent 75d33d0763
commit 4f964b2d2a
22 changed files with 3682 additions and 447 deletions

View File

@@ -2,7 +2,6 @@
using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics;
using System.Runtime.Intrinsics;

View File

@@ -0,0 +1,181 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics;
using static Misaki.HighPerformance.Mathematics.math;
namespace Misaki.HighPerformance.Test.Jobs;
/// <summary>
/// Scalar and small-vector helper functions shared by the procedural noise routines:
/// division-free modulo, a permutation polynomial, fade curves and hashed gradients.
/// </summary>
public static partial class noise
{
    // ---- x mod 289, computed with a reciprocal multiply instead of a division ----

    /// <summary>Returns <c>x - floor(x / 289) * 289</c>.</summary>
    public static float mod289(float x) => x - floor(x * (1.0f / 289.0f)) * 289.0f;

    /// <summary>Component-wise <c>x - floor(x / 289) * 289</c>.</summary>
    public static float2 mod289(float2 x) => x - floor(x * (1.0f / 289.0f)) * 289.0f;

    /// <summary>Component-wise <c>x - floor(x / 289) * 289</c>.</summary>
    public static float3 mod289(float3 x) => x - floor(x * (1.0f / 289.0f)) * 289.0f;

    /// <summary>Component-wise <c>x - floor(x / 289) * 289</c>.</summary>
    public static float4 mod289(float4 x) => x - floor(x * (1.0f / 289.0f)) * 289.0f;

    // ---- x mod 7, computed with a reciprocal multiply instead of a division ----

    /// <summary>Component-wise <c>x - floor(x / 7) * 7</c>.</summary>
    public static float3 mod7(float3 x) => x - floor(x * (1.0f / 7.0f)) * 7.0f;

    /// <summary>Component-wise <c>x - floor(x / 7) * 7</c>.</summary>
    public static float4 mod7(float4 x) => x - floor(x * (1.0f / 7.0f)) * 7.0f;

    // ---- Permutation polynomial: (34 * x^2 + x) mod 289 ----

    /// <summary>Evaluates <c>((34 * x + 1) * x) mod 289</c>.</summary>
    public static float permute(float x) => mod289((34.0f * x + 1.0f) * x);

    /// <summary>Component-wise <c>((34 * x + 1) * x) mod 289</c>.</summary>
    public static float3 permute(float3 x) => mod289((34.0f * x + 1.0f) * x);

    /// <summary>Component-wise <c>((34 * x + 1) * x) mod 289</c>.</summary>
    public static float4 permute(float4 x) => mod289((34.0f * x + 1.0f) * x);

    // ---- Linear stand-in for an inverse square root (per the method name) ----

    /// <summary>Returns <c>1.79284291400159 - 0.85373472095314 * r</c>.</summary>
    public static float taylorInvSqrt(float r) => 1.79284291400159f - 0.85373472095314f * r;

    /// <summary>Component-wise <c>1.79284291400159 - 0.85373472095314 * r</c>.</summary>
    public static float4 taylorInvSqrt(float4 r) => 1.79284291400159f - 0.85373472095314f * r;

    // ---- Quintic fade curve: 6t^5 - 15t^4 + 10t^3 ----

    /// <summary>Component-wise <c>t^3 * (t * (6t - 15) + 10)</c>.</summary>
    public static float2 fade(float2 t) => t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);

    /// <summary>Component-wise <c>t^3 * (t * (6t - 15) + 10)</c>.</summary>
    public static float3 fade(float3 t) => t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);

    /// <summary>Component-wise <c>t^3 * (t * (6t - 15) + 10)</c>.</summary>
    public static float4 fade(float4 t) => t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);

    /// <summary>
    /// Builds a 4-D gradient vector from the hash value <paramref name="j"/>.
    /// </summary>
    /// <param name="j">Hash value that selects the gradient.</param>
    /// <param name="ip">Per-axis multipliers applied to <paramref name="j"/>; <c>ip.z</c> also scales the quantised result.</param>
    /// <returns>The gradient; its w component is <c>1.5 - (|x| + |y| + |z|)</c> of the initial xyz part.</returns>
    public static float4 grad4(float j, float4 ip)
    {
        var unit = float4(1.0f, 1.0f, 1.0f, -1.0f);

        // Quantise frac(j * ip.xyz) into 7 steps, scale by ip.z and shift down by one.
        var xyz = floor(frac(float3(j) * ip.xyz) * 7.0f) * ip.z - 1.0f;
        float w = 1.5f - dot(abs(xyz), unit.xyz);
        var grad = float4(xyz, w);

        // Comparison mask converted to float4, one entry per component that is below zero.
        var negative = float4(grad < 0.0f);

        // The xyz adjustment is gated by the w entry of the mask (negative.www).
        grad.xyz += (negative.xyz * 2.0f - 1.0f) * negative.www;
        return grad;
    }

    /// <summary>
    /// Hashes a 2-D lattice point into a unit-length gradient, with an extra rotation.
    /// </summary>
    /// <param name="p">Point to hash.</param>
    /// <param name="rot">Rotation offset, expressed in turns (it is added before the fractional part is scaled by 2*pi).</param>
    /// <returns><c>(cos(angle), sin(angle))</c> for the hashed angle.</returns>
    public static float2 rgrad2(float2 p, float rot)
    {
        // 0.0243902439 is 1/41; adding rot shifts the hash before it is wrapped.
        float turns = permute(permute(p.x) + p.y) * 0.0243902439f + rot;

        // Keep the fractional turn and convert it to radians (6.28318530718 = 2*pi).
        float angle = frac(turns) * 6.28318530718f;
        return float2(cos(angle), sin(angle));
    }
}
/// <summary>
/// Parallel-for job that evaluates a 3-D noise value for each cell of a
/// <c>size x size x size</c> grid and writes it to <see cref="buffers"/>.
/// </summary>
/// <remarks>
/// NOTE(review): the body looks like the classic 3-D simplex-noise formulation
/// (skew to a simplex grid, four corners, hashed gradients, radial falloff) — confirm
/// against the reference implementation this was ported from.
/// </remarks>
internal unsafe struct NoiseJob3D : IJobParallelFor
{
/// <summary>Destination pointer; each invocation writes the element at <c>loopIndex</c>.</summary>
// presumably must hold at least size * size * size floats — TODO confirm with the scheduling code
public float* buffers;
/// <summary>Edge length of the cubic grid. Must be non-zero: it is used as an integer divisor and modulus.</summary>
public int size; // size x size x size
/// <summary>
/// Computes the noise value for one grid cell and stores it at <c>buffers[loopIndex]</c>.
/// </summary>
/// <param name="loopIndex">Linear cell index; decomposed into x (fastest), y and z (slowest) coordinates.</param>
/// <param name="threadIndex">Not used by this job.</param>
public void Execute(int loopIndex, int threadIndex)
{
// Decompose the linear index into 3-D grid coordinates and divide each by size.
var v = float3(
(loopIndex % size) / (float)size,
((loopIndex / size) % size) / (float)size,
(loopIndex / (size * size)) / (float)size
);
// C = (1/6, 1/3); D = (0, 0.5, 1, 2). Both are used as swizzled constant tables below.
var C = float2(1.0f / 6.0f, 1.0f / 3.0f);
var D = float4(0.0f, 0.5f, 1.0f, 2.0f);
// First corner
var i = floor(v + dot(v, C.yyy));
var x0 = v - i + dot(i, C.xxx);
// Other corners
var g = step(x0.yzx, x0.xyz);
var l = 1.0f - g;
var i1 = min(g.xyz, l.zxy);
var i2 = max(g.xyz, l.zxy);
// x0 = x0 - 0.0 + 0.0 * C.xxx;
// x1 = x0 - i1 + 1.0 * C.xxx;
// x2 = x0 - i2 + 2.0 * C.xxx;
// x3 = x0 - 1.0 + 3.0 * C.xxx;
var x1 = x0 - i1 + C.xxx;
var x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
var x3 = x0 - D.yyy; // -1.0+3.0*C.x = -0.5 = -D.y
// Permutations: hash the four corners, one lane of p per corner.
i = noise.mod289(i);
var p = noise.permute(noise.permute(noise.permute(
i.z + float4(0.0f, i1.z, i2.z, 1.0f))
+ i.y + float4(0.0f, i1.y, i2.y, 1.0f))
+ i.x + float4(0.0f, i1.x, i2.x, 1.0f));
// Gradients: 7x7 points over a square, mapped onto an octahedron.
// The ring size 17*17 = 289 is close to a multiple of 49 (49*6 = 294)
float n_ = 0.142857142857f; // 1.0/7.0
// ns = (2/7, 0.5/7 - 1, 1/7)
var ns = n_ * D.wyz - D.xzx;
var j = p - 49.0f * floor(p * ns.z * ns.z); // mod(p, 7*7)
var x_ = floor(j * ns.z);
var y_ = floor(j - 7.0f * x_); // mod(j, 7)
var x = x_ * ns.x + ns.yyyy;
var y = y_ * ns.x + ns.yyyy;
var h = 1.0f - abs(x) - abs(y);
// Repack so that b0 holds the (x, y) pairs of corners 0/1 and b1 those of corners 2/3.
var b0 = float4(x.xy, y.xy);
var b1 = float4(x.zw, y.zw);
//float4 s0 = float4(math.lessThan(b0,0.0))*2.0 - 1.0;
//float4 s1 = float4(math.lessThan(b1,0.0))*2.0 - 1.0;
var s0 = floor(b0) * 2.0f + 1.0f;
var s1 = floor(b1) * 2.0f + 1.0f;
var sh = -step(h, float4(0.0f));
var a0 = b0.xzyw + s0.xzyw * sh.xxyy;
var a1 = b1.xzyw + s1.xzyw * sh.zzww;
// One gradient vector per corner.
var p0 = float3(a0.xy, h.x);
var p1 = float3(a0.zw, h.y);
var p2 = float3(a1.xy, h.z);
var p3 = float3(a1.zw, h.w);
// Normalise gradients (taylorInvSqrt is a linear approximation, not an exact 1/sqrt).
var norm = noise.taylorInvSqrt(float4(dot(p0, p0), dot(p1, p1), dot(p2, p2), dot(p3, p3)));
p0 *= norm.x;
p1 *= norm.y;
p2 *= norm.z;
p3 *= norm.w;
// Mix final noise value: per-corner weight is max(0.6 - |x|^2, 0) raised to the 4th power
// (squared here, squared again in the dot product below).
var m = max(0.6f - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0f);
m *= m;
buffers[loopIndex] = 42.0f * dot(m * m, float4(dot(p0, x0), dot(p1, x1), dot(p2, x2), dot(p3, x3)));
}
}

View File

@@ -1,6 +1,7 @@
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Runtime.InteropServices;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
@@ -21,6 +22,93 @@ internal unsafe struct DotProductJob : IJobSPMD<float>
}
}
/// <summary>
/// SPMD job that interpolates halfway between corresponding elements of
/// <see cref="arrayA"/> and <see cref="arrayB"/> and stores the length of each
/// interpolated vector in <see cref="results"/>.
/// </summary>
internal unsafe struct Vector2LerpJob : IJobSPMD<float>
{
    /// <summary>Interpolation start points.</summary>
    public float2[] arrayA;

    /// <summary>Interpolation end points.</summary>
    public float2[] arrayB;

    /// <summary>Receives one length per input pair.</summary>
    public float[] results;

    /// <summary>
    /// Processes the lane group that starts at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">SPMD lane type selected by the scheduler.</typeparam>
    /// <param name="baseIndex">Index of the first element handled by this invocation.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // presumably loads one float2 per lane starting at baseIndex — confirm against MathV.LoadVector2
        var a = MathV.LoadVector2<TLane, float>(ref arrayA[baseIndex].x);
        var b = MathV.LoadVector2<TLane, float>(ref arrayB[baseIndex].x);
        var t = TLane.Create(0.5f);
        var lerped = MathV.Lerp(a, b, t);
        var len = TLane.Sqrt(MathV.LengthSquared(lerped));

        // Pin the destination while the raw pointer is in use. Unsafe.AsPointer does not
        // pin, so the GC could relocate the managed array and leave the pointer dangling.
        fixed (float* destination = &results[baseIndex])
        {
            len.Store(destination);
        }
    }
}
/// <summary>
/// SPMD job that normalizes each element of <see cref="input"/> and writes the
/// result to the matching position in <see cref="output"/>.
/// </summary>
internal unsafe struct Vector4NormalizeJob : IJobSPMD<float>
{
    /// <summary>Vectors to normalize.</summary>
    public float4[] input;

    /// <summary>Receives the normalized vectors.</summary>
    public float4[] output;

    /// <summary>
    /// Processes the lane group that starts at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">SPMD lane type selected by the scheduler.</typeparam>
    /// <param name="baseIndex">Index of the first element handled by this invocation.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // presumably loads one float4 per lane starting at baseIndex — confirm against MathV.LoadVector4
        var vec = MathV.LoadVector4<TLane, float>(ref input[baseIndex].x);
        var normalized = MathV.Normalize(vec);

        // Pin the destination while the raw pointer is in use. Unsafe.AsPointer does not
        // pin, so the GC could relocate the managed array and leave the pointer dangling.
        fixed (float* destination = &output[baseIndex].x)
        {
            normalized.Store(destination);
        }
    }
}
/// <summary>
/// SPMD job that computes the cross product of corresponding elements of
/// <see cref="arrayA"/> and <see cref="arrayB"/> into <see cref="results"/>.
/// </summary>
internal unsafe struct Vector3CrossJob : IJobSPMD<float>
{
    /// <summary>Left-hand operands.</summary>
    public float3[] arrayA;

    /// <summary>Right-hand operands.</summary>
    public float3[] arrayB;

    /// <summary>Receives one cross product per input pair.</summary>
    public float3[] results;

    /// <summary>
    /// Processes the lane group that starts at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">SPMD lane type selected by the scheduler.</typeparam>
    /// <param name="baseIndex">Index of the first element handled by this invocation.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // presumably loads one float3 per lane starting at baseIndex — confirm against MathV.LoadVector3
        var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x);
        var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x);
        var cross = MathV.Cross(a, b);

        // Pin the destination while the raw pointer is in use. Unsafe.AsPointer does not
        // pin, so the GC could relocate the managed array and leave the pointer dangling.
        fixed (float* destination = &results[baseIndex].x)
        {
            cross.Store(destination);
        }
    }
}
/// <summary>
/// SPMD job that clamps each element of <see cref="values"/> between the matching
/// elements of <see cref="mins"/> and <see cref="maxs"/> and writes it to
/// <see cref="results"/>.
/// </summary>
internal unsafe struct MinMaxClampJob : IJobSPMD<float>
{
    /// <summary>Vectors to clamp.</summary>
    public float3[] values;

    /// <summary>Per-element lower bounds.</summary>
    public float3[] mins;

    /// <summary>Per-element upper bounds.</summary>
    public float3[] maxs;

    /// <summary>Receives the clamped vectors.</summary>
    public float3[] results;

    /// <summary>
    /// Processes the lane group that starts at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">SPMD lane type selected by the scheduler.</typeparam>
    /// <param name="baseIndex">Index of the first element handled by this invocation.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // presumably loads one float3 per lane starting at baseIndex — confirm against MathV.LoadVector3
        var val = MathV.LoadVector3<TLane, float>(ref values[baseIndex].x);
        var min = MathV.LoadVector3<TLane, float>(ref mins[baseIndex].x);
        var max = MathV.LoadVector3<TLane, float>(ref maxs[baseIndex].x);
        var clamped = MathV.Clamp(val, min, max);

        // Pin the destination while the raw pointer is in use. Unsafe.AsPointer does not
        // pin, so the GC could relocate the managed array and leave the pointer dangling.
        fixed (float* destination = &results[baseIndex].x)
        {
            clamped.Store(destination);
        }
    }
}
/// <summary>
/// SPMD job that computes the distance between corresponding elements of
/// <see cref="arrayA"/> and <see cref="arrayB"/> into <see cref="results"/>.
/// </summary>
internal unsafe struct DistanceJob : IJobSPMD<float>
{
    /// <summary>First points.</summary>
    public float3[] arrayA;

    /// <summary>Second points.</summary>
    public float3[] arrayB;

    /// <summary>Receives one distance per input pair.</summary>
    public float[] results;

    /// <summary>
    /// Processes the lane group that starts at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">SPMD lane type selected by the scheduler.</typeparam>
    /// <param name="baseIndex">Index of the first element handled by this invocation.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public readonly void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, float>
    {
        // presumably loads one float3 per lane starting at baseIndex — confirm against MathV.LoadVector3
        var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x);
        var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x);
        var dist = MathV.Distance(a, b);

        // Pin the destination while the raw pointer is in use. Unsafe.AsPointer does not
        // pin, so the GC could relocate the managed array and leave the pointer dangling.
        fixed (float* destination = &results[baseIndex])
        {
            dist.Store(destination);
        }
    }
}
[TestClass]
public class SPMDTest
{
@@ -57,4 +145,180 @@ public class SPMDTest
NativeMemory.Free(arrayB);
NativeMemory.Free(results);
}
/// <summary>
/// Runs <c>Vector2LerpJob</c> over 100 element pairs and checks the length of the
/// halfway-interpolated vector for every element.
/// </summary>
[TestMethod]
public void TestSPMDVector2Lerp()
{
    const int count = 100;
    var arrayA = new float2[count];
    var arrayB = new float2[count];
    var results = new float[count];
    for (var i = 0; i < count; i++)
    {
        arrayA[i] = new float2(i, i + 1);
        arrayB[i] = new float2(i + 10, i + 11);
    }

    var job = new Vector2LerpJob
    {
        arrayA = arrayA,
        arrayB = arrayB,
        results = results
    };
    job.Run<Vector2LerpJob, float>(count, -1);

    // Hand-computed spot check: lerp([0,1], [10,11], 0.5) = [5,6], length = sqrt(25+36) = sqrt(61)
    var expectedFirst = math.sqrt(5 * 5 + 6 * 6);
    Assert.AreEqual(expectedFirst, results[0], 0.001f);

    // Hand-computed spot check: lerp([50,51], [60,61], 0.5) = [55,56]
    var expected50 = math.sqrt(55 * 55 + 56 * 56);
    Assert.AreEqual(expected50, results[50], 0.001f);

    // Check every element, not just two samples, so a wrong lane or an unprocessed
    // tail element cannot slip through. Halfway between (i, i+1) and (i+10, i+11)
    // is (i+5, i+6).
    for (var i = 0; i < count; i++)
    {
        float midX = i + 5;
        float midY = i + 6;
        var expected = math.sqrt(midX * midX + midY * midY);
        Assert.AreEqual(expected, results[i], 0.001f, $"Unexpected length at index {i}");
    }
}
/// <summary>
/// Runs <c>Vector4NormalizeJob</c> over 100 vectors, checks the first result
/// component by component, and checks that every output has length ~1.
/// </summary>
[TestMethod]
public void TestSPMDVector4Normalize()
{
    const int count = 100;

    // Arrange: input[k] = (k+1, k+2, k+3, k+4)
    var input = new float4[count];
    var output = new float4[count];
    for (var k = 0; k < count; k++)
    {
        input[k] = new float4(k + 1, k + 2, k + 3, k + 4);
    }

    // Act
    var job = new Vector4NormalizeJob { input = input, output = output };
    job.Run<Vector4NormalizeJob, float>(count, -1);

    // Assert: first element is normalize([1,2,3,4])
    var len0 = math.sqrt(1 * 1 + 2 * 2 + 3 * 3 + 4 * 4);
    var expected0 = new float4(1 / len0, 2 / len0, 3 / len0, 4 / len0);
    var first = output[0];
    Assert.AreEqual(expected0.x, first.x, 0.001f);
    Assert.AreEqual(expected0.y, first.y, 0.001f);
    Assert.AreEqual(expected0.z, first.z, 0.001f);
    Assert.AreEqual(expected0.w, first.w, 0.001f);

    // Assert: every output vector has unit length within tolerance
    for (var i = 0; i < count; i++)
    {
        var v = output[i];
        var length = math.sqrt(v.x * v.x + v.y * v.y +
                               v.z * v.z + v.w * v.w);
        Assert.AreEqual(1.0f, length, 0.001f, $"Vector at index {i} is not normalized");
    }
}
/// <summary>
/// Runs <c>Vector3CrossJob</c> with every pair set to the X and Y unit axes and
/// checks that each result is the Z unit axis.
/// </summary>
[TestMethod]
public void TestSPMDVector3Cross()
{
    const int count = 100;

    // Arrange: every pair is (unit X, unit Y)
    var arrayA = new float3[count];
    var arrayB = new float3[count];
    var results = new float3[count];
    for (var k = 0; k < count; k++)
    {
        arrayA[k] = new float3(1, 0, 0);
        arrayB[k] = new float3(0, 1, 0);
    }

    // Act
    var job = new Vector3CrossJob { arrayA = arrayA, arrayB = arrayB, results = results };
    job.Run<Vector3CrossJob, float>(count, -1);

    // Assert: cross([1,0,0], [0,1,0]) = [0,0,1] for every element
    foreach (var product in results)
    {
        Assert.AreEqual(0.0f, product.x, 0.001f);
        Assert.AreEqual(0.0f, product.y, 0.001f);
        Assert.AreEqual(1.0f, product.z, 0.001f);
    }
}
/// <summary>
/// Runs <c>MinMaxClampJob</c> over 100 vectors and compares every result with
/// the scalar <c>math.clamp</c> of the same inputs.
/// </summary>
[TestMethod]
public void TestSPMDMinMaxClamp()
{
    const int count = 100;

    // Arrange: varying values against fixed bounds
    var values = new float3[count];
    var mins = new float3[count];
    var maxs = new float3[count];
    var results = new float3[count];
    for (var k = 0; k < count; k++)
    {
        values[k] = new float3(k - 50, k + 10, k - 25);
        mins[k] = new float3(-10, 0, -5);
        maxs[k] = new float3(10, 50, 25);
    }

    // Act
    var job = new MinMaxClampJob
    {
        values = values,
        mins = mins,
        maxs = maxs,
        results = results
    };
    job.Run<MinMaxClampJob, float>(count, -1);

    // Assert: each SPMD result matches the scalar reference
    for (var k = 0; k < count; k++)
    {
        var expected = math.clamp(values[k], mins[k], maxs[k]);
        var actual = results[k];
        Assert.AreEqual(expected.x, actual.x, 0.001f);
        Assert.AreEqual(expected.y, actual.y, 0.001f);
        Assert.AreEqual(expected.z, actual.z, 0.001f);
    }
}
/// <summary>
/// Runs <c>DistanceJob</c> with every pair set to the origin and (3, 4, 0) and
/// checks that each result is 5.
/// </summary>
[TestMethod]
public void TestSPMDDistance()
{
    const int count = 100;

    // Arrange: every pair is (origin, (3, 4, 0))
    var arrayA = new float3[count];
    var arrayB = new float3[count];
    var results = new float[count];
    for (var k = 0; k < count; k++)
    {
        arrayA[k] = new float3(0, 0, 0);
        arrayB[k] = new float3(3, 4, 0);
    }

    // Act
    var job = new DistanceJob { arrayA = arrayA, arrayB = arrayB, results = results };
    job.Run<DistanceJob, float>(count, -1);

    // Assert: distance([0,0,0], [3,4,0]) = 5 for every element
    foreach (var distance in results)
    {
        Assert.AreEqual(5.0f, distance, 0.001f);
    }
}
}