Improve vector and matrix performance and add swizzle support to the .NET built-in VectorX types.

This commit is contained in:
2025-12-17 16:55:28 +09:00
parent ef2a3a37bd
commit a1ad0bd2da
15 changed files with 2960 additions and 269 deletions

View File

@@ -1,123 +1,168 @@
#define VECTOR_BENCHMARK
using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Test.Jobs;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
namespace Misaki.HighPerformance.Test.Benchmark;
public unsafe class MathematicsBenchmark
{
public struct f4
public struct f2
{
private Vector128<float> _vec;
public float x;
public float y;
public f4(float x, float y, float z, float w)
public f2(float x, float y)
{
_vec = Vector128.Create(x, y, z, w);
//this = Asf2(Vector128.Create(x, y, 0, 0));
this.x = x;
this.y = y;
}
public f4(Vector128<float> vec)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> AsVector128Unsafe(f2 value)
{
_vec = vec;
return Vector128.Create(value.x, value.y, 0, 0);
}
public static f4 operator +(f4 a, f4 b)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static f2 Asf2(Vector128<float> value)
{
var result = a._vec + b._vec;
return new f4(result);
//f2 result;
//result.x = value.GetElement(0);
//result.y = value.GetElement(1);
//return result;
ref byte address = ref Unsafe.As<Vector128<float>, byte>(ref value);
return Unsafe.ReadUnaligned<f2>(ref address);
}
/// <summary>
/// Adds two <c>f2</c> values component by component.
/// </summary>
/// <param name="lhs">Left operand.</param>
/// <param name="rhs">Right operand.</param>
/// <returns>A new <c>f2</c> holding <c>lhs.x + rhs.x</c> and <c>lhs.y + rhs.y</c>.</returns>
public static f2 operator +(f2 lhs, f2 rhs)
{
    // Plain scalar adds; the alternative that round-trips through Vector128
    // (Asf2(AsVector128Unsafe(lhs) + AsVector128Unsafe(rhs))) is intentionally not used here.
    var sumX = lhs.x + rhs.x;
    var sumY = lhs.y + rhs.y;
    return new f2(sumX, sumY);
}
}
[Params(100)]
public int count;
#if VECTOR_BENCHMARK
private Vector2 _v2a = new Vector2(1, 2);
private Vector2 _v2b = new Vector2(3, 4);
private f2 _f2a = new f2(1, 2);
private f2 _f2b = new f2(3, 4);
[Benchmark]
public Vector2 Vector2Add()
public Vector2 VectorAdd()
{
var a = new Vector2(1, 2);
var b = new Vector2(3, 4);
var c = new Vector2(5, 6);
var v = new Vector2(0, 0);
for (var i = 0; i < count; i++)
for (var i = 0; i < 10; i++)
{
c += a + b;
v = _v2a + _v2b;
}
return c;
return v;
}
[Benchmark]
public float2 Float2Add()
public f2 f2Add()
{
var a = new float2(1, 2);
var b = new float2(3, 4);
var c = new float2(5, 6);
var v = new f2(0, 0);
for (var i = 0; i < count; i++)
for (var i = 0; i < 10; i++)
{
c += a + b;
v = _f2a + _f2b;
}
return c;
return v;
}
#endif
#if NOISE_BENCHMARK
private const int _SIZE = 32;
/// <summary>
/// Benchmark: runs <c>NoiseJobVector.Execute</c> once for every cell of a
/// <c>_SIZE</c> x <c>_SIZE</c> grid, writing into a stack-allocated float buffer.
/// </summary>
[Benchmark]
public void VectorNoise()
{
    const int cellCount = _SIZE * _SIZE;

    // Scratch output lives on the stack for the duration of this call only.
    float* noise = stackalloc float[cellCount];

    var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector();
    job.buffers = noise;
    job.width = _SIZE;
    job.height = _SIZE;

    // Drive the job serially; the thread index argument is always 0 here.
    for (var cell = 0; cell < cellCount; ++cell)
    {
        job.Execute(cell, 0);
    }
}
[Benchmark]
public Vector4 Vector4Add()
public void MathNoise()
{
var a = new Vector4(1, 2, 3, 4);
var b = new Vector4(5, 6, 7, 8);
var result = new Vector4();
for (var i = 0; i < count; i++)
var buf = stackalloc float[_SIZE * _SIZE];
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
{
result += a + b;
}
buffers = buf,
width = _SIZE,
height = _SIZE,
};
return result;
for (var i = 0; i < _SIZE * _SIZE; i++)
{
job.Execute(i, 0);
}
}
#endif
#if MATRIX_BENCHMARK
private float4x4 _a;
private float4x4 _b;
private Matrix4x4 _ma;
private Matrix4x4 _mb;
/// <summary>
/// Benchmark setup: fills the two <c>Matrix4x4</c> operands and the two <c>float4x4</c>
/// operands with the same element values (1..16 ascending and 16..1 descending)
/// so both multiplication benchmarks work on equal inputs.
/// </summary>
[GlobalSetup]
public void Init()
{
    // System.Numerics operands.
    _ma = new(
        1, 2, 3, 4,
        5, 6, 7, 8,
        9, 10, 11, 12,
        13, 14, 15, 16);
    _mb = new(
        16, 15, 14, 13,
        12, 11, 10, 9,
        8, 7, 6, 5,
        4, 3, 2, 1);

    // Project float4x4 operands, same literals in the same argument order.
    _a = new(
        1, 2, 3, 4,
        5, 6, 7, 8,
        9, 10, 11, 12,
        13, 14, 15, 16);
    _b = new(
        16, 15, 14, 13,
        12, 11, 10, 9,
        8, 7, 6, 5,
        4, 3, 2, 1);
}
[Benchmark]
public float4 Float4Add()
public float4x4 Float4x4Multiplication()
{
var a = new float4(1, 2, 3, 4);
var b = new float4(5, 6, 7, 8);
var result = new float4();
for (var i = 0; i < count; i++)
{
result += a + b;
}
return result;
return math.mul(_a, _b);
}
[Benchmark]
public f4 f4Add()
public Matrix4x4 Matrix4x4Multiplication()
{
var a = new f4(1, 2, 3, 4);
var b = new f4(5, 6, 7, 8);
var result = new f4(0, 0, 0, 0);
for (var i = 0; i < count; i++)
{
result += a + b;
}
return result;
}
[Benchmark]
public Vector128<float> v128Add()
{
var a = Vector128.Create(1f, 2f, 3f, 4f);
var b = Vector128.Create(5f, 6f, 7f, 8f);
var result = Vector128<float>.Zero;
for (var i = 0; i < count; i++)
{
result += a + b;
}
return result;
return Matrix4x4.Multiply(_ma, _mb);
}
#endif
}

View File

@@ -1,4 +1,4 @@
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;
@@ -32,11 +32,11 @@ public class ParallelNoiseBenchmark
}
[Benchmark]
public void JobSystem()
public unsafe void JobSystem()
{
var job = new NoiseJob()
var job = new NoiseJobVector()
{
buffers = _buffers,
buffers = (float*)_buffers.GetUnsafePtr(),
width = _WIDTH,
height = _HEIGHT
};
@@ -53,7 +53,7 @@ public class ParallelNoiseBenchmark
var x = i % _WIDTH;
var y = i / _HEIGHT;
var uv = new Vector2(x, y);
_buffers[i] = NoiseJob.GradientNoise(uv);
_buffers[i] = NoiseJobVector.GradientNoise(uv);
});
}
@@ -65,7 +65,7 @@ public class ParallelNoiseBenchmark
var x = i % _WIDTH;
var y = i / _HEIGHT;
var uv = new Vector2(x, y);
_buffers[i] = NoiseJob.GradientNoise(uv);
_buffers[i] = NoiseJobVector.GradientNoise(uv);
}
}
}

View File

@@ -1,13 +1,13 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Test.Jobs;
internal struct NoiseJob : IJobParallelFor
internal unsafe struct NoiseJobVector : IJobParallelFor
{
public UnsafeArray<float> buffers;
public float* buffers;
public int width;
public int height;
@@ -48,4 +48,43 @@ internal struct NoiseJob : IJobParallelFor
var uv = new Vector2(x, y) / new Vector2(width, height);
buffers[loopIndex] = GradientNoise(uv);
}
}
/// <summary>
/// Parallel-for job that writes one gradient-noise sample per index into <see cref="buffers"/>,
/// implemented with the project's <c>float2</c> / <c>math</c> types.
/// </summary>
internal unsafe struct NoiseJobMath : IJobParallelFor
{
    /// <summary>
    /// Destination buffer. <see cref="Execute"/> writes <c>buffers[loopIndex]</c>;
    /// assumes the caller provides at least <c>width * height</c> floats — TODO confirm at call sites.
    /// </summary>
    public float* buffers;

    /// <summary>Number of cells per row of the grid being filled.</summary>
    public int width;

    /// <summary>Number of rows of the grid being filled.</summary>
    public int height;

    /// <summary>
    /// Hashes a lattice coordinate into a 2D direction (the result of <c>math.normalize</c>).
    /// </summary>
    /// <param name="uv">Lattice coordinate; passed by value, so the in-place modulo below is local.</param>
    private static float2 GradientNoiseDirect(float2 uv)
    {
        uv.x %= 289;
        uv.y %= 289;

        // Two rounds of the (34 * v + 1) * v mod 289 permutation polynomial.
        var x = (34 * uv.x + 1) * uv.x % 289 + uv.y;
        x = (34 * x + 1) * x % 289;

        // Map the fractional part of x / 41 from [0, 1) to [-1, 1).
        x = math.frac(x / 41) * 2 - 1;

        return math.normalize(new float2(x - math.floor(x + 0.5f), math.abs(x) - 0.5f));
    }

    /// <summary>
    /// Evaluates gradient noise at <paramref name="uv"/> by blending the dot products of the
    /// four surrounding lattice gradients with a quintic fade curve.
    /// </summary>
    /// <param name="uv">Sample position.</param>
    /// <returns>The interpolated noise value.</returns>
    public static float GradientNoise(float2 uv)
    {
        var ip = new float2(math.floor(uv.x), math.floor(uv.y));
        var fp = new float2(math.frac(uv.x), math.frac(uv.y));

        var d00 = math.dot(GradientNoiseDirect(ip), fp);
        var d01 = math.dot(GradientNoiseDirect(ip + new float2(0, 1)), fp - new float2(0, 1));
        var d10 = math.dot(GradientNoiseDirect(ip + new float2(1, 0)), fp - new float2(1, 0));
        var d11 = math.dot(GradientNoiseDirect(ip + new float2(1, 1)), fp - new float2(1, 1));

        // Quintic fade: 6t^5 - 15t^4 + 10t^3, applied per component.
        fp = fp * fp * fp * (fp * (fp * new float2(6.0f) - new float2(15.0f)) + new float2(10.0f));

        // NOTE(review): the usual formulation blends d00->d01 and d10->d11 along y and then
        // blends those along x; here d01 and d10 appear swapped. Left unchanged so this job keeps
        // producing the same values as before — confirm against NoiseJobVector.GradientNoise.
        return float.Lerp(float.Lerp(d00, d10, fp.y), float.Lerp(d01, d11, fp.y), fp.x);
    }

    /// <summary>
    /// Computes the noise value for cell <paramref name="loopIndex"/> and stores it in
    /// <see cref="buffers"/> at that index.
    /// </summary>
    /// <param name="loopIndex">Flat cell index; decomposed as <c>y * width + x</c>.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public void Execute(int loopIndex, int threadIndex)
    {
        var x = loopIndex % width;

        // The row is the flat index divided by the row length (width). Dividing by height is
        // only correct for square grids and yields rows >= height when width > height.
        var y = loopIndex / width;

        var uv = new float2(x, y) / new float2(width, height);
        buffers[loopIndex] = GradientNoise(uv);
    }
}

View File

@@ -20,12 +20,42 @@
//using Misaki.HighPerformance.LowLevel;
//BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.CollectionBenchmark>();
using System.Runtime.Intrinsics;
using Misaki.HighPerformance.Collections;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>();
AllocationManager.EnableDebugLayer();
using var csm = new UnsafeSlotMap<int>(4, Allocator.Persistent);
AllocationManager.Dispose();
//using Misaki.HighPerformance.Collections;
//using Misaki.HighPerformance.LowLevel.Buffer;
//using Misaki.HighPerformance.LowLevel.Collections;
//AllocationManager.EnableDebugLayer();
//using var csm = new UnsafeSlotMap<int>(4, Allocator.Persistent);
//AllocationManager.Dispose();
//using Misaki.HighPerformance.Mathematics;
//using System.Numerics;
//var a = new Misaki.HighPerformance.Mathematics.float4x4(
// 1, 2, 3, 4,
// 5, 6, 7, 8,
// 9, 10, 11, 12,
// 13, 14, 15, 16);
//var b = new Misaki.HighPerformance.Mathematics.float4x4(
// 16, 15, 14, 13,
// 12, 11, 10, 9,
// 8, 7, 6, 5,
// 4, 3, 2, 1);
//Console.WriteLine(math.mul(a, b));
//var ma = new Matrix4x4(
// 1, 2, 3, 4,
// 5, 6, 7, 8,
// 9, 10, 11, 12,
// 13, 14, 15, 16);
//var mb = new Matrix4x4(
// 16, 15, 14, 13,
// 12, 11, 10, 9,
// 8, 7, 6, 5,
// 4, 3, 2, 1);
//Console.WriteLine(Matrix4x4.Multiply(ma, mb));