Improve vector and matrix performance and add swizzle support to the .NET built-in VectorX types.
This commit is contained in:
@@ -1,123 +1,168 @@
|
||||
#define VECTOR_BENCHMARK
|
||||
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Test.Jobs;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.Benchmark;
|
||||
|
||||
public unsafe class MathematicsBenchmark
|
||||
{
|
||||
public struct f4
|
||||
public struct f2
|
||||
{
|
||||
private Vector128<float> _vec;
|
||||
public float x;
|
||||
public float y;
|
||||
|
||||
public f4(float x, float y, float z, float w)
|
||||
public f2(float x, float y)
|
||||
{
|
||||
_vec = Vector128.Create(x, y, z, w);
|
||||
//this = Asf2(Vector128.Create(x, y, 0, 0));
|
||||
this.x = x;
|
||||
this.y = y;
|
||||
}
|
||||
|
||||
public f4(Vector128<float> vec)
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static Vector128<float> AsVector128Unsafe(f2 value)
|
||||
{
|
||||
_vec = vec;
|
||||
return Vector128.Create(value.x, value.y, 0, 0);
|
||||
}
|
||||
|
||||
public static f4 operator +(f4 a, f4 b)
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static f2 Asf2(Vector128<float> value)
|
||||
{
|
||||
var result = a._vec + b._vec;
|
||||
return new f4(result);
|
||||
//f2 result;
|
||||
//result.x = value.GetElement(0);
|
||||
//result.y = value.GetElement(1);
|
||||
//return result;
|
||||
|
||||
ref byte address = ref Unsafe.As<Vector128<float>, byte>(ref value);
|
||||
return Unsafe.ReadUnaligned<f2>(ref address);
|
||||
}
|
||||
|
||||
/// <summary>
/// Adds two <see cref="f2"/> values component by component using plain scalar arithmetic.
/// </summary>
/// <param name="lhs">Left operand.</param>
/// <param name="rhs">Right operand.</param>
/// <returns>A new <see cref="f2"/> holding <c>lhs.x + rhs.x</c> and <c>lhs.y + rhs.y</c>.</returns>
public static f2 operator +(f2 lhs, f2 rhs)
{
    // Alternative that round-trips through Vector128, kept for comparison:
    //return Asf2(AsVector128Unsafe(lhs) + AsVector128Unsafe(rhs));
    var sumX = lhs.x + rhs.x;
    var sumY = lhs.y + rhs.y;

    return new f2(sumX, sumY);
}
|
||||
}
|
||||
|
||||
[Params(100)]
|
||||
public int count;
|
||||
#if VECTOR_BENCHMARK
|
||||
private Vector2 _v2a = new Vector2(1, 2);
|
||||
private Vector2 _v2b = new Vector2(3, 4);
|
||||
|
||||
private f2 _f2a = new f2(1, 2);
|
||||
private f2 _f2b = new f2(3, 4);
|
||||
|
||||
[Benchmark]
|
||||
public Vector2 Vector2Add()
|
||||
public Vector2 VectorAdd()
|
||||
{
|
||||
var a = new Vector2(1, 2);
|
||||
var b = new Vector2(3, 4);
|
||||
var c = new Vector2(5, 6);
|
||||
var v = new Vector2(0, 0);
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
for (var i = 0; i < 10; i++)
|
||||
{
|
||||
c += a + b;
|
||||
v = _v2a + _v2b;
|
||||
}
|
||||
|
||||
return c;
|
||||
return v;
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public float2 Float2Add()
|
||||
public f2 f2Add()
|
||||
{
|
||||
var a = new float2(1, 2);
|
||||
var b = new float2(3, 4);
|
||||
var c = new float2(5, 6);
|
||||
var v = new f2(0, 0);
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
for (var i = 0; i < 10; i++)
|
||||
{
|
||||
c += a + b;
|
||||
v = _f2a + _f2b;
|
||||
}
|
||||
|
||||
return c;
|
||||
return v;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if NOISE_BENCHMARK
|
||||
|
||||
private const int _SIZE = 32;
|
||||
|
||||
/// <summary>
/// Benchmark: runs <c>NoiseJobVector.Execute</c> serially, once for every cell of a
/// <c>_SIZE</c> x <c>_SIZE</c> stack-allocated float buffer, always passing thread index 0.
/// </summary>
[Benchmark]
public void VectorNoise()
{
    const int cellCount = _SIZE * _SIZE;

    // Scratch output for the job; lives on this method's stack frame only.
    float* scratch = stackalloc float[cellCount];

    var noiseJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
    {
        buffers = scratch,
        width = _SIZE,
        height = _SIZE,
    };

    var index = 0;
    while (index < cellCount)
    {
        noiseJob.Execute(index, 0);
        index++;
    }
}
|
||||
|
||||
[Benchmark]
|
||||
public Vector4 Vector4Add()
|
||||
public void MathNoise()
|
||||
{
|
||||
var a = new Vector4(1, 2, 3, 4);
|
||||
var b = new Vector4(5, 6, 7, 8);
|
||||
var result = new Vector4();
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
var buf = stackalloc float[_SIZE * _SIZE];
|
||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
|
||||
{
|
||||
result += a + b;
|
||||
}
|
||||
buffers = buf,
|
||||
width = _SIZE,
|
||||
height = _SIZE,
|
||||
};
|
||||
|
||||
return result;
|
||||
for (var i = 0; i < _SIZE * _SIZE; i++)
|
||||
{
|
||||
job.Execute(i, 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if MATRIX_BENCHMARK
|
||||
private float4x4 _a;
|
||||
private float4x4 _b;
|
||||
private Matrix4x4 _ma;
|
||||
private Matrix4x4 _mb;
|
||||
|
||||
/// <summary>
/// BenchmarkDotNet global setup: assigns the operands read by the matrix multiplication
/// benchmarks (<c>_a</c>/<c>_b</c> as <c>float4x4</c>, <c>_ma</c>/<c>_mb</c> as <c>Matrix4x4</c>).
/// </summary>
[GlobalSetup]
public void Init()
{
    // System.Numerics operands: the left one counts 1..16, the right one counts 16..1.
    _ma = new Matrix4x4(
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 12, 13, 14, 15, 16);
    _mb = new Matrix4x4(
        16, 15, 14, 13, 12, 11, 10, 9,
        8, 7, 6, 5, 4, 3, 2, 1);

    // float4x4 operands receive the same literals in the same argument order.
    _a = new float4x4(
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 12, 13, 14, 15, 16);
    _b = new float4x4(
        16, 15, 14, 13, 12, 11, 10, 9,
        8, 7, 6, 5, 4, 3, 2, 1);
}
|
||||
|
||||
[Benchmark]
|
||||
public float4 Float4Add()
|
||||
public float4x4 Float4x4Multiplication()
|
||||
{
|
||||
var a = new float4(1, 2, 3, 4);
|
||||
var b = new float4(5, 6, 7, 8);
|
||||
var result = new float4();
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
result += a + b;
|
||||
}
|
||||
|
||||
return result;
|
||||
return math.mul(_a, _b);
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public f4 f4Add()
|
||||
public Matrix4x4 Matrix4x4Multiplication()
|
||||
{
|
||||
var a = new f4(1, 2, 3, 4);
|
||||
var b = new f4(5, 6, 7, 8);
|
||||
var result = new f4(0, 0, 0, 0);
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
result += a + b;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public Vector128<float> v128Add()
|
||||
{
|
||||
var a = Vector128.Create(1f, 2f, 3f, 4f);
|
||||
var b = Vector128.Create(5f, 6f, 7f, 8f);
|
||||
var result = Vector128<float>.Zero;
|
||||
|
||||
for (var i = 0; i < count; i++)
|
||||
{
|
||||
result += a + b;
|
||||
}
|
||||
|
||||
return result;
|
||||
return Matrix4x4.Multiply(_ma, _mb);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
@@ -32,11 +32,11 @@ public class ParallelNoiseBenchmark
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public void JobSystem()
|
||||
public unsafe void JobSystem()
|
||||
{
|
||||
var job = new NoiseJob()
|
||||
var job = new NoiseJobVector()
|
||||
{
|
||||
buffers = _buffers,
|
||||
buffers = (float*)_buffers.GetUnsafePtr(),
|
||||
width = _WIDTH,
|
||||
height = _HEIGHT
|
||||
};
|
||||
@@ -53,7 +53,7 @@ public class ParallelNoiseBenchmark
|
||||
var x = i % _WIDTH;
|
||||
var y = i / _HEIGHT;
|
||||
var uv = new Vector2(x, y);
|
||||
_buffers[i] = NoiseJob.GradientNoise(uv);
|
||||
_buffers[i] = NoiseJobVector.GradientNoise(uv);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ public class ParallelNoiseBenchmark
|
||||
var x = i % _WIDTH;
|
||||
var y = i / _HEIGHT;
|
||||
var uv = new Vector2(x, y);
|
||||
_buffers[i] = NoiseJob.GradientNoise(uv);
|
||||
_buffers[i] = NoiseJobVector.GradientNoise(uv);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,13 @@
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.Jobs;
|
||||
|
||||
internal struct NoiseJob : IJobParallelFor
|
||||
internal unsafe struct NoiseJobVector : IJobParallelFor
|
||||
{
|
||||
public UnsafeArray<float> buffers;
|
||||
public float* buffers;
|
||||
public int width;
|
||||
public int height;
|
||||
|
||||
@@ -48,4 +48,43 @@ internal struct NoiseJob : IJobParallelFor
|
||||
var uv = new Vector2(x, y) / new Vector2(width, height);
|
||||
buffers[loopIndex] = GradientNoise(uv);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Parallel-for job that writes one gradient-noise sample per loop index into
/// <see cref="buffers"/>, implemented with the <c>float2</c> / <c>math</c> API.
/// </summary>
internal unsafe struct NoiseJobMath : IJobParallelFor
{
    /// <summary>Destination pointer; <see cref="Execute"/> writes <c>buffers[loopIndex]</c>.</summary>
    public float* buffers;

    /// <summary>Number of columns; used to split a loop index into (x, y) and to normalise x.</summary>
    public int width;

    /// <summary>Number of rows; used to normalise y.</summary>
    public int height;

    /// <summary>
    /// Hashes a lattice coordinate into a normalised 2D gradient direction.
    /// </summary>
    /// <param name="uv">Lattice coordinate; modified locally (passed by value).</param>
    /// <returns>The result of <c>math.normalize</c> on the hashed direction.</returns>
    private static float2 GradientNoiseDirect(float2 uv)
    {
        uv.x %= 289;
        uv.y %= 289;

        // Two rounds of the (34 * t + 1) * t mod 289 permutation, mixing y in between.
        var x = (34 * uv.x + 1) * uv.x % 289 + uv.y;
        x = (34 * x + 1) * x % 289;

        // Remap to [-1, 1) before building the direction vector.
        x = math.frac(x / 41) * 2 - 1;
        return math.normalize(new float2(x - math.floor(x + 0.5f), math.abs(x) - 0.5f));
    }

    /// <summary>
    /// Evaluates 2D gradient noise at <paramref name="uv"/> by blending the four corner
    /// contributions of the enclosing lattice cell with a quintic fade curve.
    /// </summary>
    /// <param name="uv">Sample position.</param>
    /// <returns>The interpolated noise value.</returns>
    public static float GradientNoise(float2 uv)
    {
        var ip = new float2(math.floor(uv.x), math.floor(uv.y));
        var fp = new float2(math.frac(uv.x), math.frac(uv.y));

        // dXY = contribution of the corner offset by (X, Y) from the cell origin.
        var d00 = math.dot(GradientNoiseDirect(ip), fp);
        var d01 = math.dot(GradientNoiseDirect(ip + new float2(0, 1)), fp - new float2(0, 1));
        var d10 = math.dot(GradientNoiseDirect(ip + new float2(1, 0)), fp - new float2(1, 0));
        var d11 = math.dot(GradientNoiseDirect(ip + new float2(1, 1)), fp - new float2(1, 1));

        // Quintic fade: 6t^5 - 15t^4 + 10t^3.
        fp = fp * fp * fp * (fp * (fp * new float2(6.0f) - new float2(15.0f)) + new float2(10.0f));

        // Blend along y within each x column (d00->d01 and d10->d11), then along x.
        // The inner pairs must differ only in their y offset; pairing d00 with d10 here
        // would apply the y weight across an x step and break continuity between cells.
        return float.Lerp(float.Lerp(d00, d01, fp.y), float.Lerp(d10, d11, fp.y), fp.x);
    }

    /// <summary>
    /// Computes the noise sample for one flattened cell index and stores it in <see cref="buffers"/>.
    /// </summary>
    /// <param name="loopIndex">Flattened cell index; also the write position in <see cref="buffers"/>.</param>
    /// <param name="threadIndex">Not used by this job.</param>
    public void Execute(int loopIndex, int threadIndex)
    {
        // Row-major split: column is the remainder, row is the quotient by the SAME divisor.
        // Dividing by height only gives the right row when width == height.
        var x = loopIndex % width;
        var y = loopIndex / width;

        var uv = new float2(x, y) / new float2(width, height);
        buffers[loopIndex] = GradientNoise(uv);
    }
}
|
||||
@@ -20,12 +20,42 @@
|
||||
|
||||
//using Misaki.HighPerformance.LowLevel;
|
||||
|
||||
//BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.CollectionBenchmark>();
|
||||
using System.Runtime.Intrinsics;
|
||||
|
||||
using Misaki.HighPerformance.Collections;
|
||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>();
|
||||
|
||||
AllocationManager.EnableDebugLayer();
|
||||
using var csm = new UnsafeSlotMap<int>(4, Allocator.Persistent);
|
||||
AllocationManager.Dispose();
|
||||
//using Misaki.HighPerformance.Collections;
|
||||
//using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
//using Misaki.HighPerformance.LowLevel.Collections;
|
||||
|
||||
//AllocationManager.EnableDebugLayer();
|
||||
//using var csm = new UnsafeSlotMap<int>(4, Allocator.Persistent);
|
||||
//AllocationManager.Dispose();
|
||||
|
||||
//using Misaki.HighPerformance.Mathematics;
|
||||
//using System.Numerics;
|
||||
|
||||
//var a = new Misaki.HighPerformance.Mathematics.float4x4(
|
||||
// 1, 2, 3, 4,
|
||||
// 5, 6, 7, 8,
|
||||
// 9, 10, 11, 12,
|
||||
// 13, 14, 15, 16);
|
||||
//var b = new Misaki.HighPerformance.Mathematics.float4x4(
|
||||
// 16, 15, 14, 13,
|
||||
// 12, 11, 10, 9,
|
||||
// 8, 7, 6, 5,
|
||||
// 4, 3, 2, 1);
|
||||
|
||||
//Console.WriteLine(math.mul(a, b));
|
||||
|
||||
//var ma = new Matrix4x4(
|
||||
// 1, 2, 3, 4,
|
||||
// 5, 6, 7, 8,
|
||||
// 9, 10, 11, 12,
|
||||
// 13, 14, 15, 16);
|
||||
//var mb = new Matrix4x4(
|
||||
// 16, 15, 14, 13,
|
||||
// 12, 11, 10, 9,
|
||||
// 8, 7, 6, 5,
|
||||
// 4, 3, 2, 1);
|
||||
//Console.WriteLine(Matrix4x4.Multiply(ma, mb));
|
||||
|
||||
Reference in New Issue
Block a user