Refactor trigonometric funcs, optimize GGX benchmark
- Replaced SIMD-based Sin/Cos/SinCos in WideLane with generic polynomial approximations for hardware independence. - Updated ScalarLane Cast to use CreateTruncating. - Applied AggressiveOptimization to key GGX methods; improved luma calculation and radical inverse LUT handling. - Enhanced GGX benchmark setup, cleanup, and timing logic. - Bumped project version to 1.3.1.
This commit is contained in:
@@ -7,7 +7,7 @@
|
|||||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
||||||
<Authors>Misaki</Authors>
|
<Authors>Misaki</Authors>
|
||||||
<AssemblyVersion>1.3.0</AssemblyVersion>
|
<AssemblyVersion>1.3.1</AssemblyVersion>
|
||||||
<Version>$(AssemblyVersion)</Version>
|
<Version>$(AssemblyVersion)</Version>
|
||||||
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
||||||
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
||||||
|
|||||||
@@ -171,7 +171,7 @@ public readonly unsafe struct ScalarLane<TNumber> : ISPMDLane<ScalarLane<TNumber
|
|||||||
where TOther : ISPMDLane<TOther, TOtherNumber>
|
where TOther : ISPMDLane<TOther, TOtherNumber>
|
||||||
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
|
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
|
||||||
{
|
{
|
||||||
return TOther.Create(TOtherNumber.CreateChecked(value));
|
return TOther.Create(TOtherNumber.CreateTruncating(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
|||||||
@@ -578,58 +578,129 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Sin(WideLane<TNumber> value)
|
public static WideLane<TNumber> Sin(WideLane<TNumber> value)
|
||||||
{
|
{
|
||||||
if (typeof(TNumber) == typeof(float))
|
var invPi = Create(TNumber.CreateTruncating(0.318309886f)); // 1 / PI
|
||||||
{
|
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
|
|
||||||
var result = Vector.Sin(v);
|
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
|
|
||||||
}
|
|
||||||
else if (typeof(TNumber) == typeof(double))
|
|
||||||
{
|
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
|
|
||||||
var result = Vector.Sin(v);
|
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
|
||||||
}
|
|
||||||
|
|
||||||
return value;
|
var x_sin = value;
|
||||||
|
var y_sin = x_sin * invPi;
|
||||||
|
var k_sin = Round(y_sin);
|
||||||
|
var z_sin = y_sin - k_sin;
|
||||||
|
|
||||||
|
var half = Create(TNumber.CreateTruncating(0.5f));
|
||||||
|
var two = Create(TNumber.CreateTruncating(2.0f));
|
||||||
|
|
||||||
|
var k_even_sin = Round(k_sin * half) * two;
|
||||||
|
var sign_sin = One - two * Abs(k_sin - k_even_sin);
|
||||||
|
|
||||||
|
var c1 = Create(TNumber.CreateTruncating(3.14159265f)); // PI
|
||||||
|
var c3 = Create(TNumber.CreateTruncating(-5.16771278f)); // -PI^3 / 6
|
||||||
|
var c5 = Create(TNumber.CreateTruncating(2.55016404f)); // PI^5 / 120
|
||||||
|
var c7 = Create(TNumber.CreateTruncating(-0.59926453f)); // -PI^7 / 5040
|
||||||
|
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||||
|
|
||||||
|
var z2_sin = z_sin * z_sin;
|
||||||
|
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||||
|
poly_sin = z_sin * poly_sin; // z * (...)
|
||||||
|
|
||||||
|
return poly_sin * sign_sin;
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Cos(WideLane<TNumber> value)
|
public static WideLane<TNumber> Cos(WideLane<TNumber> value)
|
||||||
{
|
{
|
||||||
if (typeof(TNumber) == typeof(float))
|
var halfPi = Create(TNumber.CreateTruncating(1.570796327f));
|
||||||
{
|
var invPi = Create(TNumber.CreateTruncating(0.318309886f)); // 1 / PI
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
|
|
||||||
var result = Vector.Cos(v);
|
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
|
|
||||||
}
|
|
||||||
else if (typeof(TNumber) == typeof(double))
|
|
||||||
{
|
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
|
|
||||||
var result = Vector.Cos(v);
|
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
|
||||||
}
|
|
||||||
|
|
||||||
return value;
|
var x_cos = value + halfPi;
|
||||||
|
var y_cos = x_cos * invPi;
|
||||||
|
var k_cos = Round(y_cos);
|
||||||
|
var z_cos = y_cos - k_cos;
|
||||||
|
|
||||||
|
var half = Create(TNumber.CreateTruncating(0.5f));
|
||||||
|
var two = Create(TNumber.CreateTruncating(2.0f));
|
||||||
|
|
||||||
|
var k_even_cos = Round(k_cos * half) * two;
|
||||||
|
var sign_cos = One - two * Abs(k_cos - k_even_cos);
|
||||||
|
|
||||||
|
var c1 = Create(TNumber.CreateTruncating(3.14159265f)); // PI
|
||||||
|
var c3 = Create(TNumber.CreateTruncating(-5.16771278f)); // -PI^3 / 6
|
||||||
|
var c5 = Create(TNumber.CreateTruncating(2.55016404f)); // PI^5 / 120
|
||||||
|
var c7 = Create(TNumber.CreateTruncating(-0.59926453f)); // -PI^7 / 5040
|
||||||
|
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||||
|
|
||||||
|
var z2_cos = z_cos * z_cos;
|
||||||
|
var poly_cos = MultipleAdd(z2_cos, c9, c7);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c5);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c3);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c1);
|
||||||
|
poly_cos = z_cos * poly_cos;
|
||||||
|
|
||||||
|
return poly_cos * sign_cos;
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static (WideLane<TNumber> sin, WideLane<TNumber> cos) SinCos(WideLane<TNumber> value)
|
public static (WideLane<TNumber> sin, WideLane<TNumber> cos) SinCos(WideLane<TNumber> value)
|
||||||
{
|
{
|
||||||
if (typeof(TNumber) == typeof(float))
|
var halfPi = Create(TNumber.CreateTruncating(1.570796327f));
|
||||||
{
|
var invPi = Create(TNumber.CreateTruncating(0.318309886f)); // 1 / PI
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
|
|
||||||
var (sin, cos) = Vector.SinCos(v);
|
|
||||||
return (new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref sin)), new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref cos)));
|
|
||||||
}
|
|
||||||
else if (typeof(TNumber) == typeof(double))
|
|
||||||
{
|
|
||||||
ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
|
|
||||||
var (sin, cos) = Vector.SinCos(v);
|
|
||||||
return (new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref sin)), new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref cos)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return (value, value);
|
var x_sin = value;
|
||||||
|
var x_cos = value + halfPi;
|
||||||
|
|
||||||
|
// Range Reduction
|
||||||
|
// We map any angle to the interval [-0.5, 0.5] (corresponding to the actual angle range [-PI/2, PI/2])
|
||||||
|
// y = x * (1 / PI)
|
||||||
|
var y_sin = x_sin * invPi;
|
||||||
|
var y_cos = x_cos * invPi;
|
||||||
|
|
||||||
|
// k = Round(y)
|
||||||
|
var k_sin = Round(y_sin);
|
||||||
|
var k_cos = Round(y_cos);
|
||||||
|
|
||||||
|
// z = y - k (Now, the range of z is perfectly reduced to [-0.5, 0.5])
|
||||||
|
var z_sin = y_sin - k_sin;
|
||||||
|
var z_cos = y_cos - k_cos;
|
||||||
|
|
||||||
|
// 2. Branchless Sign Flip
|
||||||
|
// Mathematical principle: Sin(x + k*PI) = Sin(x) * (-1)^k
|
||||||
|
// We need to compute (-1)^k. To avoid inefficient bit operations or branches, we compute it with floating-point math:
|
||||||
|
// sign = 1.0 - 2.0 * Abs(k - 2.0 * Round(k * 0.5))
|
||||||
|
var half = Create(TNumber.CreateTruncating(0.5f));
|
||||||
|
var two = Create(TNumber.CreateTruncating(2.0f));
|
||||||
|
var one = One;
|
||||||
|
|
||||||
|
var k_even_sin = Round(k_sin * half) * two;
|
||||||
|
var sign_sin = one - two * Abs(k_sin - k_even_sin);
|
||||||
|
|
||||||
|
var k_even_cos = Round(k_cos * half) * two;
|
||||||
|
var sign_cos = one - two * Abs(k_cos - k_even_cos);
|
||||||
|
|
||||||
|
// 3. Taylor/Remez Polynomial for Sin(PI * z)
|
||||||
|
// For z in [-0.5, 0.5],Calculate sin(PI * z)
|
||||||
|
// z * (C1 + z^2 * (C3 + z^2 * (C5 + z^2 * (C7 + z^2 * C9))))
|
||||||
|
var c1 = Create(TNumber.CreateTruncating(3.14159265f)); // PI
|
||||||
|
var c3 = Create(TNumber.CreateTruncating(-5.16771278f)); // -PI^3 / 6
|
||||||
|
var c5 = Create(TNumber.CreateTruncating(2.55016404f)); // PI^5 / 120
|
||||||
|
var c7 = Create(TNumber.CreateTruncating(-0.59926453f)); // -PI^7 / 5040
|
||||||
|
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||||
|
|
||||||
|
var z2_sin = z_sin * z_sin;
|
||||||
|
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||||
|
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||||
|
poly_sin = z_sin * poly_sin; // z * (...)
|
||||||
|
|
||||||
|
var z2_cos = z_cos * z_cos;
|
||||||
|
var poly_cos = MultipleAdd(z2_cos, c9, c7);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c5);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c3);
|
||||||
|
poly_cos = MultipleAdd(z2_cos, poly_cos, c1);
|
||||||
|
poly_cos = z_cos * poly_cos;
|
||||||
|
|
||||||
|
return (poly_sin * sign_sin, poly_cos * sign_cos);
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
|||||||
@@ -1,4 +1,6 @@
|
|||||||
using BenchmarkDotNet.Attributes;
|
using BenchmarkDotNet.Attributes;
|
||||||
|
using BenchmarkDotNet.Diagnosers;
|
||||||
|
using BenchmarkDotNet.Engines;
|
||||||
using Misaki.HighPerformance.Image;
|
using Misaki.HighPerformance.Image;
|
||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
using Misaki.HighPerformance.Mathematics;
|
using Misaki.HighPerformance.Mathematics;
|
||||||
@@ -41,7 +43,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
|
private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
|
||||||
{
|
{
|
||||||
var x = i / N;
|
var x = i / N;
|
||||||
@@ -50,7 +52,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
}
|
}
|
||||||
|
|
||||||
// --- GGX Importance Sampling ---
|
// --- GGX Importance Sampling ---
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
private static Vector3<TFloat, float> ImportanceSampleGGX(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
|
private static Vector3<TFloat, float> ImportanceSampleGGX(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
|
||||||
{
|
{
|
||||||
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
||||||
@@ -82,7 +84,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
|
|
||||||
// --- Image Sampling Helpers ---
|
// --- Image Sampling Helpers ---
|
||||||
// Maps a 3D direction vector to 2D equirectangular UVs
|
// Maps a 3D direction vector to 2D equirectangular UVs
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
private static Vector2<TFloat, float> DirToEquirectangularUV(Vector3<TFloat, float> dir)
|
private static Vector2<TFloat, float> DirToEquirectangularUV(Vector3<TFloat, float> dir)
|
||||||
{
|
{
|
||||||
var u = TFloat.Atan2(dir.z, dir.x);
|
var u = TFloat.Atan2(dir.z, dir.x);
|
||||||
@@ -94,7 +96,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
|
||||||
private static Vector3<TFloat, float> SampleEquirectangularMap(float* img, int w, int h, Vector3<TFloat, float> dir)
|
private static Vector3<TFloat, float> SampleEquirectangularMap(float* img, int w, int h, Vector3<TFloat, float> dir)
|
||||||
{
|
{
|
||||||
var uv = DirToEquirectangularUV(dir);
|
var uv = DirToEquirectangularUV(dir);
|
||||||
@@ -112,6 +114,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
return MathV.GatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), 1);
|
return MathV.GatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
|
||||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||||
{
|
{
|
||||||
var m = 0;
|
var m = 0;
|
||||||
@@ -157,17 +160,14 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
TFloat.Create(V.z)
|
TFloat.Create(V.z)
|
||||||
);
|
);
|
||||||
|
|
||||||
//var vPrefilteredColorX = TFloat.Zero;
|
|
||||||
//var vPrefilteredColorY = TFloat.Zero;
|
|
||||||
//var vPrefilteredColorZ = TFloat.Zero;
|
|
||||||
var vPrefilteredColor = Vector3<TFloat, float>.Zero;
|
var vPrefilteredColor = Vector3<TFloat, float>.Zero;
|
||||||
var vTotalWeight = TFloat.Zero;
|
var vTotalWeight = TFloat.Zero;
|
||||||
|
|
||||||
// 3. Monte Carlo Integration Loop
|
// 3. Monte Carlo Integration Loop
|
||||||
// We assume WideLane is supported in the test.
|
|
||||||
|
|
||||||
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
||||||
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
||||||
|
var vLumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
||||||
|
|
||||||
for (var i = 0u; i < dynamicSampleCount; i += (uint)TFloat.LaneWidth)
|
for (var i = 0u; i < dynamicSampleCount; i += (uint)TFloat.LaneWidth)
|
||||||
{
|
{
|
||||||
@@ -194,7 +194,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|||||||
// A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
|
// A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
|
||||||
// This introduce bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
|
// This introduce bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
|
||||||
// And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
|
// And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
|
||||||
var luma = MathV.Dot(sampleColor, MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f));
|
var luma = MathV.Dot(sampleColor, vLumaVector);
|
||||||
var fireflyWeight = TFloat.One / (TFloat.One + luma);
|
var fireflyWeight = TFloat.One / (TFloat.One + luma);
|
||||||
var finalWeight = NdotL * fireflyWeight;
|
var finalWeight = NdotL * fireflyWeight;
|
||||||
|
|
||||||
@@ -381,6 +381,7 @@ internal unsafe struct GGXMipGenerationJob : IJobParallelFor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[SimpleJob(RunStrategy.ColdStart, launchCount: 1, warmupCount: 0, iterationCount: 1, invocationCount: 1, id: "QuickRun")]
|
||||||
public unsafe class GGXMipGenerationBenchmark
|
public unsafe class GGXMipGenerationBenchmark
|
||||||
{
|
{
|
||||||
private ImageResultFloat _image;
|
private ImageResultFloat _image;
|
||||||
@@ -388,7 +389,7 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
private int _totalPixel;
|
private int _totalPixel;
|
||||||
private float** _pResult;
|
private float** _pResult;
|
||||||
private MipLevel* _pMipLevels;
|
private MipLevel* _pMipLevels;
|
||||||
private float* radicalInverse_VdCLut;
|
private float* _radicalInverse_VdCLut;
|
||||||
|
|
||||||
private JobScheduler _jobScheduler = null!;
|
private JobScheduler _jobScheduler = null!;
|
||||||
|
|
||||||
@@ -434,10 +435,10 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
ThreadPriority = ThreadPriority.Normal,
|
ThreadPriority = ThreadPriority.Normal,
|
||||||
};
|
};
|
||||||
|
|
||||||
radicalInverse_VdCLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
|
_radicalInverse_VdCLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
|
||||||
for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
|
for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
|
||||||
{
|
{
|
||||||
radicalInverse_VdCLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
|
_radicalInverse_VdCLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
_jobScheduler = new JobScheduler(in desc);
|
_jobScheduler = new JobScheduler(in desc);
|
||||||
@@ -490,10 +491,12 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
[GlobalCleanup]
|
[GlobalCleanup]
|
||||||
public void Cleanup()
|
public void Cleanup()
|
||||||
{
|
{
|
||||||
|
#if false
|
||||||
for (var i = 0; i < _mipLevels; i++)
|
for (var i = 0; i < _mipLevels; i++)
|
||||||
{
|
{
|
||||||
DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
|
DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
_image.Dispose();
|
_image.Dispose();
|
||||||
for (var i = 0; i < _mipLevels; i++)
|
for (var i = 0; i < _mipLevels; i++)
|
||||||
@@ -503,12 +506,12 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
|
|
||||||
NativeMemory.Free(_pResult);
|
NativeMemory.Free(_pResult);
|
||||||
NativeMemory.Free(_pMipLevels);
|
NativeMemory.Free(_pMipLevels);
|
||||||
NativeMemory.Free(radicalInverse_VdCLut);
|
NativeMemory.Free(_radicalInverse_VdCLut);
|
||||||
|
|
||||||
_jobScheduler.Dispose();
|
_jobScheduler.Dispose();
|
||||||
}
|
}
|
||||||
|
|
||||||
[Benchmark]
|
[Benchmark(Baseline = true)]
|
||||||
public void JobGGX()
|
public void JobGGX()
|
||||||
{
|
{
|
||||||
JobHandle handle;
|
JobHandle handle;
|
||||||
@@ -519,7 +522,7 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
image = _image,
|
image = _image,
|
||||||
pMipLevels = _pMipLevels,
|
pMipLevels = _pMipLevels,
|
||||||
numMipLevels = _mipLevels,
|
numMipLevels = _mipLevels,
|
||||||
radicalInverse_VdCLut = radicalInverse_VdCLut
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||||
};
|
};
|
||||||
|
|
||||||
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
||||||
@@ -531,7 +534,7 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
image = _image,
|
image = _image,
|
||||||
pMipLevels = _pMipLevels,
|
pMipLevels = _pMipLevels,
|
||||||
numMipLevels = _mipLevels,
|
numMipLevels = _mipLevels,
|
||||||
radicalInverse_VdCLut = radicalInverse_VdCLut
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||||
};
|
};
|
||||||
|
|
||||||
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
||||||
@@ -548,7 +551,7 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
image = _image,
|
image = _image,
|
||||||
pMipLevels = _pMipLevels,
|
pMipLevels = _pMipLevels,
|
||||||
numMipLevels = _mipLevels,
|
numMipLevels = _mipLevels,
|
||||||
radicalInverse_VdCLut = radicalInverse_VdCLut
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||||
};
|
};
|
||||||
|
|
||||||
Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
|
Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
|
||||||
@@ -567,12 +570,10 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
image = _image,
|
image = _image,
|
||||||
pMipLevels = _pMipLevels,
|
pMipLevels = _pMipLevels,
|
||||||
numMipLevels = _mipLevels,
|
numMipLevels = _mipLevels,
|
||||||
radicalInverse_VdCLut = radicalInverse_VdCLut
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||||
};
|
};
|
||||||
|
|
||||||
//var handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
|
||||||
//_jobScheduler.Wait(handle);
|
|
||||||
var ctx = new JobExecutionContext();
|
var ctx = new JobExecutionContext();
|
||||||
job.Run(_totalPixel, in ctx);
|
job.Run(_totalPixel, in ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,16 +8,22 @@ using System.Runtime.InteropServices;
|
|||||||
|
|
||||||
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
||||||
|
|
||||||
|
const int count = 1;
|
||||||
|
|
||||||
var bench = new GGXMipGenerationBenchmark();
|
var bench = new GGXMipGenerationBenchmark();
|
||||||
bench.Setup();
|
bench.Setup();
|
||||||
|
|
||||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
bench.JobGGX();
|
|
||||||
sw.Stop();
|
|
||||||
Console.WriteLine($"GGX Mip Generation: {sw.Elapsed.TotalMilliseconds} ms");
|
|
||||||
bench.Cleanup();
|
|
||||||
|
|
||||||
//Console.WriteLine(sw.Elapsed.TotalMilliseconds);
|
for (int i = 0; i < count; i++)
|
||||||
|
{
|
||||||
|
bench.JobGGX();
|
||||||
|
}
|
||||||
|
|
||||||
|
sw.Stop();
|
||||||
|
var avgTime = sw.Elapsed.TotalMilliseconds / count;
|
||||||
|
Console.WriteLine($"GGX Mip Generation: {avgTime} ms");
|
||||||
|
bench.Cleanup();
|
||||||
|
|
||||||
//AllocationManager.Initialize(AllocationManagerInitOpts.Default);
|
//AllocationManager.Initialize(AllocationManagerInitOpts.Default);
|
||||||
//var set = new UnsafeBitSet(100, AllocationHandle.Persistent, AllocationOption.Clear);
|
//var set = new UnsafeBitSet(100, AllocationHandle.Persistent, AllocationOption.Clear);
|
||||||
|
|||||||
Reference in New Issue
Block a user