Removed unmanaged struct requirement from SPMD job wrappers and extension methods, allowing managed types. Updated all wrappers and extension methods to require only the interface constraint. Refactored SPMD test jobs to use safe ref-based Store overloads. Improved README and docs with clearer debug/mimalloc instructions and a better SPMD example. Cleaned up Program.cs by removing obsolete experimental code. Enhanced math precision in GGXMipGenerationBenchmark. Updated T4 template to generate new constraints and APIs.
579 lines
20 KiB
C#
579 lines
20 KiB
C#
using BenchmarkDotNet.Attributes;
|
|
using BenchmarkDotNet.Engines;
|
|
using Misaki.HighPerformance.Image;
|
|
using Misaki.HighPerformance.Jobs;
|
|
using Misaki.HighPerformance.Mathematics;
|
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
|
using SkiaSharp;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.InteropServices;
|
|
using static Misaki.HighPerformance.Mathematics.math;
|
|
|
|
//using TFloat = Misaki.HighPerformance.Mathematics.SPMD.WideLane<float>;
|
|
//using TInt = Misaki.HighPerformance.Mathematics.SPMD.WideLane<int>;
|
|
|
|
namespace Misaki.HighPerformance.Test.Benchmark;
|
|
|
|
internal unsafe struct MipLevel
|
|
{
|
|
public float* data;
|
|
public uint width;
|
|
public uint height;
|
|
public int offset;
|
|
public float roughness;
|
|
}
|
|
|
|
//internal unsafe struct GGXMipGenerationJobSPMD : IJobParallelFor
|
|
internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
|
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
|
where TInt : unmanaged, ISPMDLane<TInt, int>
|
|
{
|
|
public const uint SAMPLE_COUNT = 1024u;
|
|
|
|
public ImageResultFloat image;
|
|
public MipLevel* pMipLevels;
|
|
public float* radicalInverse_VdCLut;
|
|
public int numMipLevels;
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
|
public static float RadicalInverse_VdC(uint bits)
|
|
{
|
|
bits = (bits << 16) | (bits >> 16);
|
|
bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
|
|
bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
|
|
bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
|
|
bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
|
|
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
|
private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
|
|
{
|
|
var x = i / N;
|
|
var y = TFloat.Load(lut + (int)i[0]);
|
|
return MathV.Create<TFloat, float>(x, y);
|
|
}
|
|
|
|
// --- GGX Importance Sampling ---
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
|
private static Vector3<TFloat, float> ImportanceSampleGGX(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
|
|
{
|
|
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
|
|
|
var phi = 2.0f * PI * Xi.x;
|
|
|
|
var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultipleAdd(a * a - 1.0f, Xi.y, 1.0f));
|
|
var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta);
|
|
|
|
// Spherical to Cartesian coordinates (Halfway vector)
|
|
TFloat.SinCos(phi, out var sinPhi, out var cosPhi);
|
|
var H = MathV.Create<TFloat, float>(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
|
|
|
|
// Tangent space to World space
|
|
var mask = TFloat.Abs(N.z) < 0.999f;
|
|
var up = MathV.Select(mask, MathV.Create<TFloat, float>(0.0f, 0.0f, 1.0f), MathV.Create<TFloat, float>(1.0f, 0.0f, 0.0f));
|
|
|
|
var tangent = MathV.Normalize(MathV.Cross(up, N));
|
|
var bitangent = MathV.Cross(N, tangent);
|
|
|
|
var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
|
|
return MathV.Normalize(sampleVec);
|
|
}
|
|
|
|
// --- Image Sampling Helpers ---
|
|
// Maps a 3D direction vector to 2D equirectangular UVs
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
|
private static Vector2<TFloat, float> DirToEquirectangularUV(Vector3<TFloat, float> dir)
|
|
{
|
|
var u = TFloat.Atan2(dir.z, dir.x);
|
|
var v = TFloat.Asin(dir.y);
|
|
|
|
u = u / (2.0f * PI) + 0.5f;
|
|
v = v / PI + 0.5f;
|
|
return MathV.Create<TFloat, float>(u, v);
|
|
}
|
|
|
|
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
|
// Do not inline this function to avoid register pressure and code bloat in the main sampling loop, as this is a relatively heavy operation and we want to keep it separate for better instruction cache usage.
|
|
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
|
|
private static Vector3<TFloat, float> SampleEquirectangularMap(float* img, int w, int h, Vector3<TFloat, float> dir)
|
|
{
|
|
var uv = DirToEquirectangularUV(dir);
|
|
|
|
// Nearest neighbor pixel coordinates
|
|
var px = (uv.x * (w - 1.0f)).Cast<TInt, int>();
|
|
var py = (uv.y * (h - 1.0f)).Cast<TInt, int>();
|
|
|
|
// Clamp
|
|
px = TInt.Clamp(px, TInt.Zero, w - 1);
|
|
py = TInt.Clamp(py, TInt.Zero, h - 1);
|
|
|
|
// Assuming float RGB array format
|
|
var idx = (py * w + px) * 3;
|
|
return MathV.GatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), 1);
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
|
|
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
|
{
|
|
var m = 0;
|
|
while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
|
|
{
|
|
m++;
|
|
}
|
|
|
|
var span = new ReadOnlySpan<MipLevel>(pMipLevels, numMipLevels);
|
|
var pLevel = &pMipLevels[m];
|
|
|
|
var w = (int)pLevel->width;
|
|
var h = (int)pLevel->height;
|
|
var pData = pLevel->data;
|
|
|
|
var local_i = loopIndex - pLevel->offset;
|
|
var x = local_i % w;
|
|
var y = local_i / w;
|
|
var u = (float)x / (w - 1);
|
|
var v = (float)y / (h - 1);
|
|
|
|
var phi = (u - 0.5f) * 2.0f * PI;
|
|
var theta = (v - 0.5f) * PI;
|
|
|
|
sincos(theta, out var sinTheta, out var cosTheta);
|
|
sincos(phi, out var sinPhi, out var cosPhi);
|
|
var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
|
|
N = normalize(N);
|
|
|
|
// For split-sum, we assume View and Reflection directions equal the Normal
|
|
var V = N;
|
|
var R = N;
|
|
|
|
var vN = MathV.Create<TFloat, float>(
|
|
TFloat.Create(N.x),
|
|
TFloat.Create(N.y),
|
|
TFloat.Create(N.z)
|
|
);
|
|
|
|
var vV = MathV.Create<TFloat, float>(
|
|
TFloat.Create(V.x),
|
|
TFloat.Create(V.y),
|
|
TFloat.Create(V.z)
|
|
);
|
|
|
|
var vPrefilteredColor = Vector3<TFloat, float>.Zero;
|
|
var vTotalWeight = TFloat.Zero;
|
|
|
|
// 3. Monte Carlo Integration Loop
|
|
|
|
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
|
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
|
var vLumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
|
|
|
for (var i = 0u; i < dynamicSampleCount; i += (uint)TFloat.LaneWidth)
|
|
{
|
|
var laneIndices = TFloat.Sequence(i, 1.0f);
|
|
var validLaneMask = laneIndices < vDynamicSampleCount;
|
|
|
|
// Generate a Hammersley random sequence point
|
|
var Xi = Hammersley(laneIndices, dynamicSampleCount, radicalInverse_VdCLut);
|
|
|
|
// Get the halfway vector based on GGX NDF
|
|
var H = ImportanceSampleGGX(Xi, vN, pLevel->roughness);
|
|
|
|
// Calculate Light direction
|
|
var L = MathV.Reflect(-vV, H);
|
|
L = MathV.Normalize(L);
|
|
|
|
var NdotL = TFloat.Max(MathV.Dot(vN, L), TFloat.Zero);
|
|
var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);
|
|
|
|
NdotL &= validLaneMask;
|
|
|
|
// The Karis Average Weight: 1 / (1 + luma)
|
|
// A normal sky pixel (luma 1.0) gets a weight of 0.5.
|
|
// A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
|
|
// This introduce bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
|
|
// And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
|
|
var luma = MathV.Dot(sampleColor, vLumaVector);
|
|
var fireflyWeight = TFloat.One / (TFloat.One + luma);
|
|
var finalWeight = NdotL * fireflyWeight;
|
|
|
|
vPrefilteredColor += sampleColor * finalWeight;
|
|
vTotalWeight += finalWeight;
|
|
}
|
|
|
|
var totalWeight = 0.0f;
|
|
var prefilteredColor = float3(0, 0, 0);
|
|
|
|
for (var i = 0; i < TFloat.LaneWidth; i++)
|
|
{
|
|
prefilteredColor.x += vPrefilteredColor.x[i];
|
|
prefilteredColor.y += vPrefilteredColor.y[i];
|
|
prefilteredColor.z += vPrefilteredColor.z[i];
|
|
totalWeight += vTotalWeight[i];
|
|
}
|
|
|
|
// 4. Average the result
|
|
if (totalWeight > 0.0f)
|
|
{
|
|
prefilteredColor *= 1.0f / totalWeight;
|
|
}
|
|
|
|
// Write to output mip array
|
|
var out_idx = (y * w + x) * 3;
|
|
pData[out_idx] = prefilteredColor.x;
|
|
pData[out_idx + 1] = prefilteredColor.y;
|
|
pData[out_idx + 2] = prefilteredColor.z;
|
|
}
|
|
}
|
|
|
|
internal unsafe struct GGXMipGenerationJob : IJobParallelFor
|
|
{
|
|
public const uint SAMPLE_COUNT = 1024u;
|
|
|
|
public ImageResultFloat image;
|
|
public MipLevel* pMipLevels;
|
|
public float* radicalInverse_VdCLut;
|
|
public int numMipLevels;
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
public static float RadicalInverse_VdC(uint bits)
|
|
{
|
|
bits = (bits << 16) | (bits >> 16);
|
|
bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
|
|
bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
|
|
bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
|
|
bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
|
|
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
|
}
|
|
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static float2 Hammersley(uint i, uint N, float* lut)
|
|
{
|
|
return float2((float)i / N, lut[i]);
|
|
}
|
|
|
|
// --- GGX Importance Sampling ---
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness)
|
|
{
|
|
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
|
|
|
var phi = 2.0f * PI * Xi.x;
|
|
var cosTheta = sqrt((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y));
|
|
var sinTheta = sqrt(1.0f - cosTheta * cosTheta);
|
|
|
|
// Spherical to Cartesian coordinates (Halfway vector)
|
|
sincos(phi, out var sinPhi, out var cosPhi);
|
|
var H = float3(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
|
|
|
|
// Tangent space to World space
|
|
var up = abs(N.z) < 0.999f ? float3(0.0f, 0.0f, 1.0f) : float3(1.0f, 0.0f, 0.0f);
|
|
var tangent = normalize(cross(up, N));
|
|
var bitangent = cross(N, tangent);
|
|
|
|
var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
|
|
return normalize(sampleVec);
|
|
}
|
|
|
|
// --- Image Sampling Helpers ---
|
|
// Maps a 3D direction vector to 2D equirectangular UVs
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static float2 DirToEquirectangularUV(float3 dir)
|
|
{
|
|
var uv = float2(atan2(dir.z, dir.x), asin(dir.y));
|
|
uv.x = uv.x / (2.0f * PI) + 0.5f;
|
|
uv.y = uv.y / PI + 0.5f;
|
|
return uv;
|
|
}
|
|
|
|
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
|
private static float3 SampleEquirectangularMap(float* img, int w, int h, float3 dir)
|
|
{
|
|
var uv = DirToEquirectangularUV(dir);
|
|
|
|
// Nearest neighbor pixel coordinates
|
|
var px = (int)(uv.x * (w - 1));
|
|
var py = (int)(uv.y * (h - 1));
|
|
|
|
// Clamp
|
|
px = clamp(px, 0, w - 1);
|
|
py = clamp(py, 0, h - 1);
|
|
|
|
// Assuming float RGB array format
|
|
var idx = (py * w + px) * 3;
|
|
return float3(img[idx], img[idx + 1], img[idx + 2]);
|
|
}
|
|
|
|
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
|
{
|
|
var m = 0;
|
|
while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
|
|
{
|
|
m++;
|
|
}
|
|
|
|
var pLevel = &pMipLevels[m];
|
|
|
|
var w = (int)pLevel->width;
|
|
var h = (int)pLevel->height;
|
|
var pData = pLevel->data;
|
|
|
|
var local_i = loopIndex - pLevel->offset;
|
|
var x = local_i % w;
|
|
var y = local_i / w;
|
|
var u = (float)x / (w - 1);
|
|
var v = (float)y / (h - 1);
|
|
|
|
var phi = (u - 0.5f) * 2.0f * PI;
|
|
var theta = (v - 0.5f) * PI;
|
|
|
|
sincos(theta, out var sinTheta, out var cosTheta);
|
|
sincos(phi, out var sinPhi, out var cosPhi);
|
|
var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
|
|
N = normalize(N);
|
|
|
|
// For split-sum, we assume View and Reflection directions equal the Normal
|
|
var V = N;
|
|
var R = N;
|
|
|
|
var prefilteredColor = float3(0, 0, 0);
|
|
var totalWeight = 0.0f;
|
|
|
|
// 3. Monte Carlo Integration Loop
|
|
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
|
for (var i = 0u; i < dynamicSampleCount; i++)
|
|
{
|
|
// Generate a Hammersley random sequence point
|
|
var Xi = Hammersley(i, dynamicSampleCount, radicalInverse_VdCLut);
|
|
|
|
// Get the halfway vector based on GGX NDF
|
|
var H = ImportanceSampleGGX(Xi, N, pLevel->roughness);
|
|
|
|
// Calculate Light direction
|
|
var L = reflect(-V, H);
|
|
L = normalize(L);
|
|
|
|
var NdotL = max(dot(N, L), 0.0f);
|
|
|
|
// If light is above the horizon
|
|
if (NdotL > 0.0f)
|
|
{
|
|
var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);
|
|
|
|
prefilteredColor += sampleColor * NdotL;
|
|
totalWeight += NdotL;
|
|
}
|
|
}
|
|
|
|
// 4. Average the result
|
|
if (totalWeight > 0.0f)
|
|
{
|
|
prefilteredColor *= 1.0f / totalWeight;
|
|
}
|
|
|
|
// Write to output mip array
|
|
var out_idx = (y * w + x) * 3;
|
|
pData[out_idx] = prefilteredColor.x;
|
|
pData[out_idx + 1] = prefilteredColor.y;
|
|
pData[out_idx + 2] = prefilteredColor.z;
|
|
}
|
|
}
|
|
|
|
[SimpleJob(RunStrategy.ColdStart, launchCount: 1, warmupCount: 0, iterationCount: 1, invocationCount: 1, id: "QuickRun")]
|
|
public unsafe class GGXMipGenerationBenchmark
|
|
{
|
|
private ImageResultFloat _image;
|
|
private int _mipLevels;
|
|
private int _totalPixel;
|
|
private float** _pResult;
|
|
private MipLevel* _pMipLevels;
|
|
private float* _radicalInverse_VdCLut;
|
|
|
|
private JobScheduler _jobScheduler = null!;
|
|
|
|
[GlobalSetup]
|
|
public void Setup()
|
|
{
|
|
const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
|
|
//const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
|
|
using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
|
|
_image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);
|
|
|
|
_mipLevels = (int)Math.Floor(Math.Log2(Math.Max(_image.Width, _image.Height))) + 1;
|
|
|
|
_pResult = (float**)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(float*)));
|
|
_pMipLevels = (MipLevel*)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(MipLevel)));
|
|
|
|
uint w, h;
|
|
|
|
for (var i = 0; i < _mipLevels; i++)
|
|
{
|
|
w = Math.Max(1, _image.Width >> i);
|
|
h = Math.Max(1, _image.Height >> i);
|
|
|
|
var sizeInBytes = (nuint)(w * h * 3 * sizeof(float));
|
|
_pResult[i] = (float*)NativeMemory.Alloc(sizeInBytes);
|
|
|
|
_pMipLevels[i] = new MipLevel
|
|
{
|
|
width = w,
|
|
height = h,
|
|
offset = _totalPixel,
|
|
data = _pResult[i],
|
|
roughness = (float)i / (_mipLevels - 1) // Linear roughness from 0 to 1 across mip levels
|
|
};
|
|
|
|
_totalPixel += (int)(w * h);
|
|
}
|
|
|
|
var desc = new JobSchedulerDesc
|
|
{
|
|
DependencyChainCapacity = 16,
|
|
ThreadCount = Environment.ProcessorCount - 1,
|
|
ThreadPriority = ThreadPriority.Normal,
|
|
};
|
|
|
|
_radicalInverse_VdCLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
|
|
for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
|
|
{
|
|
_radicalInverse_VdCLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
|
|
}
|
|
|
|
_jobScheduler = new JobScheduler(in desc);
|
|
}
|
|
|
|
public void DumpMipLevelToPng(float* pData, int width, int height, string filePath)
|
|
{
|
|
// Create a standard 32-bit RGBA bitmap
|
|
using var bitmap = new SKBitmap(width, height, SKColorType.Rgba8888, SKAlphaType.Opaque);
|
|
|
|
// Get a pointer to the SkiaSharp pixel buffer
|
|
var pPixels = (byte*)bitmap.GetPixels();
|
|
|
|
for (var y = 0; y < height; y++)
|
|
{
|
|
for (var x = 0; x < width; x++)
|
|
{
|
|
// Your data is tightly packed floats: R, G, B
|
|
var inIdx = (y * width + x) * 3;
|
|
var r = pData[inIdx];
|
|
var g = pData[inIdx + 1];
|
|
var b = pData[inIdx + 2];
|
|
|
|
// Basic Tone Mapping (Exposure + Gamma Correction) so we can see HDR values on a normal screen
|
|
// Gamma 2.2 = roughly pow(color, 1.0/2.2)
|
|
r = MathF.Pow(MathF.Max(0, r), 1.0f / 2.2f);
|
|
g = MathF.Pow(MathF.Max(0, g), 1.0f / 2.2f);
|
|
b = MathF.Pow(MathF.Max(0, b), 1.0f / 2.2f);
|
|
|
|
// Convert 0.0-1.0 to 0-255 byte
|
|
var rByte = (byte)Math.Clamp(r * 255.0f, 0, 255);
|
|
var gByte = (byte)Math.Clamp(g * 255.0f, 0, 255);
|
|
var bByte = (byte)Math.Clamp(b * 255.0f, 0, 255);
|
|
|
|
// Write to Skia's buffer (RGBA)
|
|
var outIdx = (y * width + x) * 4;
|
|
pPixels[outIdx] = rByte;
|
|
pPixels[outIdx + 1] = gByte;
|
|
pPixels[outIdx + 2] = bByte;
|
|
pPixels[outIdx + 3] = 255; // Alpha
|
|
}
|
|
}
|
|
|
|
// Save out the preview
|
|
using var data = bitmap.Encode(SKEncodedImageFormat.Png, 100);
|
|
using var stream = File.OpenWrite(filePath);
|
|
data.SaveTo(stream);
|
|
}
|
|
|
|
[GlobalCleanup]
|
|
public void Cleanup()
|
|
{
|
|
#if true
|
|
for (var i = 0; i < _mipLevels; i++)
|
|
{
|
|
DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
|
|
}
|
|
#endif
|
|
|
|
_image.Dispose();
|
|
for (var i = 0; i < _mipLevels; i++)
|
|
{
|
|
NativeMemory.Free(_pResult[i]);
|
|
}
|
|
|
|
NativeMemory.Free(_pResult);
|
|
NativeMemory.Free(_pMipLevels);
|
|
NativeMemory.Free(_radicalInverse_VdCLut);
|
|
|
|
_jobScheduler.Dispose();
|
|
}
|
|
|
|
[Benchmark(Baseline = true)]
|
|
public void JobGGX()
|
|
{
|
|
JobHandle handle;
|
|
if (WideLane.IsSupported)
|
|
{
|
|
var job = new GGXMipGenerationJobSPMD<WideLane<float>, WideLane<int>>
|
|
{
|
|
image = _image,
|
|
pMipLevels = _pMipLevels,
|
|
numMipLevels = _mipLevels,
|
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
|
};
|
|
|
|
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
|
}
|
|
else
|
|
{
|
|
var job = new GGXMipGenerationJobSPMD<ScalarLane<float>, ScalarLane<int>>
|
|
{
|
|
image = _image,
|
|
pMipLevels = _pMipLevels,
|
|
numMipLevels = _mipLevels,
|
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
|
};
|
|
|
|
handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
|
|
}
|
|
|
|
_jobScheduler.Wait(handle);
|
|
}
|
|
|
|
//[Benchmark]
|
|
public void ParallelGGX()
|
|
{
|
|
var job = new GGXMipGenerationJob
|
|
{
|
|
image = _image,
|
|
pMipLevels = _pMipLevels,
|
|
numMipLevels = _mipLevels,
|
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
|
};
|
|
|
|
Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
|
|
{
|
|
var localJob = job;
|
|
var ctx = new JobExecutionContext();
|
|
localJob.Execute(i, in ctx);
|
|
});
|
|
}
|
|
|
|
[Benchmark]
|
|
public void SingleThreadGGX()
|
|
{
|
|
var job = new GGXMipGenerationJob
|
|
{
|
|
image = _image,
|
|
pMipLevels = _pMipLevels,
|
|
numMipLevels = _mipLevels,
|
|
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
|
};
|
|
|
|
var ctx = new JobExecutionContext();
|
|
job.Run(_totalPixel, in ctx);
|
|
}
|
|
}
|