Refactor SPMD jobs for true vectorized/masked execution
- Change IJobSPMD.Execute to (indices, mask, ctx) signature for all arities, enabling proper vectorized/masked SPMD execution. - Update all SPMD job wrappers, extension methods, and test jobs to use new interface. - Add AVX2 gather/masked gather support to MathV.GatherVector2/3/4 and related methods; use [ConstantExpected] byte scale. - Improve gather/select logic, pointer arithmetic, and overloads for ref/int* index access. - Refactor GGXMipGenerationBenchmark and jobs for SPMD, with per-mip-level vectorized jobs and improved memory access. - Clean up code, fix naming, update comments, and bump version to 1.3.6.
This commit is contained in:
@@ -2,7 +2,6 @@ using BenchmarkDotNet.Attributes;
|
||||
using BenchmarkDotNet.Engines;
|
||||
using Misaki.HighPerformance.Image;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using SkiaSharp;
|
||||
using System.Runtime.CompilerServices;
|
||||
@@ -23,29 +22,180 @@ internal unsafe struct MipLevel
|
||||
public float roughness;
|
||||
}
|
||||
|
||||
//internal unsafe struct GGXMipGenerationJobSPMD : IJobParallelFor
|
||||
internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int>
|
||||
{
|
||||
public ImageResultFloat image;
|
||||
public MipLevel mipLevel;
|
||||
public float* radicalInverse_VdCLut;
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
private static Vector2<TFloat, float> Hammersley<TFloat>(int i, uint N, float* lut)
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
{
|
||||
var x = i / N;
|
||||
var y = TFloat.Load(lut + i);
|
||||
return MathV.Create<TFloat, float>(x, y);
|
||||
}
|
||||
|
||||
// --- GGX Importance Sampling ---
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
private static Vector3<TFloat, float> ImportanceSampleGGX<TFloat>(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
{
|
||||
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
||||
|
||||
var phi = 2.0f * PI * Xi.x;
|
||||
|
||||
var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultipleAdd(a * a - 1.0f, Xi.y, 1.0f));
|
||||
var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta);
|
||||
|
||||
// Spherical to Cartesian coordinates (Halfway vector)
|
||||
TFloat.SinCos(phi, out var sinPhi, out var cosPhi);
|
||||
var H = MathV.Create<TFloat, float>(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
|
||||
|
||||
// Tangent space to World space
|
||||
var mask = TFloat.Abs(N.z) < 0.999f;
|
||||
var up = MathV.Select(mask, MathV.Create<TFloat, float>(0.0f, 0.0f, 1.0f), MathV.Create<TFloat, float>(1.0f, 0.0f, 0.0f));
|
||||
|
||||
var tangent = MathV.Normalize(MathV.Cross(up, N));
|
||||
var bitangent = MathV.Cross(N, tangent);
|
||||
|
||||
var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
|
||||
return MathV.Normalize(sampleVec);
|
||||
}
|
||||
|
||||
// --- Image Sampling Helpers ---
|
||||
// Maps a 3D direction vector to 2D equirectangular UVs
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
private static Vector2<TFloat, float> DirToEquirectangularUV<TFloat>(Vector3<TFloat, float> dir)
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
{
|
||||
var u = TFloat.Atan2(dir.z, dir.x);
|
||||
var v = TFloat.Asin(dir.y);
|
||||
|
||||
u = u / (2.0f * PI) + 0.5f;
|
||||
v = v / PI + 0.5f;
|
||||
return MathV.Create<TFloat, float>(u, v);
|
||||
}
|
||||
|
||||
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
||||
// Do not inline this function to avoid register pressure and code bloat in the main sampling loop, as this is a relatively heavy operation and we want to keep it separate for better instruction cache usage.
|
||||
[MethodImpl(MethodImplOptions.NoInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
private static Vector3<TFloat, float> SampleEquirectangularMap<TFloat, TInt>(float* img, int w, int h, Vector3<TFloat, float> dir, TFloat mask)
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
where TInt : unmanaged, ISPMDLane<TInt, int>
|
||||
{
|
||||
var uv = DirToEquirectangularUV(dir);
|
||||
|
||||
// Nearest neighbor pixel coordinates
|
||||
var px = (uv.x * (w - 1.0f)).Cast<TInt, int>();
|
||||
var py = (uv.y * (h - 1.0f)).Cast<TInt, int>();
|
||||
|
||||
// Clamp
|
||||
px = TInt.Clamp(px, TInt.Zero, w - 1);
|
||||
py = TInt.Clamp(py, TInt.Zero, h - 1);
|
||||
|
||||
// Assuming float RGB array format
|
||||
var idx = (py * w + px) * 3;
|
||||
return MathV.MaskGatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), mask, 4);
|
||||
}
|
||||
|
||||
public readonly void Execute<TFloat, TInt>(TFloat indices, TFloat mask, ref readonly JobExecutionContext ctx)
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
where TInt : unmanaged, ISPMDLane<TInt, int>
|
||||
{
|
||||
var w = (int)mipLevel.width;
|
||||
var h = (int)mipLevel.height;
|
||||
var pData = mipLevel.data;
|
||||
|
||||
var indexSequence = indices.Cast<TInt, int>();
|
||||
|
||||
var x = indexSequence % w;
|
||||
var y = indexSequence / w;
|
||||
var u = x.Cast<TFloat, float>() / (w - 1);
|
||||
var v = y.Cast<TFloat, float>() / (h - 1);
|
||||
|
||||
var phi = (u - 0.5f) * 2.0f * PI;
|
||||
var theta = (v - 0.5f) * PI;
|
||||
|
||||
TFloat.SinCos(theta, out var sinTheta, out var cosTheta);
|
||||
TFloat.SinCos(phi, out var sinPhi, out var cosPhi);
|
||||
|
||||
var N = MathV.Create<TFloat, float>(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
|
||||
N = MathV.Normalize(N);
|
||||
|
||||
// For split-sum, we assume View and Reflection directions equal the Normal
|
||||
var V = N;
|
||||
var R = N;
|
||||
|
||||
var prefilteredColor = Vector3<TFloat, float>.Zero;
|
||||
var totalWeight = TFloat.Zero;
|
||||
|
||||
// 3. Monte Carlo Integration Loop
|
||||
|
||||
var dynamicSampleCount = (uint)max(1.0f, GGXMipGenerationBenchmark.SAMPLE_COUNT * mipLevel.roughness);
|
||||
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
||||
var lumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
||||
|
||||
for (var i = 0; i < dynamicSampleCount; i++)
|
||||
{
|
||||
// Generate a Hammersley random sequence point
|
||||
var Xi = Hammersley<TFloat>(i, dynamicSampleCount, radicalInverse_VdCLut);
|
||||
|
||||
// Get the halfway vector based on GGX NDF
|
||||
var H = ImportanceSampleGGX(Xi, N, mipLevel.roughness);
|
||||
|
||||
// Calculate Light direction
|
||||
var L = MathV.Reflect(-V, H);
|
||||
L = MathV.Normalize(L);
|
||||
|
||||
var NdotL = TFloat.Max(MathV.Dot(N, L), TFloat.Zero);
|
||||
var sampleColor = SampleEquirectangularMap<TFloat, TInt>(image.Data, (int)image.Width, (int)image.Height, L, mask);
|
||||
|
||||
// The Karis Average Weight: 1 / (1 + luma)
|
||||
// A normal sky pixel (luma 1.0) gets a weight of 0.5.
|
||||
// A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
|
||||
// This introduce bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
|
||||
// And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
|
||||
var luma = MathV.Dot(sampleColor, lumaVector);
|
||||
var fireflyWeight = TFloat.One / (TFloat.One + luma);
|
||||
var finalWeight = NdotL * fireflyWeight;
|
||||
|
||||
prefilteredColor += sampleColor * finalWeight;
|
||||
totalWeight += finalWeight;
|
||||
}
|
||||
|
||||
prefilteredColor = MathV.Select(totalWeight > 0.0f, prefilteredColor * TFloat.Rcp(totalWeight), prefilteredColor);
|
||||
|
||||
// Write to output mip array
|
||||
var out_idx = (y * w + x) * 3;
|
||||
|
||||
// TODO: Optimize this
|
||||
for (var i = 0; i < TFloat.LaneWidth; i++)
|
||||
{
|
||||
if (mask[i] == 0.0f)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var idx = out_idx[i];
|
||||
|
||||
pData[idx] = prefilteredColor.x[i];
|
||||
pData[idx + 1] = prefilteredColor.y[i];
|
||||
pData[idx + 2] = prefilteredColor.z[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
||||
where TFloat : unmanaged, ISPMDLane<TFloat, float>
|
||||
where TInt : unmanaged, ISPMDLane<TInt, int>
|
||||
{
|
||||
public const uint SAMPLE_COUNT = 1024u;
|
||||
|
||||
public ImageResultFloat image;
|
||||
public MipLevel* pMipLevels;
|
||||
public float* radicalInverse_VdCLut;
|
||||
public int numMipLevels;
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
public static float RadicalInverse_VdC(uint bits)
|
||||
{
|
||||
bits = (bits << 16) | (bits >> 16);
|
||||
bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
|
||||
bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
|
||||
bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
|
||||
bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
|
||||
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
|
||||
private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
|
||||
{
|
||||
@@ -110,7 +260,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
||||
|
||||
// Assuming float RGB array format
|
||||
var idx = (py * w + px) * 3;
|
||||
return MathV.GatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), 1);
|
||||
return MathV.GatherVector3<TFloat, float>(img, idx.GetUnsafePtr(), 4);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
|
||||
@@ -122,7 +272,6 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
||||
m++;
|
||||
}
|
||||
|
||||
var span = new ReadOnlySpan<MipLevel>(pMipLevels, numMipLevels);
|
||||
var pLevel = &pMipLevels[m];
|
||||
|
||||
var w = (int)pLevel->width;
|
||||
@@ -164,7 +313,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
||||
|
||||
// 3. Monte Carlo Integration Loop
|
||||
|
||||
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
||||
var dynamicSampleCount = (uint)max(1.0f, GGXMipGenerationBenchmark.SAMPLE_COUNT * pLevel->roughness);
|
||||
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
|
||||
var vLumaVector = MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f);
|
||||
|
||||
@@ -226,15 +375,11 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
|
||||
}
|
||||
}
|
||||
|
||||
internal unsafe struct GGXMipGenerationJob : IJobParallelFor
|
||||
[SimpleJob(RunStrategy.ColdStart, launchCount: 1, warmupCount: 0, iterationCount: 1, invocationCount: 1, id: "QuickRun")]
|
||||
public unsafe class GGXMipGenerationBenchmark
|
||||
{
|
||||
public const uint SAMPLE_COUNT = 1024u;
|
||||
|
||||
public ImageResultFloat image;
|
||||
public MipLevel* pMipLevels;
|
||||
public float* radicalInverse_VdCLut;
|
||||
public int numMipLevels;
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static float RadicalInverse_VdC(uint bits)
|
||||
{
|
||||
@@ -246,143 +391,6 @@ internal unsafe struct GGXMipGenerationJob : IJobParallelFor
|
||||
return bits * 2.3283064365386963e-10f; // bits / 0x100000000
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static float2 Hammersley(uint i, uint N, float* lut)
|
||||
{
|
||||
return float2((float)i / N, lut[i]);
|
||||
}
|
||||
|
||||
// --- GGX Importance Sampling ---
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness)
|
||||
{
|
||||
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
|
||||
|
||||
var phi = 2.0f * PI * Xi.x;
|
||||
var cosTheta = sqrt((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y));
|
||||
var sinTheta = sqrt(1.0f - cosTheta * cosTheta);
|
||||
|
||||
// Spherical to Cartesian coordinates (Halfway vector)
|
||||
sincos(phi, out var sinPhi, out var cosPhi);
|
||||
var H = float3(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
|
||||
|
||||
// Tangent space to World space
|
||||
var up = abs(N.z) < 0.999f ? float3(0.0f, 0.0f, 1.0f) : float3(1.0f, 0.0f, 0.0f);
|
||||
var tangent = normalize(cross(up, N));
|
||||
var bitangent = cross(N, tangent);
|
||||
|
||||
var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
|
||||
return normalize(sampleVec);
|
||||
}
|
||||
|
||||
// --- Image Sampling Helpers ---
|
||||
// Maps a 3D direction vector to 2D equirectangular UVs
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static float2 DirToEquirectangularUV(float3 dir)
|
||||
{
|
||||
var uv = float2(atan2(dir.z, dir.x), asin(dir.y));
|
||||
uv.x = uv.x / (2.0f * PI) + 0.5f;
|
||||
uv.y = uv.y / PI + 0.5f;
|
||||
return uv;
|
||||
}
|
||||
|
||||
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static float3 SampleEquirectangularMap(float* img, int w, int h, float3 dir)
|
||||
{
|
||||
var uv = DirToEquirectangularUV(dir);
|
||||
|
||||
// Nearest neighbor pixel coordinates
|
||||
var px = (int)(uv.x * (w - 1));
|
||||
var py = (int)(uv.y * (h - 1));
|
||||
|
||||
// Clamp
|
||||
px = clamp(px, 0, w - 1);
|
||||
py = clamp(py, 0, h - 1);
|
||||
|
||||
// Assuming float RGB array format
|
||||
var idx = (py * w + px) * 3;
|
||||
return float3(img[idx], img[idx + 1], img[idx + 2]);
|
||||
}
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var m = 0;
|
||||
while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
|
||||
{
|
||||
m++;
|
||||
}
|
||||
|
||||
var pLevel = &pMipLevels[m];
|
||||
|
||||
var w = (int)pLevel->width;
|
||||
var h = (int)pLevel->height;
|
||||
var pData = pLevel->data;
|
||||
|
||||
var local_i = loopIndex - pLevel->offset;
|
||||
var x = local_i % w;
|
||||
var y = local_i / w;
|
||||
var u = (float)x / (w - 1);
|
||||
var v = (float)y / (h - 1);
|
||||
|
||||
var phi = (u - 0.5f) * 2.0f * PI;
|
||||
var theta = (v - 0.5f) * PI;
|
||||
|
||||
sincos(theta, out var sinTheta, out var cosTheta);
|
||||
sincos(phi, out var sinPhi, out var cosPhi);
|
||||
var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
|
||||
N = normalize(N);
|
||||
|
||||
// For split-sum, we assume View and Reflection directions equal the Normal
|
||||
var V = N;
|
||||
var R = N;
|
||||
|
||||
var prefilteredColor = float3(0, 0, 0);
|
||||
var totalWeight = 0.0f;
|
||||
|
||||
// 3. Monte Carlo Integration Loop
|
||||
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
|
||||
for (var i = 0u; i < dynamicSampleCount; i++)
|
||||
{
|
||||
// Generate a Hammersley random sequence point
|
||||
var Xi = Hammersley(i, dynamicSampleCount, radicalInverse_VdCLut);
|
||||
|
||||
// Get the halfway vector based on GGX NDF
|
||||
var H = ImportanceSampleGGX(Xi, N, pLevel->roughness);
|
||||
|
||||
// Calculate Light direction
|
||||
var L = reflect(-V, H);
|
||||
L = normalize(L);
|
||||
|
||||
var NdotL = max(dot(N, L), 0.0f);
|
||||
|
||||
// If light is above the horizon
|
||||
if (NdotL > 0.0f)
|
||||
{
|
||||
var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);
|
||||
|
||||
prefilteredColor += sampleColor * NdotL;
|
||||
totalWeight += NdotL;
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Average the result
|
||||
if (totalWeight > 0.0f)
|
||||
{
|
||||
prefilteredColor *= 1.0f / totalWeight;
|
||||
}
|
||||
|
||||
// Write to output mip array
|
||||
var out_idx = (y * w + x) * 3;
|
||||
pData[out_idx] = prefilteredColor.x;
|
||||
pData[out_idx + 1] = prefilteredColor.y;
|
||||
pData[out_idx + 2] = prefilteredColor.z;
|
||||
}
|
||||
}
|
||||
|
||||
[SimpleJob(RunStrategy.ColdStart, launchCount: 1, warmupCount: 0, iterationCount: 1, invocationCount: 1, id: "QuickRun")]
|
||||
public unsafe class GGXMipGenerationBenchmark
|
||||
{
|
||||
private ImageResultFloat _image;
|
||||
private int _mipLevels;
|
||||
private int _totalPixel;
|
||||
@@ -395,12 +403,12 @@ public unsafe class GGXMipGenerationBenchmark
|
||||
[GlobalSetup]
|
||||
public void Setup()
|
||||
{
|
||||
const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
|
||||
//const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
|
||||
//const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
|
||||
const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
|
||||
using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
|
||||
_image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);
|
||||
|
||||
_mipLevels = (int)Math.Floor(Math.Log2(Math.Max(_image.Width, _image.Height))) + 1;
|
||||
_mipLevels = (int)MathF.Floor(MathF.Log2(Math.Max(_image.Width, _image.Height))) + 1;
|
||||
|
||||
_pResult = (float**)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(float*)));
|
||||
_pMipLevels = (MipLevel*)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(MipLevel)));
|
||||
@@ -434,10 +442,10 @@ public unsafe class GGXMipGenerationBenchmark
|
||||
ThreadPriority = ThreadPriority.Normal,
|
||||
};
|
||||
|
||||
_radicalInverse_VdCLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
|
||||
for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
|
||||
_radicalInverse_VdCLut = (float*)NativeMemory.Alloc(SAMPLE_COUNT * sizeof(float));
|
||||
for (var i = 0u; i < SAMPLE_COUNT; i++)
|
||||
{
|
||||
_radicalInverse_VdCLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
|
||||
_radicalInverse_VdCLut[i] = RadicalInverse_VdC(i);
|
||||
}
|
||||
|
||||
_jobScheduler = new JobScheduler(in desc);
|
||||
@@ -490,7 +498,7 @@ public unsafe class GGXMipGenerationBenchmark
|
||||
[GlobalCleanup]
|
||||
public void Cleanup()
|
||||
{
|
||||
#if true
|
||||
#if false
|
||||
for (var i = 0; i < _mipLevels; i++)
|
||||
{
|
||||
DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
|
||||
@@ -542,37 +550,43 @@ public unsafe class GGXMipGenerationBenchmark
|
||||
_jobScheduler.Wait(handle);
|
||||
}
|
||||
|
||||
//[Benchmark]
|
||||
public void ParallelGGX()
|
||||
{
|
||||
var job = new GGXMipGenerationJob
|
||||
{
|
||||
image = _image,
|
||||
pMipLevels = _pMipLevels,
|
||||
numMipLevels = _mipLevels,
|
||||
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||
};
|
||||
|
||||
Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
|
||||
{
|
||||
var localJob = job;
|
||||
var ctx = new JobExecutionContext();
|
||||
localJob.Execute(i, in ctx);
|
||||
});
|
||||
}
|
||||
|
||||
[Benchmark]
|
||||
public void SingleThreadGGX()
|
||||
public void MipLevelJobGGX()
|
||||
{
|
||||
var job = new GGXMipGenerationJob
|
||||
var pJobHandles = (Span<JobHandle>)stackalloc JobHandle[_mipLevels];
|
||||
for (var m = 0; m < _mipLevels; m++)
|
||||
{
|
||||
image = _image,
|
||||
pMipLevels = _pMipLevels,
|
||||
numMipLevels = _mipLevels,
|
||||
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||
};
|
||||
var job = new GGXMipGenerationJobSPMD
|
||||
{
|
||||
image = _image,
|
||||
mipLevel = _pMipLevels[m],
|
||||
radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||
};
|
||||
|
||||
var ctx = new JobExecutionContext();
|
||||
job.Run(_totalPixel, in ctx);
|
||||
var totalPixel = (int)(_pMipLevels[m].width * _pMipLevels[m].height);
|
||||
var handle = _jobScheduler.ScheduleParallelSPDM<GGXMipGenerationJobSPMD, float, int>(ref job, totalPixel, 64, false, JobPriority.Normal);
|
||||
pJobHandles[m] = handle;
|
||||
}
|
||||
|
||||
_jobScheduler.WaitAll(pJobHandles);
|
||||
}
|
||||
|
||||
//[Benchmark]
|
||||
//public void ParallelGGX()
|
||||
//{
|
||||
// var job = new GGXMipGenerationJob
|
||||
// {
|
||||
// image = _image,
|
||||
// pMipLevels = _pMipLevels,
|
||||
// numMipLevels = _mipLevels,
|
||||
// radicalInverse_VdCLut = _radicalInverse_VdCLut
|
||||
// };
|
||||
|
||||
// Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
|
||||
// {
|
||||
// var localJob = job;
|
||||
// var ctx = new JobExecutionContext();
|
||||
// localJob.Execute(i, in ctx);
|
||||
// });
|
||||
//}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user