Refactor SPMD job system, add GGX mipmap benchmark
- Replace IJobSPMD with T4-generated, multi-type SPMD job interfaces and wrappers (up to 8 numeric types)
- Extend ISPMD with Cast/BitCast; implement for ScalarLane and WideLane (SIMD-aware)
- Add unary minus, scalar-lane, and lane-scalar operators to Vector2/3/4; improve Select methods
- WideLane is now partial, with T4-generated Cast/BitCast (SIMD conversions)
- SPMD job Execute now requires unmanaged TLane; update all usages and benchmarks
- Add GGXMipGenerationBenchmark with vectorized and scalar paths and SkiaSharp output
- Update project files: add generated code, SkiaSharp, bump version to 1.3.0
- Misc: fix formatting, method signatures, FreeList logic
This commit is contained in:
@@ -0,0 +1,602 @@
|
||||
using BenchmarkDotNet.Attributes;
|
||||
using Misaki.HighPerformance.Image;
|
||||
using Misaki.HighPerformance.Jobs;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||
using SkiaSharp;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using static Misaki.HighPerformance.Mathematics.math;
|
||||
|
||||
namespace Misaki.HighPerformance.Test.Benchmark;
|
||||
|
||||
/// <summary>
/// Describes one level of the output mip chain that the GGX pre-filter jobs write into.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
internal unsafe struct MipLevel
{
    /// <summary>Destination buffer of this level; the jobs store three floats (R, G, B) per pixel.</summary>
    public float* data;

    /// <summary>Width of this level in pixels.</summary>
    public uint width;

    /// <summary>Height of this level in pixels.</summary>
    public uint height;

    /// <summary>
    /// Index of this level's first pixel inside the flattened job index range that spans
    /// every level (the jobs subtract it from the loop index to get a level-local index).
    /// </summary>
    public int offset;

    /// <summary>Roughness value the jobs use when pre-filtering this level.</summary>
    public float roughness;
}
|
||||
|
||||
/// <summary>
/// Lane-generic GGX pre-filter job. Every job index is one output pixel somewhere in the
/// mip chain; the Monte Carlo samples for that pixel are processed
/// <c>TFloat.LaneWidth</c> at a time and reduced to a single RGB value at the end.
/// </summary>
/// <typeparam name="TFloat">Lane type carrying <see cref="float"/> values.</typeparam>
/// <typeparam name="TInt">Lane type carrying <see cref="int"/> values (pixel coordinates).</typeparam>
internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
    where TFloat : unmanaged, ISPMD<TFloat, float>
    where TInt : unmanaged, ISPMD<TInt, int>
{
    /// <summary>Upper bound on samples per pixel (reached when roughness is 1).</summary>
    public const uint SAMPLE_COUNT = 1024u;

    /// <summary>Source environment map; read as tightly packed float RGB.</summary>
    public ImageResultFloat image;

    /// <summary>Array of <see cref="numMipLevels"/> level descriptors, ordered by ascending offset.</summary>
    public MipLevel* pMipLevels;

    /// <summary>Table holding the second Hammersley coordinate, indexed by sample index.</summary>
    public float* radicalInverse_VdCLut;

    /// <summary>Number of entries in <see cref="pMipLevels"/>.</summary>
    public int numMipLevels;

    /// <summary>
    /// Van der Corput radical inverse in base 2: reverses the 32 bits of
    /// <paramref name="bits"/> and scales the result by 2^-32.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static float RadicalInverse_VdC(uint bits)
    {
        bits = (bits << 16) | (bits >> 16);
        bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
        bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
        bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
        bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
        return bits * 2.3283064365386963e-10f; // bits / 0x100000000
    }

    /// <summary>
    /// Builds the Hammersley point for a group of sample indices: x is <c>i / N</c>, y is
    /// read from <paramref name="lut"/> starting at the index held in lane 0.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
    {
        var x = i / N;

        // NOTE(review): presumably Load reads LaneWidth contiguous floats, which requires the
        // lanes of `i` to hold consecutive indices and the table to extend that far - confirm.
        var y = TFloat.Load(lut + (int)i[0]);
        return MathV.Create<TFloat, float>(x, y);
    }

    // --- GGX Importance Sampling ---

    /// <summary>
    /// Maps the 2D sample <paramref name="Xi"/> to a normalized world-space half vector
    /// around <paramref name="N"/> for the given <paramref name="roughness"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector3<TFloat, float> ImportanceSampleGGX(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
    {
        var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity

        var phi = 2.0f * PI * Xi.x;

        // Clamp the radicand so rounding in the division can never feed Sqrt a negative value.
        var cosThetaInner = TFloat.Max((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y), TFloat.Zero);
        var cosTheta = TFloat.Sqrt(cosThetaInner);

        // Same guard for sinTheta: 1 - cos^2 may dip slightly below zero.
        var sinThetaInner = TFloat.Max(1.0f - cosTheta * cosTheta, TFloat.Zero);
        var sinTheta = TFloat.Sqrt(sinThetaInner);

        // Spherical to Cartesian coordinates (halfway vector in tangent space)
        var (sinPhi, cosPhi) = TFloat.SinCos(phi);
        var H = MathV.Create<TFloat, float>(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);

        // Tangent space to world space; switch the helper axis when N is nearly parallel to +Z.
        var mask = TFloat.Abs(N.z) < 0.999f;
        var up = MathV.Select(mask, MathV.Create<TFloat, float>(0.0f, 0.0f, 1.0f), MathV.Create<TFloat, float>(1.0f, 0.0f, 0.0f));

        var tangent = MathV.Normalize(MathV.Cross(up, N));
        var bitangent = MathV.Cross(N, tangent);

        var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
        return MathV.Normalize(sampleVec);
    }

    // --- Image Sampling Helpers ---

    /// <summary>Maps a 3D direction to equirectangular UVs (u from atan2(z, x), v from asin(y)).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector2<TFloat, float> DirToEquirectangularUV(Vector3<TFloat, float> dir)
    {
        var u = TFloat.Atan2(dir.z, dir.x);
        var v = TFloat.Asin(dir.y);

        u = u / (2.0f * PI) + 0.5f;
        v = v / PI + 0.5f;
        return MathV.Create<TFloat, float>(u, v);
    }

    /// <summary>
    /// Fetches the source texel each lane's direction points at, using a nearest-neighbour
    /// lookup (no bilinear filtering). <paramref name="img"/> is read as packed float RGB of
    /// size <paramref name="w"/> x <paramref name="h"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector3<TFloat, float> SampleEquirectangularMap(float* img, int w, int h, Vector3<TFloat, float> dir)
    {
        var uv = DirToEquirectangularUV(dir);

        // Nearest neighbor pixel coordinates
        var px = (uv.x * (w - 1.0f)).Cast<TInt, int>();
        var py = (uv.y * (h - 1.0f)).Cast<TInt, int>();

        // Clamp so every lane addresses a texel inside the image
        px = TInt.Clamp(px, TInt.Zero, TInt.Create(w - 1));
        py = TInt.Clamp(py, TInt.Zero, TInt.Create(h - 1));

        // Assuming float RGB array format
        var idx = (py * w + px) * 3;

        var laneWidth = TFloat.LaneWidth;

        var rBuffer = stackalloc float[laneWidth];
        var gBuffer = stackalloc float[laneWidth];
        var bBuffer = stackalloc float[laneWidth];

        // Gather: the texel addresses differ per lane, so read them one by one into
        // per-channel scratch buffers.
        for (var i = 0; i < laneWidth; i++)
        {
            var scalarIdx = idx[i];

            rBuffer[i] = img[scalarIdx];
            gBuffer[i] = img[scalarIdx + 1];
            bBuffer[i] = img[scalarIdx + 2];
        }

        // Load the gathered contiguous arrays back into lane values
        var rLane = TFloat.Load(rBuffer);
        var gLane = TFloat.Load(gBuffer);
        var bLane = TFloat.Load(bBuffer);

        return MathV.Create<TFloat, float>(rLane, gLane, bLane);
    }

    /// <summary>
    /// Pre-filters one output pixel. <paramref name="loopIndex"/> is a flat index over all
    /// pixels of all mip levels; the owning level is found through the level offsets.
    /// </summary>
    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        // 1. Find the mip level whose pixel range contains loopIndex.
        var m = 0;
        while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
        {
            m++;
        }

        var pLevel = &pMipLevels[m];

        var w = (int)pLevel->width;
        var h = (int)pLevel->height;
        var pData = pLevel->data;

        var local_i = loopIndex - pLevel->offset;
        var x = local_i % w;
        var y = local_i / w;

        // A level that is one pixel wide/high would divide 0 by 0 here and poison the
        // whole pixel with NaN, so such a level samples the centre of that axis instead.
        var u = w > 1 ? (float)x / (w - 1) : 0.5f;
        var v = h > 1 ? (float)y / (h - 1) : 0.5f;

        // 2. Pixel position -> direction on the sphere.
        var phi = (u - 0.5f) * 2.0f * PI;
        var theta = (v - 0.5f) * PI;

        sincos(theta, out var sinTheta, out var cosTheta);
        sincos(phi, out var sinPhi, out var cosPhi);
        var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
        N = normalize(N);

        // For split-sum, we assume the view direction equals the normal
        var V = N;

        var vN = MathV.Create<TFloat, float>(
            TFloat.Create(N.x),
            TFloat.Create(N.y),
            TFloat.Create(N.z)
        );

        var vV = MathV.Create<TFloat, float>(
            TFloat.Create(V.x),
            TFloat.Create(V.y),
            TFloat.Create(V.z)
        );

        var vPrefilteredColorX = TFloat.Zero;
        var vPrefilteredColorY = TFloat.Zero;
        var vPrefilteredColorZ = TFloat.Zero;
        var vTotalWeight = TFloat.Zero;

        // 3. Monte Carlo Integration Loop
        // We assume WideLane is supported in the test.

        // NOTE(review): GGXMipGenerationJob scales the sample count by sqrt(roughness) while
        // this job scales by roughness, so the two benchmark paths do different amounts of
        // work - confirm whether that is intended.
        var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
        var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);

        for (var i = 0u; i < dynamicSampleCount; i += (uint)TFloat.LaneWidth)
        {
            var laneIndices = TFloat.Sequence(i, 1.0f);

            // Lanes past the end of the sample range must not contribute.
            var validLaneMask = laneIndices < vDynamicSampleCount;

            // Generate a Hammersley random sequence point
            var Xi = Hammersley(laneIndices, dynamicSampleCount, radicalInverse_VdCLut);

            // Get the halfway vector based on GGX NDF
            var H = ImportanceSampleGGX(Xi, vN, pLevel->roughness);

            // Calculate Light direction
            var L = MathV.Reflect(-vV, H);
            L = MathV.Normalize(L);

            var NdotL = TFloat.Max(MathV.Dot(vN, L), TFloat.Zero);
            var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);

            // Zero the weight of out-of-range lanes.
            NdotL &= validLaneMask;

            // The Karis Average Weight: 1 / (1 + luma)
            // A normal sky pixel (luma 1.0) gets a weight of 0.5.
            // A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
            // This introduces bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
            // And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
            var luma = MathV.Dot(sampleColor, MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f));
            var fireflyWeight = TFloat.One / (TFloat.One + luma);
            var finalWeight = NdotL * fireflyWeight;

            vPrefilteredColorX += sampleColor.x * finalWeight;
            vPrefilteredColorY += sampleColor.y * finalWeight;
            vPrefilteredColorZ += sampleColor.z * finalWeight;

            vTotalWeight += finalWeight;
        }

        // Horizontal reduction of the per-lane accumulators.
        var totalWeight = 0.0f;
        var prefilteredColor = float3(0, 0, 0);

        for (var i = 0; i < TFloat.LaneWidth; i++)
        {
            prefilteredColor.x += vPrefilteredColorX[i];
            prefilteredColor.y += vPrefilteredColorY[i];
            prefilteredColor.z += vPrefilteredColorZ[i];
            totalWeight += vTotalWeight[i];
        }

        // 4. Average the result
        if (totalWeight > 0.0f)
        {
            prefilteredColor *= 1.0f / totalWeight;
        }

        // Write to output mip array
        var out_idx = (y * w + x) * 3;
        pData[out_idx] = prefilteredColor.x;
        pData[out_idx + 1] = prefilteredColor.y;
        pData[out_idx + 2] = prefilteredColor.z;
    }
}
|
||||
|
||||
/// <summary>
/// Scalar GGX pre-filter job. Every job index is one output pixel somewhere in the mip
/// chain; the Monte Carlo samples for that pixel are evaluated one at a time.
/// </summary>
internal unsafe struct GGXMipGenerationJob : IJobParallelFor
{
    /// <summary>Upper bound on samples per pixel (reached when roughness is 1).</summary>
    public const uint SAMPLE_COUNT = 1024u;

    /// <summary>Source environment map; read as tightly packed float RGB.</summary>
    public ImageResultFloat image;

    /// <summary>Array of <see cref="numMipLevels"/> level descriptors, ordered by ascending offset.</summary>
    public MipLevel* pMipLevels;

    /// <summary>Table holding the second Hammersley coordinate, indexed by sample index.</summary>
    public float* radicalInverse_VdCLut;

    /// <summary>Number of entries in <see cref="pMipLevels"/>.</summary>
    public int numMipLevels;

    /// <summary>
    /// Van der Corput radical inverse in base 2: reverses the 32 bits of
    /// <paramref name="bits"/> and scales the result by 2^-32.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static float RadicalInverse_VdC(uint bits)
    {
        bits = (bits << 16) | (bits >> 16);
        bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
        bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
        bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
        bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
        return bits * 2.3283064365386963e-10f; // bits / 0x100000000
    }

    /// <summary>
    /// Returns the Hammersley point for sample <paramref name="i"/> of <paramref name="N"/>:
    /// x is <c>i / N</c>, y is <c>lut[i]</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float2 Hammersley(uint i, uint N, float* lut)
    {
        return float2((float)i / N, lut[i]);
    }

    // --- GGX Importance Sampling ---

    /// <summary>
    /// Maps the 2D sample <paramref name="Xi"/> to a normalized world-space half vector
    /// around <paramref name="N"/> for the given <paramref name="roughness"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness)
    {
        var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity

        var phi = 2.0f * PI * Xi.x;

        // Clamp both radicands at zero (as the SPMD variant of this job does) so that
        // rounding error can never make sqrt return NaN.
        var cosTheta = sqrt(max((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y), 0.0f));
        var sinTheta = sqrt(max(1.0f - cosTheta * cosTheta, 0.0f));

        // Spherical to Cartesian coordinates (halfway vector in tangent space)
        sincos(phi, out var sinPhi, out var cosPhi);
        var H = float3(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);

        // Tangent space to world space; switch the helper axis when N is nearly parallel to +Z.
        var up = abs(N.z) < 0.999f ? float3(0.0f, 0.0f, 1.0f) : float3(1.0f, 0.0f, 0.0f);
        var tangent = normalize(cross(up, N));
        var bitangent = cross(N, tangent);

        var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
        return normalize(sampleVec);
    }

    // --- Image Sampling Helpers ---

    /// <summary>Maps a 3D direction to equirectangular UVs (u from atan2(z, x), v from asin(y)).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float2 DirToEquirectangularUV(float3 dir)
    {
        var uv = float2(atan2(dir.z, dir.x), asin(dir.y));
        uv.x = uv.x / (2.0f * PI) + 0.5f;
        uv.y = uv.y / PI + 0.5f;
        return uv;
    }

    /// <summary>
    /// Fetches the source texel that <paramref name="dir"/> points at, using a
    /// nearest-neighbour lookup (no bilinear filtering). <paramref name="img"/> is read as
    /// packed float RGB of size <paramref name="w"/> x <paramref name="h"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float3 SampleEquirectangularMap(float* img, int w, int h, float3 dir)
    {
        var uv = DirToEquirectangularUV(dir);

        // Nearest neighbor pixel coordinates
        var px = (int)(uv.x * (w - 1));
        var py = (int)(uv.y * (h - 1));

        // Clamp so the lookup always stays inside the image
        px = clamp(px, 0, w - 1);
        py = clamp(py, 0, h - 1);

        // Assuming float RGB array format
        var idx = (py * w + px) * 3;
        return float3(img[idx], img[idx + 1], img[idx + 2]);
    }

    /// <summary>
    /// Pre-filters one output pixel. <paramref name="loopIndex"/> is a flat index over all
    /// pixels of all mip levels; the owning level is found through the level offsets.
    /// </summary>
    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        // 1. Find the mip level whose pixel range contains loopIndex.
        var m = 0;
        while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
        {
            m++;
        }

        var pLevel = &pMipLevels[m];

        var w = (int)pLevel->width;
        var h = (int)pLevel->height;
        var pData = pLevel->data;

        var local_i = loopIndex - pLevel->offset;
        var x = local_i % w;
        var y = local_i / w;

        // A level that is one pixel wide/high would divide 0 by 0 here and poison the
        // whole pixel with NaN, so such a level samples the centre of that axis instead.
        var u = w > 1 ? (float)x / (w - 1) : 0.5f;
        var v = h > 1 ? (float)y / (h - 1) : 0.5f;

        // 2. Pixel position -> direction on the sphere.
        var phi = (u - 0.5f) * 2.0f * PI;
        var theta = (v - 0.5f) * PI;

        sincos(theta, out var sinTheta, out var cosTheta);
        sincos(phi, out var sinPhi, out var cosPhi);
        var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
        N = normalize(N);

        // For split-sum, we assume the view direction equals the normal
        var V = N;

        var prefilteredColor = float3(0, 0, 0);
        var totalWeight = 0.0f;

        // 3. Monte Carlo Integration Loop
        var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * sqrt(pLevel->roughness));
        for (var i = 0u; i < dynamicSampleCount; i++)
        {
            // Generate a Hammersley random sequence point
            var Xi = Hammersley(i, dynamicSampleCount, radicalInverse_VdCLut);

            // Get the halfway vector based on GGX NDF
            var H = ImportanceSampleGGX(Xi, N, pLevel->roughness);

            // Calculate Light direction
            var L = reflect(-V, H);
            L = normalize(L);

            var NdotL = max(dot(N, L), 0.0f);

            // If light is above the horizon
            if (NdotL > 0.0f)
            {
                var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);

                prefilteredColor += sampleColor * NdotL;
                totalWeight += NdotL;
            }
        }

        // 4. Average the result
        if (totalWeight > 0.0f)
        {
            prefilteredColor *= 1.0f / totalWeight;
        }

        // Write to output mip array
        var out_idx = (y * w + x) * 3;
        pData[out_idx] = prefilteredColor.x;
        pData[out_idx + 1] = prefilteredColor.y;
        pData[out_idx + 2] = prefilteredColor.z;
    }
}
|
||||
|
||||
/// <summary>
/// BenchmarkDotNet harness that pre-filters an HDR environment map into a roughness mip
/// chain, comparing the SPMD job on the job scheduler with the scalar job.
/// </summary>
public unsafe class GGXMipGenerationBenchmark
{
    private ImageResultFloat _image;
    private int _mipLevels;
    private int _totalPixel;

    // One natively allocated RGB float buffer per mip level.
    private float** _pResult;
    private MipLevel* _pMipLevels;

    // Radical-inverse table with SAMPLE_COUNT entries, shared by every job.
    private float* _pRadicalInverseLut;

    private JobScheduler _jobScheduler = null!;

    /// <summary>
    /// Loads the source image, allocates every mip level buffer, fills the level
    /// descriptors and the radical-inverse table, and creates the job scheduler.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        //const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
        const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
        using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
        _image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);

        _mipLevels = (int)Math.Floor(Math.Log2(Math.Max(_image.Width, _image.Height))) + 1;

        // Offsets are accumulated into this field, so start from a clean count.
        _totalPixel = 0;

        _pResult = (float**)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(float*)));
        _pMipLevels = (MipLevel*)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(MipLevel)));

        uint w, h;

        for (var i = 0; i < _mipLevels; i++)
        {
            w = Math.Max(1, _image.Width >> i);
            h = Math.Max(1, _image.Height >> i);

            var sizeInBytes = (nuint)(w * h * 3 * sizeof(float));
            _pResult[i] = (float*)NativeMemory.Alloc(sizeInBytes);

            _pMipLevels[i] = new MipLevel
            {
                width = w,
                height = h,
                offset = _totalPixel,
                data = _pResult[i],
                // Linear roughness from 0 to 1 across mip levels. A chain with a single
                // level would compute 0 / 0 (NaN), so it gets roughness 0 instead.
                roughness = _mipLevels > 1 ? (float)i / (_mipLevels - 1) : 0.0f
            };

            _totalPixel += (int)(w * h);
        }

        var desc = new JobSchedulerDesc
        {
            DependencyChainCapacity = 16,
            ThreadCount = Environment.ProcessorCount - 1,
            ThreadPriority = ThreadPriority.Normal,
        };

        _pRadicalInverseLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
        for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
        {
            _pRadicalInverseLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
        }

        _jobScheduler = new JobScheduler(in desc);
    }

    /// <summary>
    /// Gamma-encodes one linear channel value (exponent 1/2.2, negatives clamped to 0)
    /// and converts it to a byte in 0..255.
    /// </summary>
    private static byte ToDisplayByte(float linear)
    {
        var encoded = MathF.Pow(MathF.Max(0, linear), 1.0f / 2.2f);
        return (byte)Math.Clamp(encoded * 255.0f, 0, 255);
    }

    /// <summary>
    /// Writes a packed float RGB buffer to <paramref name="filePath"/> as an 8-bit PNG
    /// preview. Any existing file is replaced and the target directory is created if missing.
    /// </summary>
    /// <param name="pData">Source pixels, three floats (R, G, B) per pixel, row-major.</param>
    /// <param name="width">Image width in pixels.</param>
    /// <param name="height">Image height in pixels.</param>
    /// <param name="filePath">Destination PNG path.</param>
    /// <exception cref="InvalidOperationException">The bitmap could not be encoded.</exception>
    public void DumpMipLevelToPng(float* pData, int width, int height, string filePath)
    {
        // Create a standard 32-bit RGBA bitmap
        using var bitmap = new SKBitmap(width, height, SKColorType.Rgba8888, SKAlphaType.Opaque);

        // Get a pointer to the SkiaSharp pixel buffer
        var pPixels = (byte*)bitmap.GetPixels();

        for (var y = 0; y < height; y++)
        {
            for (var x = 0; x < width; x++)
            {
                // Source data is tightly packed floats: R, G, B
                var inIdx = (y * width + x) * 3;

                // Write to Skia's buffer (RGBA). Only gamma encoding is applied; values
                // above 1.0 simply saturate because no exposure scaling is done.
                var outIdx = (y * width + x) * 4;
                pPixels[outIdx] = ToDisplayByte(pData[inIdx]);
                pPixels[outIdx + 1] = ToDisplayByte(pData[inIdx + 1]);
                pPixels[outIdx + 2] = ToDisplayByte(pData[inIdx + 2]);
                pPixels[outIdx + 3] = 255; // Alpha
            }
        }

        // Save out the preview
        using var data = bitmap.Encode(SKEncodedImageFormat.Png, 100);
        if (data is null)
        {
            throw new InvalidOperationException($"Failed to encode '{filePath}' as PNG.");
        }

        var directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        // File.Create truncates an existing file; File.OpenWrite would leave stale bytes
        // behind whenever the new PNG is shorter than the previous one.
        using var stream = File.Create(filePath);
        data.SaveTo(stream);
    }

    /// <summary>
    /// Dumps every mip level to a PNG for visual inspection, then releases the image,
    /// all native buffers and the job scheduler.
    /// </summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        try
        {
            for (var i = 0; i < _mipLevels; i++)
            {
                DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
            }
        }
        finally
        {
            // Release native resources even when writing a preview fails.
            _image.Dispose();
            for (var i = 0; i < _mipLevels; i++)
            {
                NativeMemory.Free(_pResult[i]);
            }

            NativeMemory.Free(_pResult);
            NativeMemory.Free(_pMipLevels);
            NativeMemory.Free(_pRadicalInverseLut);

            _jobScheduler.Dispose();
        }
    }

    /// <summary>
    /// Runs the SPMD job on the job scheduler, using wide lanes when supported and
    /// scalar lanes otherwise, and waits for completion.
    /// </summary>
    [Benchmark]
    public void JobGGX()
    {
        JobHandle handle;
        if (WideLane.IsSupported)
        {
            var job = new GGXMipGenerationJobSPMD<WideLane<float>, WideLane<int>>
            {
                image = _image,
                pMipLevels = _pMipLevels,
                numMipLevels = _mipLevels,
                radicalInverse_VdCLut = _pRadicalInverseLut
            };

            handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
        }
        else
        {
            var job = new GGXMipGenerationJobSPMD<ScalarLane<float>, ScalarLane<int>>
            {
                image = _image,
                pMipLevels = _pMipLevels,
                numMipLevels = _mipLevels,
                radicalInverse_VdCLut = _pRadicalInverseLut
            };

            handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
        }

        _jobScheduler.Wait(handle);
    }

    /// <summary>
    /// Runs the scalar job through <c>Parallel.For</c>, one pixel per iteration.
    /// Currently not registered as a benchmark.
    /// </summary>
    //[Benchmark]
    public void ParallelGGX()
    {
        var job = new GGXMipGenerationJob
        {
            image = _image,
            pMipLevels = _pMipLevels,
            numMipLevels = _mipLevels,
            radicalInverse_VdCLut = _pRadicalInverseLut
        };

        Parallel.For(0, _totalPixel, new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount - 1 }, i =>
        {
            var localJob = job;
            var ctx = new JobExecutionContext();
            localJob.Execute(i, in ctx);
        });
    }

    /// <summary>Runs the scalar job over every pixel via <c>Run</c>, bypassing the scheduler.</summary>
    [Benchmark]
    public void SingleThreadGGX()
    {
        var job = new GGXMipGenerationJob
        {
            image = _image,
            pMipLevels = _pMipLevels,
            numMipLevels = _mipLevels,
            radicalInverse_VdCLut = _pRadicalInverseLut
        };

        //var handle = _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
        //_jobScheduler.Wait(handle);
        var ctx = new JobExecutionContext();
        job.Run(_totalPixel, in ctx);
    }
}
|
||||
@@ -36,7 +36,7 @@ public unsafe class SPMDBenchmark
|
||||
height = _SIZE,
|
||||
};
|
||||
|
||||
var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, false);
|
||||
var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, false, JobPriority.Normal);
|
||||
_scheduler.Wait(handle);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user