Refactor SPMD job system, add GGX mipmap benchmark

- Replace IJobSPMD with T4-generated, multi-type SPMD job interfaces and wrappers (up to 8 numeric types)
- Extend ISPMD with Cast/BitCast; implement for ScalarLane and WideLane (SIMD-aware)
- Add unary minus, scalar-lane, and lane-scalar operators to Vector2/3/4; improve Select methods
- WideLane now partial with T4-generated Cast/BitCast (SIMD conversions)
- SPMD job Execute now requires unmanaged TLane; update all usages and benchmarks
- Add GGXMipGenerationBenchmark with vectorized and scalar paths, SkiaSharp output
- Update project files: add generated code, SkiaSharp, bump version to 1.3.0
- Misc: fix formatting, method signatures, FreeList logic
This commit is contained in:
2026-04-25 01:50:06 +09:00
parent a704cb19ec
commit cfd01eb9b6
24 changed files with 2501 additions and 204 deletions

View File

@@ -47,7 +47,7 @@ public readonly unsafe struct ImageResultFloat : IDisposable
return 0; return 0;
} }
return (ulong)(Width * Height * (int)Comp); return (ulong)(Width * Height * (int)Comp * sizeof(float));
} }
} }

View File

@@ -1,114 +0,0 @@
using Misaki.HighPerformance.Jobs;
using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD;
/// <summary>
/// Contract for a job whose body is written once against a generic lane type and can then be
/// executed with different lane implementations.
/// </summary>
/// <typeparam name="TNumber">Numeric element type carried by the lanes.</typeparam>
public interface IJobSPMD<TNumber>
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
/// <summary>
/// Executes the job body for the elements starting at <paramref name="baseIndex"/>.
/// </summary>
/// <typeparam name="TLane">
/// Lane implementation to run with. The wrappers and extension methods in this file pass either
/// <c>WideLane&lt;TNumber&gt;</c> or <c>ScalarLane&lt;TNumber&gt;</c>.
/// </typeparam>
/// <param name="baseIndex">Index of the first element handled by this call.</param>
/// <param name="ctx">Execution context forwarded unchanged from the caller.</param>
void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, TNumber>;
}
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/>: each parallel-for
/// iteration covers one lane-width chunk of the index range.
/// </summary>
internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    public T innerJob;
    public int totalCount;

    /// <summary>
    /// Runs chunk <paramref name="loopIndex"/>: a full chunk goes through the wide lane in one call,
    /// a partial trailing chunk is processed element by element with the scalar lane.
    /// </summary>
    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        var laneWidth = WideLane<TNumber>.LaneWidth;
        var first = loopIndex * laneWidth;

        // Enough elements left for a whole lane: take the wide path and stop.
        if (totalCount - first >= laneWidth)
        {
            innerJob.Execute<WideLane<TNumber>>(first, in ctx);
            return;
        }

        // Tail: fewer than laneWidth elements remain.
        for (var index = first; index < totalCount; index++)
        {
            innerJob.Execute<ScalarLane<TNumber>>(index, in ctx);
        }
    }
}
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/> using only the scalar
/// lane: every parallel-for iteration handles exactly one element.
/// </summary>
internal struct SPMDScalerJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    public T innerJob;

    // Not read by Execute; assigned by the scheduling extension for parity with SPMDJobWrapper.
    public int totalCount;

    /// <summary>Forwards the iteration index to the inner job as a single scalar element.</summary>
    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
        => innerJob.Execute<ScalarLane<TNumber>>(loopIndex, in ctx);
}
/// <summary>
/// Entry points for running or scheduling <see cref="IJobSPMD{TNumber}"/> jobs.
/// </summary>
public static class IJobParallelForSPMDExtensions
{
    /// <summary>
    /// Runs the job synchronously on the calling thread over <paramref name="totalCount"/> elements.
    /// </summary>
    /// <param name="job">The job to run.</param>
    /// <param name="totalCount">Number of elements to process.</param>
    /// <param name="ctx">Execution context forwarded to every <c>Execute</c> call.</param>
    public static void Run<T, TNumber>(this ref T job, int totalCount, ref readonly JobExecutionContext ctx)
        where T : struct, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        // Without wide-lane support every element goes through the scalar lane.
        if (!WideLane.IsSupported)
        {
            for (var index = 0; index < totalCount; index++)
            {
                job.Execute<ScalarLane<TNumber>>(index, in ctx);
            }

            return;
        }

        var laneWidth = WideLane<TNumber>.LaneWidth;
        var chunkCount = (totalCount + laneWidth - 1) / laneWidth;

        for (var chunk = 0; chunk < chunkCount; chunk++)
        {
            var first = chunk * laneWidth;

            // Full chunk: one wide call covers laneWidth elements.
            if (totalCount - first >= laneWidth)
            {
                job.Execute<WideLane<TNumber>>(first, in ctx);
                continue;
            }

            // Partial trailing chunk: finish element by element.
            for (var index = first; index < totalCount; index++)
            {
                job.Execute<ScalarLane<TNumber>>(index, in ctx);
            }
        }
    }

    /// <summary>
    /// Schedules the job on <paramref name="jobScheduler"/> as a parallel-for, using the wide-lane
    /// wrapper when wide lanes are supported and the scalar wrapper otherwise.
    /// </summary>
    /// <param name="jobScheduler">Scheduler that receives the wrapped job.</param>
    /// <param name="job">The job to schedule; it is copied into the wrapper.</param>
    /// <param name="totalCount">Number of elements to process.</param>
    /// <param name="batchSize">Batch size forwarded to <c>ScheduleParallelFor</c>.</param>
    /// <param name="preferLocal">Forwarded to <c>ScheduleParallelFor</c>.</param>
    /// <param name="dependencies">Handles forwarded to <c>ScheduleParallelFor</c> as dependencies.</param>
    /// <returns>The handle returned by <c>ScheduleParallelFor</c>.</returns>
    public static JobHandle ScheduleParallelSPDM<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, bool preferLocal, params ReadOnlySpan<JobHandle> dependencies)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        if (!WideLane.IsSupported)
        {
            // One parallel-for iteration per element.
            var scalarWrapper = new SPMDScalerJobWrapper<T, TNumber>
            {
                innerJob = job,
                totalCount = totalCount,
            };

            return jobScheduler.ScheduleParallelFor(ref scalarWrapper, totalCount, batchSize, preferLocal, dependencies);
        }

        var wideWrapper = new SPMDJobWrapper<T, TNumber>
        {
            innerJob = job,
            totalCount = totalCount,
        };

        // One parallel-for iteration per lane-width chunk, rounded up to include the tail.
        var laneWidth = WideLane<TNumber>.LaneWidth;
        var chunkCount = (totalCount + laneWidth - 1) / laneWidth;

        return jobScheduler.ScheduleParallelFor(ref wideWrapper, chunkCount, batchSize, preferLocal, dependencies);
    }
}

View File

@@ -157,6 +157,26 @@ public interface ISPMD<TSelf, TNumber> : ISPMD, IEquatable<TSelf>
/// <returns>The backing vector representation.</returns> /// <returns>The backing vector representation.</returns>
Vector<TNumber> AsVector(); Vector<TNumber> AsVector();
/// <summary>
/// Converts the lane value to another SPMD lane type with a different underlying numeric type.
/// </summary>
/// <remarks>
/// This is a numeric conversion, not a reinterpretation of bits. The exact conversion rules
/// (rounding, overflow handling) are determined by the implementing lane type.
/// </remarks>
/// <typeparam name="TOther">The type of the other SPMD lane.</typeparam>
/// <typeparam name="TOtherNumber">The underlying numeric type of the other SPMD lane.</typeparam>
/// <returns>The converted lane value.</returns>
TOther Cast<TOther, TOtherNumber>()
where TOther : ISPMD<TOther, TOtherNumber>
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>;
/// <summary>
/// Bitwise reinterprets the lane value as another SPMD lane type with a different underlying numeric type.
/// </summary>
/// <remarks>
/// No numeric conversion is performed; use <c>Cast</c> when the values themselves must be converted.
/// </remarks>
/// <typeparam name="TOther">The type of the other SPMD lane.</typeparam>
/// <typeparam name="TOtherNumber">The underlying numeric type of the other SPMD lane.</typeparam>
/// <returns>The bit-cast lane value.</returns>
TOther BitCast<TOther, TOtherNumber>()
where TOther : ISPMD<TOther, TOtherNumber>
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>;
/// <summary> /// <summary>
/// Adds two lane values element-wise. /// Adds two lane values element-wise.
/// </summary> /// </summary>

View File

@@ -7,7 +7,7 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild> <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<AssemblyVersion>1.2.0</AssemblyVersion> <AssemblyVersion>1.3.0</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
@@ -27,15 +27,20 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="Templates\IJobSPMD.gen.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>IJobSPMD.tt</DependentUpon>
</None>
<None Include="Templates\Vector2.gen.cs"> <None Include="Templates\Vector2.gen.cs">
<DesignTime>True</DesignTime> <DesignTime>True</DesignTime>
<AutoGen>True</AutoGen> <AutoGen>True</AutoGen>
<DependentUpon>Vector2.tt</DependentUpon> <DependentUpon>Vector2.tt</DependentUpon>
</None> </None>
<None Include="Templates\Vector{T}Helper.cs"> <None Include="Templates\WideLane.gen.cs">
<DesignTime>True</DesignTime> <DesignTime>True</DesignTime>
<AutoGen>True</AutoGen> <AutoGen>True</AutoGen>
<DependentUpon>Vector{T}Helper.ttinclude</DependentUpon> <DependentUpon>WideLane.tt</DependentUpon>
</None> </None>
</ItemGroup> </ItemGroup>
@@ -44,6 +49,10 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Update="Templates\IJobSPMD.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>IJobSPMD.gen.cs</LastGenOutput>
</None>
<None Update="Templates\MathV.Vector.tt"> <None Update="Templates\MathV.Vector.tt">
<Generator>TextTemplatingFileGenerator</Generator> <Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>MathV.Vector.gen.cs</LastGenOutput> <LastGenOutput>MathV.Vector.gen.cs</LastGenOutput>
@@ -60,9 +69,9 @@
<Generator>TextTemplatingFileGenerator</Generator> <Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>Vector2.gen.cs</LastGenOutput> <LastGenOutput>Vector2.gen.cs</LastGenOutput>
</None> </None>
<None Update="Templates\Vector{T}Helper.ttinclude"> <None Update="Templates\WideLane.tt">
<Generator>TextTemplatingFileGenerator</Generator> <Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>Vector{T}Helper.cs</LastGenOutput> <LastGenOutput>WideLane.gen.cs</LastGenOutput>
</None> </None>
</ItemGroup> </ItemGroup>
@@ -71,6 +80,11 @@
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Compile Update="Templates\IJobSPMD.gen.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>IJobSPMD.tt</DependentUpon>
</Compile>
<Compile Update="Templates\MathV.Vector.gen.cs"> <Compile Update="Templates\MathV.Vector.gen.cs">
<DesignTime>True</DesignTime> <DesignTime>True</DesignTime>
<AutoGen>True</AutoGen> <AutoGen>True</AutoGen>
@@ -91,10 +105,10 @@
<AutoGen>True</AutoGen> <AutoGen>True</AutoGen>
<DependentUpon>Vector4.tt</DependentUpon> <DependentUpon>Vector4.tt</DependentUpon>
</Compile> </Compile>
<Compile Update="Templates\Vector{T}Helper.cs"> <Compile Update="Templates\WideLane.gen.cs">
<DesignTime>True</DesignTime> <DesignTime>True</DesignTime>
<AutoGen>True</AutoGen> <AutoGen>True</AutoGen>
<DependentUpon>Vector{T}Helper.ttinclude</DependentUpon> <DependentUpon>WideLane.tt</DependentUpon>
</Compile> </Compile>
</ItemGroup> </ItemGroup>

View File

@@ -123,6 +123,22 @@ public readonly unsafe struct ScalarLane<TNumber> : ISPMD<ScalarLane<TNumber>, T
return Vector.Create(value); return Vector.Create(value);
} }
/// <summary>
/// Converts the single lane value to <typeparamref name="TOtherNumber"/> and wraps it in a
/// <typeparamref name="TOther"/> lane.
/// </summary>
/// <typeparam name="TOther">The target SPMD lane type.</typeparam>
/// <typeparam name="TOtherNumber">The underlying numeric type of the target lane.</typeparam>
/// <returns>A <typeparamref name="TOther"/> created from the converted value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TOther Cast<TOther, TOtherNumber>()
    where TOther : ISPMD<TOther, TOtherNumber>
    where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
{
    // Use the non-throwing conversion so a scalar lane never raises OverflowException for a value
    // that a wide lane would convert silently. The same job body runs with both lane kinds (wide
    // for full chunks, scalar for the tail), so their Cast semantics have to agree.
    return TOther.Create(TOtherNumber.CreateTruncating(value));
}
/// <summary>
/// Reinterprets the bits of this scalar lane as a <typeparamref name="TOther"/> lane without any
/// numeric conversion.
/// </summary>
/// <remarks>
/// Delegates to <c>Unsafe.BitCast</c>. NOTE(review): per the .NET documentation that API throws
/// when the source and target types differ in size, so <typeparamref name="TOther"/> presumably
/// has to be a scalar lane over an equally sized numeric type; confirm against callers.
/// </remarks>
/// <typeparam name="TOther">The target SPMD lane type.</typeparam>
/// <typeparam name="TOtherNumber">The underlying numeric type of the target lane.</typeparam>
/// <returns>This lane's bits viewed as <typeparamref name="TOther"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TOther BitCast<TOther, TOtherNumber>()
where TOther : ISPMD<TOther, TOtherNumber>
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
{
return Unsafe.BitCast<ScalarLane<TNumber>, TOther>(this);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<TNumber> operator +(ScalarLane<TNumber> a, ScalarLane<TNumber> b) public static ScalarLane<TNumber> operator +(ScalarLane<TNumber> a, ScalarLane<TNumber> b)
{ {

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,204 @@
<#@ template debug="false" hostspecific="false" language="C#" #>
<#@ assembly name="System.Core" #>
<#@ import namespace="System.Linq" #>
<#@ import namespace="System.Text" #>
<#@ import namespace="System.Collections.Generic" #>
<#@ output extension="gen.cs" #>
using Misaki.HighPerformance.Jobs;
using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD;
<#
// Emits IJobSPMD and its two parallel-for wrappers once per arity: 1 to 8 numeric type parameters.
for (var i = 0; i < 8; i++) { #>
/// <summary>
/// A job interface for Single Program Multiple Data (SPMD) execution, allowing for efficient parallel processing of data across multiple lanes.
/// </summary>
/// <remarks>
/// Always use TNumber0 as the primary type for determining lane width and job scheduling, even if it's not used in the job execution.
/// </remarks>
<#= ForEachDimension(i + 1, j => @$"/// <typeparam name=""TNumber{j}"">The numeric type at index {j} used in the SPMD job.</typeparam>", Environment.NewLine) #>
public interface IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
<#= GetTNumberRestrictions(i + 1, "    ") #>
{
    /// <summary>
    /// Executes the job body for the elements starting at <paramref name="baseIndex"/> with the given lane types.
    /// </summary>
    /// <param name="baseIndex">Index of the first element handled by this call.</param>
    /// <param name="ctx">The job execution context forwarded by the caller.</param>
    void Execute<<#= ForEachDimension(i + 1, j => $"TLane{j}") #>>(int baseIndex, ref readonly JobExecutionContext ctx)
<#= GetTLaneRestrictions(i + 1, "        ") #>;
}

/// <summary>
/// Parallel-for adapter that processes one TNumber0 lane-width chunk per iteration:
/// wide lanes for full chunks, scalar lanes element by element for a partial trailing chunk.
/// </summary>
internal struct SPMDJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>> : IJobParallelFor
    where T : unmanaged, IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
<#= GetTNumberRestrictions(i + 1, "    ") #>
{
    public T innerJob;
    public int totalIteration;

    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
        var remaining = totalIteration - baseIndex;
        if (remaining >= WideLane<TNumber0>.LaneWidth)
        {
            innerJob.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(baseIndex, in ctx);
        }
        else
        {
            for (var j = 0; j < remaining; j++)
            {
                innerJob.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(baseIndex + j, in ctx);
            }
        }
    }
}

/// <summary>
/// Parallel-for adapter that processes exactly one element per iteration using scalar lanes only.
/// </summary>
internal struct SPMDScalerJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>> : IJobParallelFor
    where T : unmanaged, IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
<#= GetTNumberRestrictions(i + 1, "    ") #>
{
    public T innerJob;
    public int totalIteration;

    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        innerJob.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(loopIndex, in ctx);
    }
}

<# } #>
/// <summary>
/// Extension methods for running SPMD jobs inline or scheduling them on a <see cref="JobScheduler"/>.
/// </summary>
public static class IJobParallelForSPMDExtensions
{
<# // One Run/ScheduleParallelSPDM pair per IJobSPMD arity (1 to 8 numeric type parameters).
for (var i = 0; i < 8; i++) { #>
    /// <summary>
    /// Run the SPMD job with the specified total count and job execution context directly on the calling thread.
    /// </summary>
    /// <remarks>
    /// Always use TNumber0 as the primary type for determining lane width and job scheduling, even if it's not used in the job execution.
    /// </remarks>
    /// <typeparam name="T">The SPMD job type.</typeparam>
<#= ForEachDimension(i + 1, j => @$"    /// <typeparam name=""TNumber{j}"">The numeric type at index {j} used in the SPMD job.</typeparam>", Environment.NewLine) #>
    /// <param name="job">The SPMD job to run.</param>
    /// <param name="totalIteration">The total number of iterations to execute across all lanes.</param>
    /// <param name="ctx">The job execution context providing information about the current execution environment.</param>
    public static void Run<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>(this ref T job, int totalIteration, ref readonly JobExecutionContext ctx)
        where T : struct, IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
<#= GetTNumberRestrictions(i + 1, "        ") #>
    {
        if (WideLane.IsSupported)
        {
            var iterations = (totalIteration + WideLane<TNumber0>.LaneWidth - 1) / WideLane<TNumber0>.LaneWidth;
            for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
            {
                var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
                var remaining = totalIteration - baseIndex;
                if (remaining >= WideLane<TNumber0>.LaneWidth)
                {
                    job.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(baseIndex, in ctx);
                }
                else
                {
                    // Partial trailing chunk: finish element by element with scalar lanes.
                    for (var offset = 0; offset < remaining; offset++)
                    {
                        job.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(baseIndex + offset, in ctx);
                    }
                }
            }
        }
        else
        {
            for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
            {
                job.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(loopIndex, in ctx);
            }
        }
    }

    /// <summary>
    /// Schedule the SPMD job for parallel execution across multiple threads, with the specified total count, batch size, and job execution context.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
<#= ForEachDimension(i + 1, j => @$"    /// <typeparam name=""TNumber{j}"">The numeric type at index {j} used in the SPMD job.</typeparam>", Environment.NewLine) #>
    /// <remarks>
    /// Always use TNumber0 as the primary type for determining lane width and job scheduling, even if it's not used in the job execution.
    /// </remarks>
    /// <param name="jobScheduler">The job scheduler to use for scheduling the job.</param>
    /// <param name="job">The SPMD job to schedule.</param>
    /// <param name="totalIteration">The total number of iterations to execute across all lanes.</param>
    /// <param name="batchSize">The number of iterations to execute in each batch for parallel execution.</param>
    /// <param name="preferLocal">Whether to prefer scheduling the job on the local thread for better cache locality.</param>
    /// <param name="priority">The priority of the job.</param>
    /// <param name="dependencies">Any job handles that this job depends on, which must complete before this job can start.</param>
    public static JobHandle ScheduleParallelSPDM<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>(this JobScheduler jobScheduler, ref T job, int totalIteration, int batchSize, bool preferLocal, JobPriority priority, params ReadOnlySpan<JobHandle> dependencies)
        where T : unmanaged, IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
<#= GetTNumberRestrictions(i + 1, "        ") #>
    {
        if (WideLane.IsSupported)
        {
            var wrapper = new SPMDJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
            {
                innerJob = job,
                totalIteration = totalIteration,
            };

            // One parallel-for iteration per TNumber0 lane-width chunk, rounded up to include the tail.
            var iterations = (totalIteration + WideLane<TNumber0>.LaneWidth - 1) / WideLane<TNumber0>.LaneWidth;
            return jobScheduler.ScheduleParallelFor(ref wrapper, iterations, batchSize, preferLocal, priority, dependencies);
        }
        else
        {
            var wrapper = new SPMDScalerJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
            {
                innerJob = job,
                totalIteration = totalIteration,
            };

            return jobScheduler.ScheduleParallelFor(ref wrapper, totalIteration, batchSize, preferLocal, priority, dependencies);
        }
    }

<# } #>
}
<#+
// Builds one text fragment per index in [0, dimension) via `action` and joins them with `spliter`.
public string ForEachDimension(int dimension, Func<int, string> action, string spliter = ", ")
{
    var fragments = Enumerable
        .Range(0, dimension)
        .Select(index => action(index));

    return string.Join(spliter, fragments);
}
// Returns one `where TNumber{i} : ...` constraint line per index in [0, dimension), each prefixed
// with `space` and separated by Environment.NewLine, with no trailing newline.
public string GetTNumberRestrictions(int dimension, string space = " ")
{
    var clauses = new List<string>();
    for (var index = 0; index < dimension; index++)
    {
        var name = $"TNumber{index}";
        clauses.Add(space + $@"where {name} : unmanaged, INumber<{name}>, IBinaryNumber<{name}>, IMinMaxValue<{name}>, IBitwiseOperators<{name}, {name}, {name}>");
    }

    return string.Join(Environment.NewLine, clauses);
}
// Returns one `where TLane{i} : unmanaged, ISPMD<TLane{i}, TNumber{i}>` constraint line per index
// in [0, dimension), each prefixed with `space` and separated by Environment.NewLine, with no
// trailing newline.
public string GetTLaneRestrictions(int dimension, string space = " ")
{
    var clauses = new List<string>();
    for (var index = 0; index < dimension; index++)
    {
        clauses.Add(space + $@"where TLane{index} : unmanaged, ISPMD<TLane{index}, TNumber{index}>");
    }

    return string.Join(Environment.NewLine, clauses);
}
#>

View File

@@ -554,6 +554,7 @@ public static unsafe partial class MathV
{ {
x = TLane.Select(condition, b.x, a.x), x = TLane.Select(condition, b.x, a.x),
y = TLane.Select(condition, b.y, a.y), y = TLane.Select(condition, b.y, a.y),
z = TLane.Select(condition, b.z, a.z),
}; };
} }
@@ -566,6 +567,7 @@ public static unsafe partial class MathV
{ {
x = TLane.Select(condition.x, b.x, a.x), x = TLane.Select(condition.x, b.x, a.x),
y = TLane.Select(condition.y, b.y, a.y), y = TLane.Select(condition.y, b.y, a.y),
z = TLane.Select(condition.z, b.z, a.z),
}; };
} }
@@ -852,6 +854,8 @@ public static unsafe partial class MathV
{ {
x = TLane.Select(condition, b.x, a.x), x = TLane.Select(condition, b.x, a.x),
y = TLane.Select(condition, b.y, a.y), y = TLane.Select(condition, b.y, a.y),
z = TLane.Select(condition, b.z, a.z),
w = TLane.Select(condition, b.w, a.w),
}; };
} }
@@ -864,6 +868,8 @@ public static unsafe partial class MathV
{ {
x = TLane.Select(condition.x, b.x, a.x), x = TLane.Select(condition.x, b.x, a.x),
y = TLane.Select(condition.y, b.y, a.y), y = TLane.Select(condition.y, b.y, a.y),
z = TLane.Select(condition.z, b.z, a.z),
w = TLane.Select(condition.w, b.w, a.w),
}; };
} }

View File

@@ -297,8 +297,9 @@ public static unsafe partial class MathV
{ {
return new <#= vectorType #> return new <#= vectorType #>
{ {
x = <#= TLane #>.Select(condition, b.x, a.x), <# for (int i = 0; i < dimension; i++) { #>
y = <#= TLane #>.Select(condition, b.y, a.y), <#= components[i] #> = <#= TLane #>.Select(condition, b.<#= components[i] #>, a.<#= components[i] #>),
<# } #>
}; };
} }
@@ -309,8 +310,9 @@ public static unsafe partial class MathV
{ {
return new <#= vectorType #> return new <#= vectorType #>
{ {
x = <#= TLane #>.Select(condition.x, b.x, a.x), <# for (int i = 0; i < dimension; i++) { #>
y = <#= TLane #>.Select(condition.y, b.y, a.y), <#= components[i] #> = <#= TLane #>.Select(condition.<#= components[i] #>, b.<#= components[i] #>, a.<#= components[i] #>),
<# } #>
}; };
} }

View File

@@ -96,6 +96,16 @@ public unsafe struct Vector2<TLane, TNumber> : IEquatable<Vector2<TLane, TNumber
this.y.Store(ref y); this.y.Store(ref y);
} }
/// <summary>
/// Negates a vector component-wise.
/// </summary>
/// <param name="vector">The vector to negate.</param>
/// <returns>A vector whose <c>x</c> and <c>y</c> are the negated components of <paramref name="vector"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector2<TLane, TNumber> operator -(in Vector2<TLane, TNumber> vector)
{
return new Vector2<TLane, TNumber>
{
x = -vector.x,
y = -vector.y,
};
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector2<TLane, TNumber> operator +(in Vector2<TLane, TNumber> left, in Vector2<TLane, TNumber> right) public static Vector2<TLane, TNumber> operator +(in Vector2<TLane, TNumber> left, in Vector2<TLane, TNumber> right)
{ {

View File

@@ -104,6 +104,17 @@ public unsafe struct Vector3<TLane, TNumber> : IEquatable<Vector3<TLane, TNumber
this.z.Store(ref z); this.z.Store(ref z);
} }
/// <summary>
/// Negates a vector component-wise.
/// </summary>
/// <param name="vector">The vector to negate.</param>
/// <returns>A vector whose <c>x</c>, <c>y</c> and <c>z</c> are the negated components of <paramref name="vector"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector3<TLane, TNumber> operator -(in Vector3<TLane, TNumber> vector)
{
return new Vector3<TLane, TNumber>
{
x = -vector.x,
y = -vector.y,
z = -vector.z,
};
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector3<TLane, TNumber> operator +(in Vector3<TLane, TNumber> left, in Vector3<TLane, TNumber> right) public static Vector3<TLane, TNumber> operator +(in Vector3<TLane, TNumber> left, in Vector3<TLane, TNumber> right)
{ {

View File

@@ -112,6 +112,18 @@ public unsafe struct Vector4<TLane, TNumber> : IEquatable<Vector4<TLane, TNumber
this.w.Store(ref w); this.w.Store(ref w);
} }
/// <summary>
/// Negates a vector component-wise.
/// </summary>
/// <param name="vector">The vector to negate.</param>
/// <returns>A vector whose <c>x</c>, <c>y</c>, <c>z</c> and <c>w</c> are the negated components of <paramref name="vector"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4<TLane, TNumber> operator -(in Vector4<TLane, TNumber> vector)
{
return new Vector4<TLane, TNumber>
{
x = -vector.x,
y = -vector.y,
z = -vector.z,
w = -vector.w,
};
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector4<TLane, TNumber> operator +(in Vector4<TLane, TNumber> left, in Vector4<TLane, TNumber> right) public static Vector4<TLane, TNumber> operator +(in Vector4<TLane, TNumber> left, in Vector4<TLane, TNumber> right)
{ {

View File

@@ -125,6 +125,15 @@ public unsafe struct {typeName} : IEquatable<{typeName}>
{ForEachDimension(dimension, 8, Environment.NewLine, (dim, sb) => sb.Append($"this.{components[dim]}.Store(ref {components[dim]});"))} {ForEachDimension(dimension, 8, Environment.NewLine, (dim, sb) => sb.Append($"this.{components[dim]}.Store(ref {components[dim]});"))}
}} }}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static {typeName} operator -(in {typeName} vector)
{{
return new {typeName}
{{
{ForEachDimension(dimension, 12, Environment.NewLine, (dim, sb) => sb.Append($"{components[dim]} = -vector.{components[dim]},"))}
}};
}}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static {typeName} operator +(in {typeName} left, in {typeName} right) public static {typeName} operator +(in {typeName} left, in {typeName} right)
{{ {{

View File

@@ -0,0 +1,79 @@
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD;
public readonly unsafe partial struct WideLane<TNumber> : ISPMD<WideLane<TNumber>, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Converts every element of this lane to <typeparamref name="TOtherNumber"/> and returns the
    /// result as a <typeparamref name="TOther"/> lane.
    /// </summary>
    /// <remarks>
    /// Same-width float/integer pairs (float with int/uint, double with long/ulong) use the
    /// <c>Vector.ConvertTo*</c> helpers. Every other pair falls back to an element-wise
    /// <c>CreateTruncating</c> conversion through a stack buffer. Target elements that have no
    /// source element (target lane wider than this lane) are zero.
    /// </remarks>
    /// <typeparam name="TOther">The target SPMD lane type.</typeparam>
    /// <typeparam name="TOtherNumber">The underlying numeric type of the target lane.</typeparam>
    /// <returns>The converted lane.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public TOther Cast<TOther, TOtherNumber>()
        where TOther : ISPMD<TOther, TOtherNumber>
        where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
    {
        if (typeof(TNumber) == typeof(float) && typeof(TOtherNumber) == typeof(int))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<float>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToInt32(vFrom);
            return Unsafe.As<Vector<int>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(float) && typeof(TOtherNumber) == typeof(uint))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<float>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToUInt32(vFrom);
            return Unsafe.As<Vector<uint>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(double) && typeof(TOtherNumber) == typeof(long))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<double>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToInt64(vFrom);
            return Unsafe.As<Vector<long>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(double) && typeof(TOtherNumber) == typeof(ulong))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<double>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToUInt64(vFrom);
            return Unsafe.As<Vector<ulong>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(int) && typeof(TOtherNumber) == typeof(float))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<int>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToSingle(vFrom);
            return Unsafe.As<Vector<float>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(uint) && typeof(TOtherNumber) == typeof(float))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<uint>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToSingle(vFrom);
            return Unsafe.As<Vector<float>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(long) && typeof(TOtherNumber) == typeof(double))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<long>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToDouble(vFrom);
            return Unsafe.As<Vector<double>, TOther>(ref vTo);
        }

        if (typeof(TNumber) == typeof(ulong) && typeof(TOtherNumber) == typeof(double))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<ulong>>(ref Unsafe.AsRef(in value));
            var vTo = Vector.ConvertToDouble(vFrom);
            return Unsafe.As<Vector<double>, TOther>(ref vTo);
        }

        // Generic fallback. TOther.Load presumably reads TOther.LaneWidth elements, so the scratch
        // buffer must be at least that long; sizing it by this lane's width alone would let Load
        // run past the buffer whenever the target lane is wider (e.g. double -> float).
        var sourceWidth = LaneWidth;
        var targetWidth = TOther.LaneWidth;
        var bufferLength = sourceWidth > targetWidth ? sourceWidth : targetWidth;
        var convertCount = sourceWidth < targetWidth ? sourceWidth : targetWidth;

        var casted = stackalloc TOtherNumber[bufferLength];
        for (var i = 0; i < convertCount; i++)
        {
            casted[i] = TOtherNumber.CreateTruncating(value[i]);
        }

        // Give target elements without a source element a defined value instead of stack garbage.
        for (var i = convertCount; i < targetWidth; i++)
        {
            casted[i] = TOtherNumber.Zero;
        }

        return TOther.Load(casted);
    }
}

View File

@@ -0,0 +1,59 @@
<#@ template debug="false" hostspecific="false" language="C#" #>
<#@ assembly name="System.Core" #>
<#@ import namespace="System.Linq" #>
<#@ import namespace="System.Text" #>
<#@ import namespace="System.Collections.Generic" #>
<#@ output extension=".gen.cs" #>
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD;
<#
// Numeric type pairs for which Cast emits a dedicated System.Numerics.Vector conversion branch.
// From/To are C# keyword type names; Method is the Vector.ConvertTo* helper invoked for the pair.
// Pairs not listed here are handled by the element-wise fallback at the end of Cast.
var conversions = new CastRoute[]
{
new CastRoute { From = "float", To = "int", Method = "Vector.ConvertToInt32" },
new CastRoute { From = "float", To = "uint", Method = "Vector.ConvertToUInt32" },
new CastRoute { From = "double", To = "long", Method = "Vector.ConvertToInt64" },
new CastRoute { From = "double", To = "ulong", Method = "Vector.ConvertToUInt64" },
new CastRoute { From = "int", To = "float", Method = "Vector.ConvertToSingle" },
new CastRoute { From = "uint", To = "float", Method = "Vector.ConvertToSingle" },
new CastRoute { From = "long", To = "double", Method = "Vector.ConvertToDouble" },
new CastRoute { From = "ulong", To = "double", Method = "Vector.ConvertToDouble" },
};
#>
public readonly unsafe partial struct WideLane<TNumber> : ISPMD<WideLane<TNumber>, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Converts every element of this lane to <typeparamref name="TOtherNumber"/> and returns the
    /// result as a <typeparamref name="TOther"/> lane.
    /// </summary>
    /// <remarks>
    /// Type pairs listed in the template's conversion table use the <c>Vector.ConvertTo*</c> helpers.
    /// Every other pair falls back to an element-wise <c>CreateTruncating</c> conversion through a
    /// stack buffer. Target elements that have no source element (target lane wider than this lane)
    /// are zero.
    /// </remarks>
    /// <typeparam name="TOther">The target SPMD lane type.</typeparam>
    /// <typeparam name="TOtherNumber">The underlying numeric type of the target lane.</typeparam>
    /// <returns>The converted lane.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public TOther Cast<TOther, TOtherNumber>()
        where TOther : ISPMD<TOther, TOtherNumber>
        where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
    {
<# foreach (var c in conversions) { #>
        if (typeof(TNumber) == typeof(<#= c.From #>) && typeof(TOtherNumber) == typeof(<#= c.To #>))
        {
            ref var vFrom = ref Unsafe.As<Vector<TNumber>, Vector<<#= c.From #>>>(ref Unsafe.AsRef(in value));
            var vTo = <#= c.Method #>(vFrom);
            return Unsafe.As<Vector<<#= c.To #>>, TOther>(ref vTo);
        }

<# } #>
        // Generic fallback. TOther.Load presumably reads TOther.LaneWidth elements, so the scratch
        // buffer must be at least that long; sizing it by this lane's width alone would let Load
        // run past the buffer whenever the target lane is wider (e.g. double -> float).
        var sourceWidth = LaneWidth;
        var targetWidth = TOther.LaneWidth;
        var bufferLength = sourceWidth > targetWidth ? sourceWidth : targetWidth;
        var convertCount = sourceWidth < targetWidth ? sourceWidth : targetWidth;

        var casted = stackalloc TOtherNumber[bufferLength];
        for (var i = 0; i < convertCount; i++)
        {
            casted[i] = TOtherNumber.CreateTruncating(value[i]);
        }

        // Give target elements without a source element a defined value instead of stack garbage.
        for (var i = convertCount; i < targetWidth; i++)
        {
            casted[i] = TOtherNumber.Zero;
        }

        return TOther.Load(casted);
    }
}
<#+
// One entry of the conversion table: describes a single vectorized Cast branch to generate.
public struct CastRoute
{
// C# type name compared against typeof(TNumber) (the source element type).
public string From;
// C# type name compared against typeof(TOtherNumber) (the target element type).
public string To;
// Fully written call target (e.g. "Vector.ConvertToInt32") applied to the source vector.
public string Method;
}
#>

View File

@@ -32,7 +32,7 @@ public static unsafe class WideLane
} }
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public readonly unsafe struct WideLane<TNumber> : ISPMD<WideLane<TNumber>, TNumber> public readonly unsafe partial struct WideLane<TNumber> : ISPMD<WideLane<TNumber>, TNumber>
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber> where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{ {
private static readonly Vector<TNumber> s_indices; private static readonly Vector<TNumber> s_indices;
@@ -301,6 +301,15 @@ public readonly unsafe struct WideLane<TNumber> : ISPMD<WideLane<TNumber>, TNumb
return value; return value;
} }
/// <summary>
/// Reinterprets the bits of this wide lane as a <typeparamref name="TOther"/> lane without any
/// numeric conversion.
/// </summary>
/// <remarks>
/// Delegates to <c>Unsafe.BitCast</c>. NOTE(review): per the .NET documentation that API throws
/// when the source and target types differ in size, so <typeparamref name="TOther"/> presumably
/// has to be another wide lane; confirm against callers.
/// </remarks>
/// <typeparam name="TOther">The target SPMD lane type.</typeparam>
/// <typeparam name="TOtherNumber">The underlying numeric type of the target lane.</typeparam>
/// <returns>This lane's bits viewed as <typeparamref name="TOther"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public TOther BitCast<TOther, TOtherNumber>()
where TOther : ISPMD<TOther, TOtherNumber>
where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
{
return Unsafe.BitCast<WideLane<TNumber>, TOther>(this);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<TNumber> operator +(WideLane<TNumber> a, WideLane<TNumber> b) public static WideLane<TNumber> operator +(WideLane<TNumber> a, WideLane<TNumber> b)
{ {

View File

@@ -0,0 +1,602 @@
using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Image;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using SkiaSharp;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static Misaki.HighPerformance.Mathematics.math;
namespace Misaki.HighPerformance.Test.Benchmark;
/// <summary>
/// Plain data record describing one mip level used by the GGX mip generation benchmark.
/// </summary>
/// <remarks>
/// NOTE(review): the field meanings below are inferred from the names only; the code that fills
/// and reads them is outside this view, so confirm before relying on them.
/// </remarks>
internal unsafe struct MipLevel
{
// Raw pointer to this level's float data; ownership and lifetime are not visible here.
public float* data;
// Presumably the level's width in pixels (TODO confirm).
public uint width;
// Presumably the level's height in pixels (TODO confirm).
public uint height;
// Presumably this level's offset inside a shared buffer; unit not visible here (TODO confirm).
public int offset;
// Presumably the GGX roughness this level is prefiltered for (TODO confirm).
public float roughness;
}
internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
where TFloat : unmanaged, ISPMD<TFloat, float>
where TInt : unmanaged, ISPMD<TInt, int>
{
public const uint SAMPLE_COUNT = 1024u;
public ImageResultFloat image;
public MipLevel* pMipLevels;
public float* radicalInverse_VdCLut;
public int numMipLevels;
/// <summary>
/// Reverses the 32 bits of <paramref name="bits"/> and scales the result by 2^-32, giving a value
/// in [0, 1) (the base-2 radical inverse).
/// </summary>
/// <param name="bits">The integer whose bits are mirrored.</param>
/// <returns>The bit-reversed value multiplied by 2^-32.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float RadicalInverse_VdC(uint bits)
{
    // 2^-32, i.e. 1 / 0x100000000.
    const float InverseTwoPow32 = 2.3283064365386963e-10f;

    // Mirror the word by swapping progressively larger neighbours:
    // single bits, bit pairs, nibbles, bytes, after first swapping the two 16-bit halves.
    var reversed = (bits >> 16) | (bits << 16);
    reversed = ((reversed & 0xAAAAAAAAu) >> 1) | ((reversed & 0x55555555u) << 1);
    reversed = ((reversed & 0xCCCCCCCCu) >> 2) | ((reversed & 0x33333333u) << 2);
    reversed = ((reversed & 0xF0F0F0F0u) >> 4) | ((reversed & 0x0F0F0F0Fu) << 4);
    reversed = ((reversed & 0xFF00FF00u) >> 8) | ((reversed & 0x00FF00FFu) << 8);

    return reversed * InverseTwoPow32;
}
/// <summary>
/// Builds a 2D sample point for sample index lane <paramref name="i"/>: x is <c>i / N</c> and y is
/// loaded from <paramref name="lut"/>.
/// </summary>
/// <param name="i">Lane of sample indices.</param>
/// <param name="N">Divisor applied to <paramref name="i"/>; presumably the total sample count.</param>
/// <param name="lut">
/// Lookup table read for the y coordinate; presumably precomputed radical-inverse values indexed
/// by sample index (TODO confirm against the code that fills it).
/// </param>
/// <returns>The vector (x, y) described above.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector2<TFloat, float> Hammersley(TFloat i, uint N, float* lut)
{
var x = i / N;
// The load starts at the index held in lane element 0 only.
// NOTE(review): this looks like it assumes the lane holds consecutive indices and that the
// table stays readable for a full lane from that position; confirm with the caller.
var y = TFloat.Load(lut + (int)i[0]);
return MathV.Create<TFloat, float>(x, y);
}
// --- GGX Importance Sampling ---
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector3<TFloat, float> ImportanceSampleGGX(Vector2<TFloat, float> Xi, Vector3<TFloat, float> N, float roughness)
{
var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
var phi = 2.0f * PI * Xi.x;
// Clamp the inside of the cosTheta Sqrt to prevent NaN on division precision edges
var cosThetaInner = TFloat.Max((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y), TFloat.Zero);
var cosTheta = TFloat.Sqrt(cosThetaInner);
// Clamp the inside of sinTheta to prevent sqrt of negative floating-point errors
var sinThetaInner = TFloat.Max(1.0f - cosTheta * cosTheta, TFloat.Zero);
var sinTheta = TFloat.Sqrt(sinThetaInner);
// Spherical to Cartesian coordinates (Halfway vector)
var (sinPhi, cosPhi) = TFloat.SinCos(phi);
var H = MathV.Create<TFloat, float>(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);
// Tangent space to World space
var mask = TFloat.Abs(N.z) < 0.999f;
var up = MathV.Select(mask, MathV.Create<TFloat, float>(0.0f, 0.0f, 1.0f), MathV.Create<TFloat, float>(1.0f, 0.0f, 0.0f));
var tangent = MathV.Normalize(MathV.Cross(up, N));
var bitangent = MathV.Cross(N, tangent);
var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
return MathV.Normalize(sampleVec);
}
// --- Image Sampling Helpers ---
// Maps a 3D direction vector to 2D equirectangular UVs
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector2<TFloat, float> DirToEquirectangularUV(Vector3<TFloat, float> dir)
{
var u = TFloat.Atan2(dir.z, dir.x);
var v = TFloat.Asin(dir.y);
u = u / (2.0f * PI) + 0.5f;
v = v / PI + 0.5f;
return MathV.Create<TFloat, float>(u, v);
}
// Samples the source HDR image using bilinear interpolation (simplified to nearest neighbor for brevity here)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector3<TFloat, float> SampleEquirectangularMap(float* img, int w, int h, Vector3<TFloat, float> dir)
{
var uv = DirToEquirectangularUV(dir);
// Nearest neighbor pixel coordinates
var px = (uv.x * (w - 1.0f)).Cast<TInt, int>();
var py = (uv.y * (h - 1.0f)).Cast<TInt, int>();
// Clamp
px = TInt.Clamp(px, TInt.Zero, TInt.Create(w - 1));
py = TInt.Clamp(py, TInt.Zero, TInt.Create(h - 1));
// Assuming float RGB array format
var idx = (py * w + px) * 3;
var laneWidth = TFloat.LaneWidth;
var rBuffer = stackalloc float[laneWidth];
var gBuffer = stackalloc float[laneWidth];
var bBuffer = stackalloc float[laneWidth];
// Gather operation: extract scalar indices, perform random memory reads, and construct SoA buffers
for (var i = 0; i < laneWidth; i++)
{
var scalarIdx = idx[i];
rBuffer[i] = img[scalarIdx];
gBuffer[i] = img[scalarIdx + 1];
bBuffer[i] = img[scalarIdx + 2];
}
// Load the gathered contiguous arrays back into TLane types
var rLane = TFloat.Load(rBuffer);
var gLane = TFloat.Load(gBuffer);
var bLane = TFloat.Load(bBuffer);
return MathV.Create<TFloat, float>(rLane, gLane, bLane);
}
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
{
var m = 0;
while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
{
m++;
}
var span = new ReadOnlySpan<MipLevel>(pMipLevels, numMipLevels);
var pLevel = &pMipLevels[m];
var w = (int)pLevel->width;
var h = (int)pLevel->height;
var pData = pLevel->data;
var local_i = loopIndex - pLevel->offset;
var x = local_i % w;
var y = local_i / w;
var u = (float)x / (w - 1);
var v = (float)y / (h - 1);
var phi = (u - 0.5f) * 2.0f * PI;
var theta = (v - 0.5f) * PI;
sincos(theta, out var sinTheta, out var cosTheta);
sincos(phi, out var sinPhi, out var cosPhi);
var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
N = normalize(N);
// For split-sum, we assume View and Reflection directions equal the Normal
var V = N;
var R = N;
var vN = MathV.Create<TFloat, float>(
TFloat.Create(N.x),
TFloat.Create(N.y),
TFloat.Create(N.z)
);
var vV = MathV.Create<TFloat, float>(
TFloat.Create(V.x),
TFloat.Create(V.y),
TFloat.Create(V.z)
);
var vPrefilteredColorX = TFloat.Zero;
var vPrefilteredColorY = TFloat.Zero;
var vPrefilteredColorZ = TFloat.Zero;
var vTotalWeight = TFloat.Zero;
// 3. Monte Carlo Integration Loop
// We assume WideLane is supported in the test.
var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * pLevel->roughness);
var vDynamicSampleCount = TFloat.Create(dynamicSampleCount);
for (var i = 0u; i < dynamicSampleCount; i += (uint)TFloat.LaneWidth)
{
var laneIndices = TFloat.Sequence(i, 1.0f);
var validLaneMask = laneIndices < vDynamicSampleCount;
// Generate a Hammersley random sequence point
var Xi = Hammersley(laneIndices, dynamicSampleCount, radicalInverse_VdCLut);
// Get the halfway vector based on GGX NDF
var H = ImportanceSampleGGX(Xi, vN, pLevel->roughness);
// Calculate Light direction
var L = MathV.Reflect(-vV, H);
L = MathV.Normalize(L);
var NdotL = TFloat.Max(MathV.Dot(vN, L), TFloat.Zero);
var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);
NdotL &= validLaneMask;
// The Karis Average Weight: 1 / (1 + luma)
// A normal sky pixel (luma 1.0) gets a weight of 0.5.
// A sun pixel (luma 1000.0) gets a tiny weight of ~0.001, naturally suppressing it.
// This introduce bias, but significantly reduces fireflies without needing solid angle sampling or cdf inversion.
// And since this is a mip generation step, a little bias is acceptable for much better performance and stability.
var luma = MathV.Dot(sampleColor, MathV.Create<TFloat, float>(0.2126f, 0.7152f, 0.0722f));
var fireflyWeight = TFloat.One / (TFloat.One + luma);
var finalWeight = NdotL * fireflyWeight;
vPrefilteredColorX += sampleColor.x * finalWeight;
vPrefilteredColorY += sampleColor.y * finalWeight;
vPrefilteredColorZ += sampleColor.z * finalWeight;
vTotalWeight += finalWeight;
}
var totalWeight = 0.0f;
var prefilteredColor = float3(0, 0, 0);
for (var i = 0; i < TFloat.LaneWidth; i++)
{
prefilteredColor.x += vPrefilteredColorX[i];
prefilteredColor.y += vPrefilteredColorY[i];
prefilteredColor.z += vPrefilteredColorZ[i];
totalWeight += vTotalWeight[i];
}
// 4. Average the result
if (totalWeight > 0.0f)
{
prefilteredColor *= 1.0f / totalWeight;
}
// Write to output mip array
var out_idx = (y * w + x) * 3;
pData[out_idx] = prefilteredColor.x;
pData[out_idx + 1] = prefilteredColor.y;
pData[out_idx + 2] = prefilteredColor.z;
}
}
/// <summary>
/// Scalar GGX pre-filter job. Each <c>Execute</c> call produces one output texel of one mip level,
/// evaluating its Monte Carlo samples one at a time.
/// </summary>
internal unsafe struct GGXMipGenerationJob : IJobParallelFor
{
    /// <summary>Upper bound on samples per texel; the per-level count is scaled down from this by roughness.</summary>
    public const uint SAMPLE_COUNT = 1024u;

    /// <summary>Source image; <c>image.Data</c> is read as tightly packed RGB floats.</summary>
    public ImageResultFloat image;

    /// <summary>Array of <see cref="numMipLevels"/> level descriptors, ordered by ascending <c>offset</c>.</summary>
    public MipLevel* pMipLevels;

    /// <summary>Precomputed <see cref="RadicalInverse_VdC"/> values, indexed by sample number.</summary>
    public float* radicalInverse_VdCLut;

    /// <summary>Number of entries in <see cref="pMipLevels"/>.</summary>
    public int numMipLevels;

    /// <summary>
    /// Van der Corput radical inverse in base 2: reverses the 32 bits of <paramref name="bits"/> and scales
    /// the result by 2^-32.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static float RadicalInverse_VdC(uint bits)
    {
        bits = (bits << 16) | (bits >> 16);
        bits = ((bits & 0x55555555u) << 1) | ((bits & 0xAAAAAAAAu) >> 1);
        bits = ((bits & 0x33333333u) << 2) | ((bits & 0xCCCCCCCCu) >> 2);
        bits = ((bits & 0x0F0F0F0Fu) << 4) | ((bits & 0xF0F0F0F0u) >> 4);
        bits = ((bits & 0x00FF00FFu) << 8) | ((bits & 0xFF00FF00u) >> 8);
        return bits * 2.3283064365386963e-10f; // bits / 0x100000000
    }

    /// <summary>
    /// Builds the <paramref name="i"/>-th Hammersley point: x = i / N, y = <paramref name="lut"/>[i].
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float2 Hammersley(uint i, uint N, float* lut)
    {
        return float2((float)i / N, lut[i]);
    }

    // --- GGX Importance Sampling ---

    /// <summary>
    /// Maps a 2D sample <paramref name="Xi"/> to a normalised world-space halfway vector around
    /// <paramref name="N"/> for the given <paramref name="roughness"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float3 ImportanceSampleGGX(float2 Xi, float3 N, float roughness)
    {
        var a = roughness * roughness; // Disney/Epic remap roughness for better visual linearity
        var phi = 2.0f * PI * Xi.x;

        // Clamp the sqrt arguments at zero (as the SPMD variant of this job does) so rounding
        // error can never produce NaN.
        var cosTheta = sqrt(max((1.0f - Xi.y) / (1.0f + (a * a - 1.0f) * Xi.y), 0.0f));
        var sinTheta = sqrt(max(1.0f - cosTheta * cosTheta, 0.0f));

        // Spherical to Cartesian coordinates (halfway vector in tangent space)
        sincos(phi, out var sinPhi, out var cosPhi);
        var H = float3(cosPhi * sinTheta, sinPhi * sinTheta, cosTheta);

        // Tangent space to world space; pick an up axis that is not (nearly) parallel to N.
        var up = abs(N.z) < 0.999f ? float3(0.0f, 0.0f, 1.0f) : float3(1.0f, 0.0f, 0.0f);
        var tangent = normalize(cross(up, N));
        var bitangent = cross(N, tangent);

        var sampleVec = (tangent * H.x) + (bitangent * H.y) + (N * H.z);
        return normalize(sampleVec);
    }

    // --- Image Sampling Helpers ---

    /// <summary>Maps a 3D direction vector to 2D equirectangular UVs (atan2/asin results remapped by +0.5).</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float2 DirToEquirectangularUV(float3 dir)
    {
        var uv = float2(atan2(dir.z, dir.x), asin(dir.y));
        uv.x = uv.x / (2.0f * PI) + 0.5f;
        uv.y = uv.y / PI + 0.5f;
        return uv;
    }

    /// <summary>
    /// Samples the source image with nearest-neighbour lookup.
    /// </summary>
    /// <param name="img">Tightly packed RGB float image.</param>
    /// <param name="w">Image width in texels.</param>
    /// <param name="h">Image height in texels.</param>
    /// <param name="dir">Lookup direction.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float3 SampleEquirectangularMap(float* img, int w, int h, float3 dir)
    {
        var uv = DirToEquirectangularUV(dir);

        // Nearest neighbor pixel coordinates
        var px = (int)(uv.x * (w - 1));
        var py = (int)(uv.y * (h - 1));

        // Clamp so the read always stays inside the image.
        px = clamp(px, 0, w - 1);
        py = clamp(py, 0, h - 1);

        // Assuming float RGB array format
        var idx = (py * w + px) * 3;
        return float3(img[idx], img[idx + 1], img[idx + 2]);
    }

    /// <summary>
    /// Filters the texel identified by <paramref name="loopIndex"/> and writes its RGB value into the owning mip level.
    /// </summary>
    /// <param name="loopIndex">Flat texel index across all mip levels (see <c>MipLevel.offset</c>).</param>
    /// <param name="ctx">Job execution context (unused).</param>
    public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
    {
        // Find the mip level whose texel range contains this flat index.
        var m = 0;
        while (m < numMipLevels - 1 && loopIndex >= pMipLevels[m + 1].offset)
        {
            m++;
        }

        var pLevel = &pMipLevels[m];
        var w = (int)pLevel->width;
        var h = (int)pLevel->height;
        var pData = pLevel->data;

        var localIndex = loopIndex - pLevel->offset;
        var x = localIndex % w;
        var y = localIndex / w;

        // Levels that are a single texel wide or tall would evaluate 0 / 0 here and poison the
        // whole texel with NaN; point those at the centre of the map instead.
        var u = w > 1 ? (float)x / (w - 1) : 0.5f;
        var v = h > 1 ? (float)y / (h - 1) : 0.5f;

        var phi = (u - 0.5f) * 2.0f * PI;
        var theta = (v - 0.5f) * PI;

        sincos(theta, out var sinTheta, out var cosTheta);
        sincos(phi, out var sinPhi, out var cosPhi);

        var N = float3(cosTheta * cosPhi, sinTheta, cosTheta * sinPhi);
        N = normalize(N);

        // For split-sum, we assume the view direction equals the normal
        var V = N;

        var prefilteredColor = float3(0, 0, 0);
        var totalWeight = 0.0f;

        // Monte Carlo integration loop.
        // NOTE(review): GGXMipGenerationJobSPMD scales its count linearly with roughness and adds a
        // firefly weight, so the two jobs do different work per texel — confirm before comparing timings.
        var dynamicSampleCount = (uint)max(1.0f, SAMPLE_COUNT * sqrt(pLevel->roughness));

        for (var i = 0u; i < dynamicSampleCount; i++)
        {
            // Generate a Hammersley random sequence point
            var Xi = Hammersley(i, dynamicSampleCount, radicalInverse_VdCLut);

            // Get the halfway vector based on GGX NDF
            var H = ImportanceSampleGGX(Xi, N, pLevel->roughness);

            // Calculate light direction
            var L = reflect(-V, H);
            L = normalize(L);

            var NdotL = max(dot(N, L), 0.0f);

            // Only samples above the horizon contribute.
            if (NdotL > 0.0f)
            {
                var sampleColor = SampleEquirectangularMap(image.Data, (int)image.Width, (int)image.Height, L);
                prefilteredColor += sampleColor * NdotL;
                totalWeight += NdotL;
            }
        }

        // Average the result
        if (totalWeight > 0.0f)
        {
            prefilteredColor *= 1.0f / totalWeight;
        }

        // Write to output mip array
        var outIndex = (y * w + x) * 3;
        pData[outIndex] = prefilteredColor.x;
        pData[outIndex + 1] = prefilteredColor.y;
        pData[outIndex + 2] = prefilteredColor.z;
    }
}
/// <summary>
/// Benchmarks GGX mip-chain generation for an equirectangular HDR image: a job-system SPMD path,
/// a <c>Parallel.For</c> scalar path, and a single-threaded scalar path. Every generated level is
/// written out as a PNG preview during cleanup.
/// </summary>
public unsafe class GGXMipGenerationBenchmark
{
    private ImageResultFloat _image;
    private int _mipLevels;
    private int _totalPixel;
    private float** _pResult;
    private MipLevel* _pMipLevels;
    private float* radicalInverse_VdCLut;

    private JobScheduler _jobScheduler = null!;

    /// <summary>
    /// Loads the source image, allocates one RGB float buffer per mip level, fills the level descriptors
    /// and the radical-inverse lookup table, and creates the job scheduler.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
        using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
        _image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);

        _mipLevels = (int)Math.Floor(Math.Log2(Math.Max(_image.Width, _image.Height))) + 1;
        _pResult = (float**)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(float*)));
        _pMipLevels = (MipLevel*)NativeMemory.Alloc((nuint)(_mipLevels * sizeof(MipLevel)));

        // Offsets are accumulated below, so start from zero even if Setup runs more than once.
        _totalPixel = 0;

        uint w, h;
        for (var i = 0; i < _mipLevels; i++)
        {
            w = Math.Max(1, _image.Width >> i);
            h = Math.Max(1, _image.Height >> i);

            var sizeInBytes = (nuint)(w * h * 3 * sizeof(float));
            _pResult[i] = (float*)NativeMemory.Alloc(sizeInBytes);

            _pMipLevels[i] = new MipLevel
            {
                width = w,
                height = h,
                offset = _totalPixel,
                data = _pResult[i],
                // Linear roughness from 0 to 1 across mip levels; a single-level chain would divide by zero.
                roughness = _mipLevels > 1 ? (float)i / (_mipLevels - 1) : 0.0f
            };

            _totalPixel += (int)(w * h);
        }

        var desc = new JobSchedulerDesc
        {
            DependencyChainCapacity = 16,
            // Leave one core free, but never request zero workers on a single-core machine.
            ThreadCount = Math.Max(1, Environment.ProcessorCount - 1),
            ThreadPriority = ThreadPriority.Normal,
        };

        radicalInverse_VdCLut = (float*)NativeMemory.Alloc(GGXMipGenerationJob.SAMPLE_COUNT * sizeof(float));
        for (var i = 0u; i < GGXMipGenerationJob.SAMPLE_COUNT; i++)
        {
            radicalInverse_VdCLut[i] = GGXMipGenerationJob.RadicalInverse_VdC(i);
        }

        _jobScheduler = new JobScheduler(in desc);
    }

    /// <summary>
    /// Writes a tightly packed RGB float buffer to <paramref name="filePath"/> as an 8-bit PNG after a
    /// simple gamma curve.
    /// </summary>
    /// <param name="pData">Source texels, three floats (R, G, B) per texel, row by row.</param>
    /// <param name="width">Width in texels.</param>
    /// <param name="height">Height in texels.</param>
    /// <param name="filePath">Destination file; its directory is created if missing and an existing file is replaced.</param>
    /// <exception cref="InvalidOperationException">The bitmap could not be encoded as PNG.</exception>
    public void DumpMipLevelToPng(float* pData, int width, int height, string filePath)
    {
        // Create a standard 32-bit RGBA bitmap
        using var bitmap = new SKBitmap(width, height, SKColorType.Rgba8888, SKAlphaType.Opaque);

        // Get a pointer to the SkiaSharp pixel buffer
        var pPixels = (byte*)bitmap.GetPixels();

        for (var y = 0; y < height; y++)
        {
            for (var x = 0; x < width; x++)
            {
                // Source data is tightly packed floats: R, G, B
                var inIdx = (y * width + x) * 3;
                var r = pData[inIdx];
                var g = pData[inIdx + 1];
                var b = pData[inIdx + 2];

                // Basic gamma correction so HDR values are viewable on a normal screen
                // Gamma 2.2 = roughly pow(color, 1.0/2.2)
                r = MathF.Pow(MathF.Max(0, r), 1.0f / 2.2f);
                g = MathF.Pow(MathF.Max(0, g), 1.0f / 2.2f);
                b = MathF.Pow(MathF.Max(0, b), 1.0f / 2.2f);

                // Convert 0.0-1.0 to 0-255 byte
                var rByte = (byte)Math.Clamp(r * 255.0f, 0, 255);
                var gByte = (byte)Math.Clamp(g * 255.0f, 0, 255);
                var bByte = (byte)Math.Clamp(b * 255.0f, 0, 255);

                // Write to Skia's buffer (RGBA)
                var outIdx = (y * width + x) * 4;
                pPixels[outIdx] = rByte;
                pPixels[outIdx + 1] = gByte;
                pPixels[outIdx + 2] = bByte;
                pPixels[outIdx + 3] = 255; // Alpha
            }
        }

        // A missing output directory would otherwise fail the write.
        var directory = Path.GetDirectoryName(filePath);
        if (!string.IsNullOrEmpty(directory))
        {
            Directory.CreateDirectory(directory);
        }

        using var data = bitmap.Encode(SKEncodedImageFormat.Png, 100)
            ?? throw new InvalidOperationException($"Failed to encode '{filePath}' as PNG.");

        // File.Create truncates; File.OpenWrite would leave stale trailing bytes when overwriting a
        // larger file from a previous run.
        using var stream = File.Create(filePath);
        data.SaveTo(stream);
    }

    /// <summary>
    /// Dumps every mip level as a PNG preview, then releases the image, all native buffers and the scheduler.
    /// </summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        try
        {
            for (var i = 0; i < _mipLevels; i++)
            {
                DumpMipLevelToPng(_pResult[i], (int)_pMipLevels[i].width, (int)_pMipLevels[i].height, $"C:\\Users\\Misaki\\Downloads\\Im\\mip_level_{i}.png");
            }
        }
        finally
        {
            // Release native resources even when writing a preview fails.
            _image.Dispose();

            for (var i = 0; i < _mipLevels; i++)
            {
                NativeMemory.Free(_pResult[i]);
            }

            NativeMemory.Free(_pResult);
            NativeMemory.Free(_pMipLevels);
            NativeMemory.Free(radicalInverse_VdCLut);

            _jobScheduler.Dispose();
        }
    }

    /// <summary>
    /// Schedules the SPMD job over every texel of every level using the given lane types.
    /// </summary>
    private JobHandle ScheduleSPMD<TFloat, TInt>()
        where TFloat : unmanaged, ISPMD<TFloat, float>
        where TInt : unmanaged, ISPMD<TInt, int>
    {
        var job = new GGXMipGenerationJobSPMD<TFloat, TInt>
        {
            image = _image,
            pMipLevels = _pMipLevels,
            numMipLevels = _mipLevels,
            radicalInverse_VdCLut = radicalInverse_VdCLut
        };

        return _jobScheduler.ScheduleParallelFor(in job, _totalPixel, 64);
    }

    /// <summary>
    /// Runs the SPMD job on the job scheduler, using wide lanes when supported and scalar lanes otherwise.
    /// </summary>
    [Benchmark]
    public void JobGGX()
    {
        var handle = WideLane.IsSupported
            ? ScheduleSPMD<WideLane<float>, WideLane<int>>()
            : ScheduleSPMD<ScalarLane<float>, ScalarLane<int>>();

        _jobScheduler.Wait(handle);
    }

    /// <summary>
    /// Runs the scalar job through <c>Parallel.For</c> (currently not registered as a benchmark).
    /// </summary>
    //[Benchmark]
    public void ParallelGGX()
    {
        var job = new GGXMipGenerationJob
        {
            image = _image,
            pMipLevels = _pMipLevels,
            numMipLevels = _mipLevels,
            radicalInverse_VdCLut = radicalInverse_VdCLut
        };

        // MaxDegreeOfParallelism must not be 0, which ProcessorCount - 1 yields on a single-core machine.
        var options = new ParallelOptions { MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount - 1) };

        Parallel.For(0, _totalPixel, options, i =>
        {
            var localJob = job;
            var ctx = new JobExecutionContext();
            localJob.Execute(i, in ctx);
        });
    }

    /// <summary>
    /// Runs the scalar job over every texel on the calling thread.
    /// </summary>
    [Benchmark]
    public void SingleThreadGGX()
    {
        var job = new GGXMipGenerationJob
        {
            image = _image,
            pMipLevels = _pMipLevels,
            numMipLevels = _mipLevels,
            radicalInverse_VdCLut = radicalInverse_VdCLut
        };

        var ctx = new JobExecutionContext();
        job.Run(_totalPixel, in ctx);
    }
}

View File

@@ -36,7 +36,7 @@ public unsafe class SPMDBenchmark
height = _SIZE, height = _SIZE,
}; };
var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, false); var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, false, JobPriority.Normal);
_scheduler.Wait(handle); _scheduler.Wait(handle);
} }

View File

@@ -239,7 +239,7 @@ internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
private static T GradDot<T>(T ix, T iy, T fx, T fy) private static T GradDot<T>(T ix, T iy, T fx, T fy)
where T : ISPMD<T, float> where T : unmanaged, ISPMD<T, float>
{ {
var c289 = T.Create(289f); var c289 = T.Create(289f);
var c34 = T.Create(34f); var c34 = T.Create(34f);
@@ -267,7 +267,7 @@ internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T Noise<T>(T uvX, T uvY) public static T Noise<T>(T uvX, T uvY)
where T : ISPMD<T, float> where T : unmanaged, ISPMD<T, float>
{ {
var c1 = T.Create(1f); var c1 = T.Create(1f);
var c6 = T.Create(6f); var c6 = T.Create(6f);
@@ -292,7 +292,7 @@ internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
} }
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var indices = TLane.Sequence(baseIndex, 1f); var indices = TLane.Sequence(baseIndex, 1f);
var w = TLane.Create(width); var w = TLane.Create(width);

View File

@@ -18,8 +18,9 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" /> <PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
<PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="18.6.37125.3" /> <PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="18.7.37220.1" />
<PackageReference Include="MSTest" Version="4.1.0" /> <PackageReference Include="MSTest" Version="4.2.1" />
<PackageReference Include="SkiaSharp" Version="3.119.2" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@@ -6,35 +6,18 @@ using Misaki.HighPerformance.Test.Benchmark;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
BenchmarkRunner.Run<AllocationBenchmark>(); //BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
//var bench = new ParallelNoiseBenchmark(); var bench = new GGXMipGenerationBenchmark();
//bench.Setup(); bench.Setup();
//for (int i = 0; i < 4096 * 5; i++) var sw = System.Diagnostics.Stopwatch.StartNew();
//{ bench.JobGGX();
// bench.JobSystem(); sw.Stop();
//} Console.WriteLine($"GGX Mip Generation: {sw.Elapsed.TotalMilliseconds} ms");
bench.Cleanup();
//bench.Cleanup(); //Console.WriteLine(sw.Elapsed.TotalMilliseconds);
//bench.Setup();
//for (int i = 0; i < 4096 * 5; i++)
//{
// bench.JobSystem();
//}
//bench.Cleanup();
//bench.Setup();
//for (int i = 0; i < 4096 * 5; i++)
//{
// bench.JobSystem();
//}
//bench.Cleanup();
//AllocationManager.Initialize(AllocationManagerInitOpts.Default); //AllocationManager.Initialize(AllocationManagerInitOpts.Default);
//var set = new UnsafeBitSet(100, AllocationHandle.Persistent, AllocationOption.Clear); //var set = new UnsafeBitSet(100, AllocationHandle.Persistent, AllocationOption.Clear);

View File

@@ -13,7 +13,7 @@ internal unsafe struct DotProductJob : IJobSPMD<float>
public float* results; // output array (dot products) public float* results; // output array (dot products)
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var vecA = MathV.LoadVector3<TLane, float>((float*)(arrayA + baseIndex)); var vecA = MathV.LoadVector3<TLane, float>((float*)(arrayA + baseIndex));
var vecB = MathV.LoadVector3<TLane, float>((float*)(arrayB + baseIndex)); var vecB = MathV.LoadVector3<TLane, float>((float*)(arrayB + baseIndex));
@@ -30,7 +30,7 @@ internal unsafe struct Vector2LerpJob : IJobSPMD<float>
public float[] results; public float[] results;
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var a = MathV.LoadVector2<TLane, float>(ref arrayA[baseIndex].x); var a = MathV.LoadVector2<TLane, float>(ref arrayA[baseIndex].x);
var b = MathV.LoadVector2<TLane, float>(ref arrayB[baseIndex].x); var b = MathV.LoadVector2<TLane, float>(ref arrayB[baseIndex].x);
@@ -49,7 +49,7 @@ internal unsafe struct Vector4NormalizeJob : IJobSPMD<float>
public float4[] output; public float4[] output;
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var vec = MathV.LoadVector4<TLane, float>(ref input[baseIndex].x); var vec = MathV.LoadVector4<TLane, float>(ref input[baseIndex].x);
var normalized = MathV.Normalize(vec); var normalized = MathV.Normalize(vec);
@@ -64,7 +64,7 @@ internal unsafe struct Vector3CrossJob : IJobSPMD<float>
public float3[] results; public float3[] results;
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x); var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x);
var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x); var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x);
@@ -82,7 +82,7 @@ internal unsafe struct MinMaxClampJob : IJobSPMD<float>
public float3[] results; public float3[] results;
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var val = MathV.LoadVector3<TLane, float>(ref values[baseIndex].x); var val = MathV.LoadVector3<TLane, float>(ref values[baseIndex].x);
var min = MathV.LoadVector3<TLane, float>(ref mins[baseIndex].x); var min = MathV.LoadVector3<TLane, float>(ref mins[baseIndex].x);
@@ -100,7 +100,7 @@ internal unsafe struct DistanceJob : IJobSPMD<float>
public float[] results; public float[] results;
public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx) public readonly void Execute<TLane>(int baseIndex, ref readonly JobExecutionContext ctx)
where TLane : ISPMD<TLane, float> where TLane : unmanaged, ISPMD<TLane, float>
{ {
var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x); var a = MathV.LoadVector3<TLane, float>(ref arrayA[baseIndex].x);
var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x); var b = MathV.LoadVector3<TLane, float>(ref arrayB[baseIndex].x);