Refactor SIMD gather, tighten constraints, doc & test opts
- Require TLane : unmanaged, ISPMDLane for stricter type safety and direct memory operations
- Refactor GatherVectorN and WideLane<T>.Gather to use Unsafe.SkipInit and direct assignment, removing stackalloc and TLane.Load for better SIMD performance
- Use Vector.Sum in WideLane<T>.ReduceAdd
- Add/improve XML docs for ReduceAdd/ReduceMax/ReduceMin
- Update the test project for AOT, AVX2, and speed optimization, and disable reflection
- Tweak GGXMipGenerationBenchmark and Program.cs for improved benchmarking and output
This commit is contained in:
@@ -597,13 +597,22 @@ public unsafe interface ISPMDLane<TSelf, TNumber> : ISPMDLane, IEquatable<TSelf>
|
|||||||
static abstract TSelf Rsqrt(TSelf value);
|
static abstract TSelf Rsqrt(TSelf value);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Horizontally reduces the lane value by adding all lanes together, returning a single-lane result.
|
/// Reduces the lane value to a single scalar by adding all lanes together.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="a"></param>
|
/// <param name="value">The lane value to reduce.</param>
|
||||||
/// <param name="b"></param>
|
/// <returns>The reduced scalar value.</returns>
|
||||||
/// <returns></returns>
|
|
||||||
static abstract TNumber ReduceAdd(TSelf value);
|
static abstract TNumber ReduceAdd(TSelf value);
|
||||||
|
/// <summary>
|
||||||
|
/// Reduces the lane value to a single scalar by finding the maximum element.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="value">The lane value to reduce.</param>
|
||||||
|
/// <returns>The reduced scalar value.</returns>
|
||||||
static abstract TNumber ReduceMax(TSelf value);
|
static abstract TNumber ReduceMax(TSelf value);
|
||||||
|
/// <summary>
|
||||||
|
/// Reduces the lane value to a single scalar by finding the minimum element.
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="value">The lane value to reduce.</param>
|
||||||
|
/// <returns>The reduced scalar value.</returns>
|
||||||
static abstract TNumber ReduceMin(TSelf value);
|
static abstract TNumber ReduceMin(TSelf value);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
||||||
<Authors>Misaki</Authors>
|
<Authors>Misaki</Authors>
|
||||||
<AssemblyVersion>1.3.2</AssemblyVersion>
|
<AssemblyVersion>1.3.3</AssemblyVersion>
|
||||||
<Version>$(AssemblyVersion)</Version>
|
<Version>$(AssemblyVersion)</Version>
|
||||||
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
||||||
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -21,7 +21,7 @@ const string GenericParameters = $"{TLane}, {TNumber}";
|
|||||||
|
|
||||||
var dimensions = new int[] { 2, 3, 4 };
|
var dimensions = new int[] { 2, 3, 4 };
|
||||||
var components = new char[] { 'x', 'y', 'z', 'w' };
|
var components = new char[] { 'x', 'y', 'z', 'w' };
|
||||||
var TLaneRestrictions = $@"where {TLane} : ISPMDLane<{TLane}, {TNumber}>";
|
var TLaneRestrictions = $@"where {TLane} : unmanaged, ISPMDLane<{TLane}, {TNumber}>";
|
||||||
var TNumberRestrictions = $@"where {TNumber} : unmanaged, INumber<{TNumber}>, IBinaryNumber<{TNumber}>, IMinMaxValue<{TNumber}>, IBitwiseOperators<{TNumber}, {TNumber}, {TNumber}>";
|
var TNumberRestrictions = $@"where {TNumber} : unmanaged, INumber<{TNumber}>, IBinaryNumber<{TNumber}>, IMinMaxValue<{TNumber}>, IBitwiseOperators<{TNumber}, {TNumber}, {TNumber}>";
|
||||||
#>
|
#>
|
||||||
|
|
||||||
@@ -126,19 +126,24 @@ public static unsafe partial class MathV
|
|||||||
<#= TLaneRestrictions #>
|
<#= TLaneRestrictions #>
|
||||||
<#= TNumberRestrictions #>
|
<#= TNumberRestrictions #>
|
||||||
{
|
{
|
||||||
var buffer = stackalloc <#= TNumber #>[TLane.LaneWidth * <#= dimension #>];
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
|
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||||
|
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||||
|
<# } #>
|
||||||
|
|
||||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||||
{
|
{
|
||||||
var scalarIdx = int.CreateTruncating(indices[i]);
|
var scalarIdx = int.CreateTruncating(indices[i]);
|
||||||
|
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
buffer[<#= i #> * TLane.LaneWidth + i] = pData[scalarIdx + <#= i #> * scale];
|
p<#= components[i] #>[i] = pData[scalarIdx + <#= i #> * scale];
|
||||||
<# } #>
|
<# } #>
|
||||||
}
|
}
|
||||||
|
|
||||||
return new <#= vectorType #>
|
return new Vector<#= dimension #><TLane, TNumber>
|
||||||
{
|
{
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
<#= components[i] #> = TLane.Load(buffer + <#= i #> * TLane.LaneWidth),
|
<#= components[i] #> = <#= components[i] #>,
|
||||||
<# } #>
|
<# } #>
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -148,19 +153,24 @@ public static unsafe partial class MathV
|
|||||||
<#= TLaneRestrictions #>
|
<#= TLaneRestrictions #>
|
||||||
<#= TNumberRestrictions #>
|
<#= TNumberRestrictions #>
|
||||||
{
|
{
|
||||||
var buffer = stackalloc <#= TNumber #>[TLane.LaneWidth * <#= dimension #>];
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
|
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||||
|
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||||
|
<# } #>
|
||||||
|
|
||||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||||
{
|
{
|
||||||
var scalarIdx = pIndices[i];
|
var scalerIdx = pIndices[i];
|
||||||
|
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
buffer[<#= i #> * TLane.LaneWidth + i] = pData[scalarIdx + <#= i #> * scale];
|
p<#= components[i] #>[i] = pData[scalerIdx + <#= i #> * scale];
|
||||||
<# } #>
|
<# } #>
|
||||||
}
|
}
|
||||||
|
|
||||||
return new <#= vectorType #>
|
return new Vector<#= dimension #><TLane, TNumber>
|
||||||
{
|
{
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
<#= components[i] #> = TLane.Load(buffer + <#= i #> * TLane.LaneWidth),
|
<#= components[i] #> = <#= components[i] #>,
|
||||||
<# } #>
|
<# } #>
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -170,19 +180,24 @@ public static unsafe partial class MathV
|
|||||||
<#= TLaneRestrictions #>
|
<#= TLaneRestrictions #>
|
||||||
<#= TNumberRestrictions #>
|
<#= TNumberRestrictions #>
|
||||||
{
|
{
|
||||||
var buffer = stackalloc <#= TNumber #>[TLane.LaneWidth * <#= dimension #>];
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
|
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||||
|
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||||
|
<# } #>
|
||||||
|
|
||||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||||
{
|
{
|
||||||
var scalarIdx = int.CreateTruncating(indices[i]);
|
var scalarIdx = int.CreateTruncating(indices[i]);
|
||||||
|
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
buffer[<#= i #> * TLane.LaneWidth + i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
p<#= components[i] #>[i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
||||||
<# } #>
|
<# } #>
|
||||||
}
|
}
|
||||||
|
|
||||||
return new <#= vectorType #>
|
return new Vector<#= dimension #><TLane, TNumber>
|
||||||
{
|
{
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
<#= components[i] #> = TLane.Load(buffer + <#= i #> * TLane.LaneWidth),
|
<#= components[i] #> = <#= components[i] #>,
|
||||||
<# } #>
|
<# } #>
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -192,19 +207,24 @@ public static unsafe partial class MathV
|
|||||||
<#= TLaneRestrictions #>
|
<#= TLaneRestrictions #>
|
||||||
<#= TNumberRestrictions #>
|
<#= TNumberRestrictions #>
|
||||||
{
|
{
|
||||||
var buffer = stackalloc <#= TNumber #>[TLane.LaneWidth * <#= dimension #>];
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
|
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||||
|
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||||
|
<# } #>
|
||||||
|
|
||||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||||
{
|
{
|
||||||
var scalarIdx = Unsafe.Add(ref baseIndex, i);
|
var scalarIdx = Unsafe.Add(ref baseIndex, i);
|
||||||
|
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
buffer[<#= i #> * TLane.LaneWidth + i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
p<#= components[i] #>[i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
||||||
<# } #>
|
<# } #>
|
||||||
}
|
}
|
||||||
|
|
||||||
return new <#= vectorType #>
|
return new Vector<#= dimension #><TLane, TNumber>
|
||||||
{
|
{
|
||||||
<# for (int i = 0; i < dimension; i++) { #>
|
<# for (int i = 0; i < dimension; i++) { #>
|
||||||
<#= components[i] #> = TLane.Load(buffer + <#= i #> * TLane.LaneWidth),
|
<#= components[i] #> = <#= components[i] #>,
|
||||||
<# } #>
|
<# } #>
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -207,49 +207,69 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Gather(TNumber* pData, WideLane<TNumber> indices, int scale)
|
public static WideLane<TNumber> Gather(TNumber* pData, WideLane<TNumber> indices, int scale)
|
||||||
{
|
{
|
||||||
var buffer = stackalloc TNumber[LaneWidth];
|
Unsafe.SkipInit(out Vector<TNumber> result);
|
||||||
for (var i = 0; i < LaneWidth; i++)
|
|
||||||
|
var pResult = (TNumber*)&result;
|
||||||
|
var pIndices = (TNumber*)&indices;
|
||||||
|
|
||||||
|
var count = Vector<TNumber>.Count;
|
||||||
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
buffer[i] = pData[int.CreateTruncating(indices[i]) * scale / sizeof(TNumber)];
|
var idx = int.CreateTruncating(pIndices[i]);
|
||||||
|
pResult[i] = pData[idx * scale / sizeof(TNumber)];
|
||||||
}
|
}
|
||||||
|
|
||||||
return new WideLane<TNumber>(Vector.Load(buffer));
|
return new WideLane<TNumber>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Gather(TNumber* pData, int* pIndices, int scale)
|
public static WideLane<TNumber> Gather(TNumber* pData, int* pIndices, int scale)
|
||||||
{
|
{
|
||||||
var buffer = stackalloc TNumber[LaneWidth];
|
Unsafe.SkipInit(out Vector<TNumber> result);
|
||||||
for (var i = 0; i < LaneWidth; i++)
|
|
||||||
|
var pResult = (TNumber*)&result;
|
||||||
|
|
||||||
|
var count = Vector<TNumber>.Count;
|
||||||
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
buffer[i] = pData[pIndices[i] * scale / sizeof(TNumber)];
|
pResult[i] = pData[pIndices[i] * scale / sizeof(TNumber)];
|
||||||
}
|
}
|
||||||
|
|
||||||
return new WideLane<TNumber>(Vector.Load(buffer));
|
return new WideLane<TNumber>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Gather(ref TNumber baseAddress, WideLane<TNumber> indices, int scale)
|
public static WideLane<TNumber> Gather(ref TNumber baseAddress, WideLane<TNumber> indices, int scale)
|
||||||
{
|
{
|
||||||
var buffer = stackalloc TNumber[LaneWidth];
|
Unsafe.SkipInit(out Vector<TNumber> result);
|
||||||
for (var i = 0; i < LaneWidth; i++)
|
|
||||||
|
var pResult = (TNumber*)&result;
|
||||||
|
var pIndices = (TNumber*)&indices;
|
||||||
|
|
||||||
|
var count = Vector<TNumber>.Count;
|
||||||
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
buffer[i] = Unsafe.Add(ref baseAddress, int.CreateTruncating(indices[i]) * scale / sizeof(TNumber));
|
var idx = int.CreateTruncating(pIndices[i]);
|
||||||
|
pResult[i] = Unsafe.Add(ref baseAddress, idx * scale / sizeof(TNumber));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new WideLane<TNumber>(Vector.Load(buffer));
|
return new WideLane<TNumber>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static WideLane<TNumber> Gather(ref TNumber baseAddress, ref int baseIndex, int scale)
|
public static WideLane<TNumber> Gather(ref TNumber baseAddress, ref int baseIndex, int scale)
|
||||||
{
|
{
|
||||||
var buffer = stackalloc TNumber[LaneWidth];
|
Unsafe.SkipInit(out Vector<TNumber> result);
|
||||||
for (var i = 0; i < LaneWidth; i++)
|
|
||||||
|
var pResult = (TNumber*)&result;
|
||||||
|
|
||||||
|
var count = Vector<TNumber>.Count;
|
||||||
|
for (var i = 0; i < count; i++)
|
||||||
{
|
{
|
||||||
buffer[i] = Unsafe.Add(ref baseAddress, Unsafe.Add(ref baseIndex, i) * scale / sizeof(TNumber));
|
pResult[i] = Unsafe.Add(ref baseAddress, Unsafe.Add(ref baseIndex, i) * scale / sizeof(TNumber));
|
||||||
}
|
}
|
||||||
|
|
||||||
return new WideLane<TNumber>(Vector.Load(buffer));
|
return new WideLane<TNumber>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -274,11 +294,9 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public int CompressStore(WideLane<TNumber> mask, TNumber* pDestination)
|
public int CompressStore(WideLane<TNumber> mask, TNumber* pDestination)
|
||||||
{
|
{
|
||||||
var size = sizeof(TNumber);
|
|
||||||
|
|
||||||
if (LaneWidth == Vector512<TNumber>.Count && Vector512.IsHardwareAccelerated)
|
if (LaneWidth == Vector512<TNumber>.Count && Vector512.IsHardwareAccelerated)
|
||||||
{
|
{
|
||||||
if (size == 4)
|
if (sizeof(TNumber) == 4)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref mask);
|
||||||
@@ -292,7 +310,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
return BitOperations.PopCount(moveMask);
|
return BitOperations.PopCount(moveMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size == 8)
|
if (sizeof(TNumber) == 8)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref mask);
|
||||||
@@ -308,7 +326,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
}
|
}
|
||||||
else if (LaneWidth == Vector256<TNumber>.Count && Vector256.IsHardwareAccelerated)
|
else if (LaneWidth == Vector256<TNumber>.Count && Vector256.IsHardwareAccelerated)
|
||||||
{
|
{
|
||||||
if (size == 4)
|
if (sizeof(TNumber) == 4)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref mask);
|
||||||
@@ -322,7 +340,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
return BitOperations.PopCount(moveMask);
|
return BitOperations.PopCount(moveMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size == 8)
|
if (sizeof(TNumber) == 8)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref mask);
|
||||||
@@ -340,7 +358,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
}
|
}
|
||||||
else if (LaneWidth == Vector128<TNumber>.Count && Vector128.IsHardwareAccelerated)
|
else if (LaneWidth == Vector128<TNumber>.Count && Vector128.IsHardwareAccelerated)
|
||||||
{
|
{
|
||||||
if (size == 4)
|
if (sizeof(TNumber) == 4)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref mask);
|
||||||
@@ -354,7 +372,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
return BitOperations.PopCount(moveMask);
|
return BitOperations.PopCount(moveMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size == 8)
|
if (sizeof(TNumber) == 8)
|
||||||
{
|
{
|
||||||
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref Unsafe.AsRef(in this));
|
ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref Unsafe.AsRef(in this));
|
||||||
var m = Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref mask);
|
var m = Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref mask);
|
||||||
@@ -568,10 +586,8 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
var result = Vector.FusedMultiplyAdd(va, vb, vc);
|
var result = Vector.FusedMultiplyAdd(va, vb, vc);
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
return new WideLane<TNumber>((a.value * b.value) + c.value);
|
||||||
return new WideLane<TNumber>((a.value * b.value) + c.value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
@@ -870,7 +886,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
var result = Vector.Exp(v);
|
var result = Vector.Exp(v);
|
||||||
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
|
||||||
}
|
}
|
||||||
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1041,15 +1057,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static TNumber ReduceAdd(WideLane<TNumber> value)
|
public static TNumber ReduceAdd(WideLane<TNumber> value)
|
||||||
{
|
{
|
||||||
// TODO: Use shuffle and add.
|
return Vector.Sum(value.value);
|
||||||
|
|
||||||
var result = TNumber.Zero;
|
|
||||||
for (var i = 0; i < LaneWidth; i++)
|
|
||||||
{
|
|
||||||
result += value[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
using BenchmarkDotNet.Attributes;
|
using BenchmarkDotNet.Attributes;
|
||||||
using BenchmarkDotNet.Diagnosers;
|
|
||||||
using BenchmarkDotNet.Engines;
|
using BenchmarkDotNet.Engines;
|
||||||
using Misaki.HighPerformance.Image;
|
using Misaki.HighPerformance.Image;
|
||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
@@ -396,8 +395,8 @@ public unsafe class GGXMipGenerationBenchmark
|
|||||||
[GlobalSetup]
|
[GlobalSetup]
|
||||||
public void Setup()
|
public void Setup()
|
||||||
{
|
{
|
||||||
//const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
|
const string imagePath = "F:\\c\\SimpleRayTracer\\native\\assets\\hdri\\golden_gate_hills_1k.hdr";
|
||||||
const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
|
//const string imagePath = "C:\\Users\\Misaki\\Downloads\\grasslands_sunset_4k.hdr";
|
||||||
using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
|
using var stream = new FileStream(imagePath, FileMode.Open, FileAccess.Read);
|
||||||
_image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);
|
_image = ImageResultFloat.FromStream(stream, ColorComponents.RGB);
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,10 @@
|
|||||||
<ImplicitUsings>enable</ImplicitUsings>
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
|
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
|
||||||
|
<PublishAot>True</PublishAot>
|
||||||
|
<OptimizationPreference>Speed</OptimizationPreference>
|
||||||
|
<IlcInstructionSet>avx2</IlcInstructionSet>
|
||||||
|
<IlcDisableReflection>true</IlcDisableReflection>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||||
|
|||||||
@@ -8,11 +8,16 @@ using System.Runtime.InteropServices;
|
|||||||
|
|
||||||
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
|
||||||
|
|
||||||
const int count = 1;
|
const int count = 16;
|
||||||
|
|
||||||
var bench = new GGXMipGenerationBenchmark();
|
var bench = new GGXMipGenerationBenchmark();
|
||||||
bench.Setup();
|
bench.Setup();
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++)
|
||||||
|
{
|
||||||
|
bench.JobGGX();
|
||||||
|
}
|
||||||
|
|
||||||
var sw = System.Diagnostics.Stopwatch.StartNew();
|
var sw = System.Diagnostics.Stopwatch.StartNew();
|
||||||
|
|
||||||
for (int i = 0; i < count; i++)
|
for (int i = 0; i < count; i++)
|
||||||
@@ -22,7 +27,7 @@ for (int i = 0; i < count; i++)
|
|||||||
|
|
||||||
sw.Stop();
|
sw.Stop();
|
||||||
var avgTime = sw.Elapsed.TotalMilliseconds / count;
|
var avgTime = sw.Elapsed.TotalMilliseconds / count;
|
||||||
Console.WriteLine($"GGX Mip Generation: {avgTime} ms");
|
Console.WriteLine($"GGX Mip Generation (Inline): {avgTime} ms");
|
||||||
bench.Cleanup();
|
bench.Cleanup();
|
||||||
|
|
||||||
//AllocationManager.Initialize(AllocationManagerInitOpts.Default);
|
//AllocationManager.Initialize(AllocationManagerInitOpts.Default);
|
||||||
|
|||||||
Reference in New Issue
Block a user