Refactor allocation flags, SPMD API, and cleanup
Replaced HasFlag with HasOption for allocation flags to avoid boxing and improve performance. Added AllocationOptionExtensions. Reduced FreeListChunkSize default. Removed redundant allocation handle checks. Renamed MultipleAdd to MultiplyAdd in SPMD interfaces and implementations, updating all usages. Expanded SPMD lane interface with new mask/scatter methods and XML docs. Updated GGX jobs and allocation tests. Bumped assembly versions.
This commit is contained in:
@@ -217,17 +217,70 @@ public unsafe interface ISPMDLane<TSelf, TNumber> : ISPMDLane, IEquatable<TSelf>
|
||||
/// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
|
||||
/// </remarks>
|
||||
int CompressStore(TNumber* pDestination, TSelf mask);
|
||||
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and stores the result to the memory at the given pointer, where masked lanes are stored and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="pDestination">A pointer to the variable where the masked data will be stored.</param>
|
||||
/// <param name="mask">A mask value that determines which elements are included in the masking operation.</param>
|
||||
void MaskStore(TNumber* pDestination, TSelf mask);
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and stores the result to the given reference, where masked lanes are stored and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="destination">A reference to the variable where the masked data will be stored.</param>
|
||||
/// <param name="mask">A mask value that determines which elements are included in the masking operation.</param>
|
||||
void MaskStore(ref TNumber destination, TSelf mask);
|
||||
|
||||
/// <summary>
|
||||
/// Scatters the lane value to the specified base address and indices, where each lane is stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address.
|
||||
/// </summary>
|
||||
/// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
|
||||
/// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
|
||||
void Scatter(TNumber* pDst, TSelf indices);
|
||||
/// <summary>
|
||||
/// Scatters the lane value to the specified base address and indices, where each lane is stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address.
|
||||
/// </summary>
|
||||
/// <param name="destination">A reference to the base location relative to which the data will be scattered.</param>
|
||||
/// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
|
||||
void Scatter(ref TNumber destination, TSelf indices);
|
||||
/// <summary>
|
||||
/// Scatters the lane value to the specified base address and indices, where each lane is stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address.
|
||||
/// </summary>
|
||||
/// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
|
||||
/// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
|
||||
void Scatter(TNumber* pDst, int* pIndices);
|
||||
/// <summary>
|
||||
/// Scatters the lane value to the specified base address and indices, where each lane is stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address.
|
||||
/// </summary>
|
||||
/// <param name="destination">A reference to the base location relative to which the data will be scattered.</param>
|
||||
/// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
|
||||
void Scatter(ref TNumber destination, int* pIndices);
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and scatters the result to the given base address and indices, where masked lanes are stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address, and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
|
||||
/// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
|
||||
/// <param name="mask">A vector of boolean values that determine which lanes to scatter.</param>
|
||||
void MaskScatter(TNumber* pDst, TSelf indices, TSelf mask);
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and scatters the result to the given base address and indices, where masked lanes are stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address, and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="destination">A reference to the base location relative to which the data will be scattered.</param>
|
||||
/// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
|
||||
/// <param name="mask">A vector of boolean values that determine which lanes to scatter.</param>
|
||||
void MaskScatter(ref TNumber destination, TSelf indices, TSelf mask);
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and scatters the result to the given base address and indices, where masked lanes are stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address, and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
|
||||
/// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
|
||||
/// <param name="mask">A vector of boolean values that determine which lanes to scatter.</param>
|
||||
void MaskScatter(TNumber* pDst, int* pIndices, TSelf mask);
|
||||
/// <summary>
|
||||
/// Masks the lane value with the specified mask and scatters the result to the given base address and indices, where masked lanes are stored to the address computed by adding the corresponding index (multiplied by the scale) to the base address, and unmasked lanes are left unchanged in the destination.
|
||||
/// </summary>
|
||||
/// <param name="destination">A reference to the base location relative to which the data will be scattered.</param>
|
||||
/// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
|
||||
/// <param name="mask">A vector of boolean values that determine which lanes to scatter.</param>
|
||||
void MaskScatter(ref TNumber destination, int* pIndices, TSelf mask);
|
||||
|
||||
/// <summary>
|
||||
@@ -423,7 +476,7 @@ public unsafe interface ISPMDLane<TSelf, TNumber> : ISPMDLane, IEquatable<TSelf>
|
||||
/// <remarks>
|
||||
/// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
|
||||
/// </remarks>
|
||||
static abstract TSelf MultipleAdd(TSelf a, TSelf b, TSelf c);
|
||||
static abstract TSelf MultiplyAdd(TSelf a, TSelf b, TSelf c);
|
||||
/// <summary>
|
||||
/// Returns the minimum of the two lane values element-wise.
|
||||
/// </summary>
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
|
||||
<Authors>Misaki</Authors>
|
||||
<AssemblyVersion>1.3.7</AssemblyVersion>
|
||||
<AssemblyVersion>1.3.8</AssemblyVersion>
|
||||
<Version>$(AssemblyVersion)</Version>
|
||||
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
|
||||
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
|
||||
|
||||
@@ -446,7 +446,7 @@ public readonly unsafe struct ScalarLane<TNumber> : ISPMDLane<ScalarLane<TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static ScalarLane<TNumber> MultipleAdd(ScalarLane<TNumber> a, ScalarLane<TNumber> b, ScalarLane<TNumber> c)
|
||||
public static ScalarLane<TNumber> MultiplyAdd(ScalarLane<TNumber> a, ScalarLane<TNumber> b, ScalarLane<TNumber> c)
|
||||
{
|
||||
return new ScalarLane<TNumber>(TNumber.MultiplyAddEstimate(a.value, b.value, c.value));
|
||||
}
|
||||
|
||||
@@ -922,7 +922,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static WideLane<TNumber> MultipleAdd(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> c)
|
||||
public static WideLane<TNumber> MultiplyAdd(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> c)
|
||||
{
|
||||
if (typeof(TNumber) == typeof(float))
|
||||
{
|
||||
@@ -992,10 +992,10 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||
|
||||
var z2_sin = z_sin * z_sin;
|
||||
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||
var poly_sin = MultiplyAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||
poly_sin = z_sin * poly_sin; // z * (...)
|
||||
|
||||
return poly_sin * sign_sin;
|
||||
@@ -1042,10 +1042,10 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||
|
||||
var z2_cos = z_cos * z_cos;
|
||||
var poly_cos = MultipleAdd(z2_cos, c9, c7);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c5);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c3);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c1);
|
||||
var poly_cos = MultiplyAdd(z2_cos, c9, c7);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c5);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c3);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c1);
|
||||
poly_cos = z_cos * poly_cos;
|
||||
|
||||
return poly_cos * sign_cos;
|
||||
@@ -1117,17 +1117,17 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
|
||||
|
||||
var z2_sin = z_sin * z_sin;
|
||||
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||
var poly_sin = MultiplyAdd(z2_sin, c9, c7); // c7 + c9*z^2
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
|
||||
poly_sin = MultiplyAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
|
||||
poly_sin = z_sin * poly_sin; // z * (...)
|
||||
|
||||
var z2_cos = z_cos * z_cos;
|
||||
var poly_cos = MultipleAdd(z2_cos, c9, c7);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c5);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c3);
|
||||
poly_cos = MultipleAdd(z2_cos, poly_cos, c1);
|
||||
var poly_cos = MultiplyAdd(z2_cos, c9, c7);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c5);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c3);
|
||||
poly_cos = MultiplyAdd(z2_cos, poly_cos, c1);
|
||||
poly_cos = z_cos * poly_cos;
|
||||
|
||||
sin = poly_sin * sign_sin;
|
||||
@@ -1175,9 +1175,9 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var vc2 = Create(TNumber.CreateTruncating(0.1333923995)); // 2/15
|
||||
|
||||
// poly = c1 + c2 * x2
|
||||
var poly = MultipleAdd(x2, vc2, vc1);
|
||||
var poly = MultiplyAdd(x2, vc2, vc1);
|
||||
// value * (1 + x2 * poly)
|
||||
return MultipleAdd(x, MultipleAdd(x2, poly, One), Zero);
|
||||
return MultiplyAdd(x, MultiplyAdd(x2, poly, One), Zero);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
@@ -1202,9 +1202,9 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c2 = Create(TNumber.CreateTruncating(0.0742610f));
|
||||
var c3 = Create(TNumber.CreateTruncating(-0.0187293f));
|
||||
|
||||
var term1 = MultipleAdd(x, c3, c2);
|
||||
var term2 = MultipleAdd(x, term1, c1);
|
||||
var poly = MultipleAdd(x, term2, c0);
|
||||
var term1 = MultiplyAdd(x, c3, c2);
|
||||
var term2 = MultiplyAdd(x, term1, c1);
|
||||
var poly = MultiplyAdd(x, term2, c0);
|
||||
|
||||
var sqrtTerm = Sqrt(One - x);
|
||||
var result = poly * sqrtTerm;
|
||||
@@ -1224,7 +1224,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c2 = Create(TNumber.CreateTruncating(-0.19194795f));
|
||||
|
||||
var x2 = value * value;
|
||||
var poly = MultipleAdd(x2, c2, c1);
|
||||
var poly = MultiplyAdd(x2, c2, c1);
|
||||
return value * poly;
|
||||
}
|
||||
|
||||
@@ -1251,7 +1251,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
|
||||
var c2 = Create(TNumber.CreateTruncating(-0.19194795f));
|
||||
|
||||
// (c1 + c2 * t2)
|
||||
var poly = MultipleAdd(c2, t2, c1);
|
||||
var poly = MultiplyAdd(c2, t2, c1);
|
||||
|
||||
// result = t * poly
|
||||
var result = t * poly;
|
||||
|
||||
Reference in New Issue
Block a user