Refactor SPMD to HPC; add SIMD source generators

Major namespace migration from SPMD to HPC across all code, templates, and projects. Introduced Misaki.HighPerformance.HPC.Generator with Roslyn-based source generators for SIMD code (e.g., AVX2), including attribute and method generators. Renamed MultipleAdd to MultiplyAdd in all lanes and updated usages. Added AVX2 utility methods via codegen. Updated tests, benchmarks, and project references to use the new framework. Improved SIMD memory utilities and modernized project files. Removed legacy SPMD project from the solution.
This commit is contained in:
2026-05-06 13:43:58 +09:00
parent d3e497c7d8
commit c8f78f9d02
36 changed files with 895 additions and 130 deletions

View File

@@ -0,0 +1,164 @@
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using System;
namespace Misaki.HighPerformance.HPC.Generator
{
/// <summary>
/// Source generator that injects the <c>AVX2Utility</c> helper class (polynomial approximations of
/// <c>Asin</c>, <c>Acos</c> and <c>Atan2</c> on <c>Vector256&lt;float&gt;</c>) into the consuming compilation.
/// </summary>
/// <remarks>
/// The helper is emitted as a post-initialization output, so it is added unconditionally and is
/// available to the code produced by <see cref="AVX2Rewriter"/>.
/// </remarks>
[Generator]
internal class AVX2UtilityGenerator : IIncrementalGenerator
{
    /// <summary>
    /// Registers the fixed <c>AVX2Utility.g.cs</c> source with the generator pipeline.
    /// </summary>
    /// <param name="context">Initialization context supplied by the compiler.</param>
    public void Initialize(IncrementalGeneratorInitializationContext context)
    {
        context.RegisterPostInitializationOutput(static ctx =>
        {
            // IMPORTANT: BlendVariable(left, right, mask) yields `right` in every lane whose mask is set
            // and `left` otherwise (same operand order as _mm256_blendv_ps). Every select below is
            // therefore written as BlendVariable(valueWhenFalse, valueWhenTrue, mask).
            var source = @"
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.HPC
{
    public static class AVX2Utility
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Asin(Vector256<float> value)
        {
            // asin(value) = pi/2 - acos(value)
            var piOver2 = Vector256.Create(MathF.PI / 2.0f);
            return Avx2.Subtract(piOver2, Acos(value));
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Acos(Vector256<float> value)
        {
            // 0 <= value <= 1 : acos(value) = sqrt(1 - value) * (c0 + c1*value + c2*value^2 + c3*value^3)
            // value < 0 : acos(value) = pi - acos(-value)
            var x = Vector256.Abs(value);
            var c0 = Vector256.Create(1.5707288f); // pi/2
            var c1 = Vector256.Create(-0.2121144f);
            var c2 = Vector256.Create(0.0742610f);
            var c3 = Vector256.Create(-0.0187293f);
            var term1 = Fma.MultiplyAdd(x, c3, c2);
            var term2 = Fma.MultiplyAdd(x, term1, c1);
            var poly = Fma.MultiplyAdd(x, term2, c0);
            var sqrtTerm = Avx2.Sqrt(Avx2.Subtract(Vector256<float>.One, x));
            var result = Avx2.Multiply(poly, sqrtTerm);
            var pi = Vector256.Create(MathF.PI);
            var isNegative = Avx2.CompareLessThan(value, Vector256<float>.Zero);
            // BlendVariable picks the second operand where the mask is set:
            // negative lanes -> pi - result, all other lanes -> result.
            return Avx2.BlendVariable(result, Avx2.Subtract(pi, result), isNegative);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Atan2(Vector256<float> y, Vector256<float> x)
        {
            var absX = Vector256.Abs(x);
            var absY = Vector256.Abs(y);
            // 1. Determine the ratio (input to Atan)
            // If |x| >= |y|, we are in the ""shallow"" region, ratio = |y|/|x|
            // If |y| > |x|, we are in the ""steep"" region, ratio = |x|/|y| (and we transform result)
            var yGtX = Avx2.CompareGreaterThan(absY, absX);
            // Select numerator and denominator to ensure ratio is always in [0, 1]
            // (BlendVariable picks the second operand where the mask is set)
            var num = Avx2.BlendVariable(absY, absX, yGtX);
            var den = Avx2.BlendVariable(absX, absY, yGtX);
            var t = Avx2.Multiply(num, Avx2.Reciprocal(den)); // t is now in [0, 1]
            var t2 = Avx2.Multiply(t, t);
            // 2. Polynomial Approximation (Odd function: t * (c1 + c2*t^2))
            var c1 = Vector256.Create(0.97239411f);
            var c2 = Vector256.Create(-0.19194795f);
            // (c1 + c2 * t2)
            var poly = Fma.MultiplyAdd(c2, t2, c1);
            var result = Avx2.Multiply(t, poly);
            // 3. Reconstruct the angle
            // If we swapped x/y (yGtX), the identity is: atan(|y|/|x|) = PI/2 - atan(|x|/|y|)
            var halfPi = Vector256.Create(1.570796327f);
            result = Avx2.BlendVariable(result, halfPi - result, yGtX);
            // 4. Adjust for Quadrants (Signs)
            // If x < 0, we are in quadrants 2 or 3, so the angle is PI - result
            var pi = Vector256.Create(3.141592654f);
            var xLtZero = Avx2.CompareLessThan(x, Vector256<float>.Zero);
            result = Avx2.BlendVariable(result, pi - result, xLtZero);
            // If y < 0, the result should be negative (standard atan2 convention)
            // (This works because our ratio logic effectively computed atan(|y|/|x|) above)
            var yLtZero = Avx2.CompareLessThan(y, Vector256<float>.Zero);
            var negativeResult = Avx2.Subtract(Vector256<float>.Zero, result);
            return Avx2.BlendVariable(result, negativeResult, yLtZero);
        }
    }
}";
            ctx.AddSource("AVX2Utility.g.cs", source);
        });
    }
}
/// <summary>
/// <see cref="HPCRewriter"/> specialization that targets the AVX2 instruction set.
/// </summary>
/// <remarks>
/// Every <see cref="SIMDInstruction"/> is mapped to a static method reachable through the usings
/// returned by <see cref="GetNesessaryUsing"/> or to the generated <c>AVX2Utility</c> helper.
/// </remarks>
internal class AVX2Rewriter : HPCRewriter
{
    /// <summary>Suffix appended to the generated method name and hint name.</summary>
    public override string Name => "AVX2";

    /// <summary>
    /// Returns the using directives the rewritten method needs in order to compile.
    /// </summary>
    public override string GetNesessaryUsing()
    {
        return "using System.Runtime.Intrinsics;\nusing System.Runtime.Intrinsics.X86;";
    }

    /// <summary>
    /// Maps a lane math instruction to the static type and method that implement it.
    /// </summary>
    /// <param name="instruction">The instruction being rewritten.</param>
    /// <param name="isFloatingPoint">
    /// Whether the lane element type is <c>float</c> or <c>double</c>. Currently unused: the chosen
    /// targets are either generic over the element type or float-only helpers.
    /// </param>
    /// <returns>The receiver expression and method name to emit.</returns>
    /// <exception cref="ArgumentOutOfRangeException">The instruction has no AVX2 mapping.</exception>
    protected override MathExpression RewriteMathExpression(SIMDInstruction instruction, bool isFloatingPoint)
    {
        switch (instruction)
        {
            // Previously these three fell through and returned default(MathExpression), whose null
            // strings crashed SyntaxFactory.IdentifierName in the base class. The generic Vector256
            // helpers work for every element type, so they are a safe mapping.
            case SIMDInstruction.Add:
                return Map("Vector256", "Add");
            case SIMDInstruction.Subtract:
                return Map("Vector256", "Subtract");
            case SIMDInstruction.Multiply:
                return Map("Vector256", "Multiply");
            case SIMDInstruction.MultiplyAdd:
                return Map("Fma", "MultiplyAdd");
            // NOTE(review): AVX2Utility only exposes Vector256<float> overloads — confirm that
            // Asin/Atan2 are never requested for other element types.
            case SIMDInstruction.Asin:
                return Map("AVX2Utility", "Asin");
            case SIMDInstruction.Atan2:
                return Map("AVX2Utility", "Atan2");
            default:
                throw new ArgumentOutOfRangeException(
                    nameof(instruction),
                    instruction,
                    "The AVX2 rewriter has no mapping for this SIMD instruction.");
        }
    }

    /// <summary>
    /// Hook for reordering or wrapping call arguments; AVX2 keeps the original argument order.
    /// </summary>
    protected override void RewriteMathArguments(SIMDInstruction instruction, Span<ArgumentSyntax> originalArgs)
    {
        return;
    }

    // Small factory that keeps the switch above readable.
    private static MathExpression Map(string expression, string name)
    {
        return new MathExpression
        {
            Expression = expression,
            Name = name
        };
    }
}
}

View File

@@ -0,0 +1,274 @@
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using System;
using System.Collections.Generic;
using System.Linq;
namespace Misaki.HighPerformance.HPC.Generator
{
/// <summary>
/// Lane-level math operations that a rewriter can translate into target-specific calls.
/// Each member corresponds to the identically named static lane method looked up by the rewriter.
/// </summary>
internal enum SIMDInstruction
{
/// <summary>Lane method <c>Add</c>.</summary>
Add,
/// <summary>Lane method <c>Subtract</c>.</summary>
Subtract,
/// <summary>Lane method <c>Multiply</c>.</summary>
Multiply,
/// <summary>Lane method <c>MultiplyAdd</c>.</summary>
MultiplyAdd,
/// <summary>Lane method <c>Asin</c>.</summary>
Asin,
/// <summary>Lane method <c>Atan2</c>.</summary>
Atan2,
}
/// <summary>
/// Base syntax rewriter that turns a method written against the generic lane abstraction
/// (<c>ISPMDLane&lt;TSelf, TNumber&gt;</c> type parameters and <c>WideLane&lt;T&gt;</c>) into a concrete
/// <c>Vector256&lt;T&gt;</c> method. Derived classes decide how individual math calls are emitted.
/// </summary>
internal abstract class HPCRewriter : CSharpSyntaxRewriter
{
    /// <summary>
    /// Describes the replacement for a lane math call as <c>Expression.Name</c>.
    /// </summary>
    protected struct MathExpression
    {
        /// <summary>Identifier of the static type that receives the call (e.g. <c>Fma</c>).</summary>
        public string Expression
        {
            get; set;
        }

        /// <summary>Name of the method to call on <see cref="Expression"/>.</summary>
        public string Name
        {
            get; set;
        }
    }

    /// <summary>
    /// Creates one rewriter per supported flag set in <paramref name="instructionSet"/>.
    /// </summary>
    /// <param name="instructionSet">Requested target instruction sets.</param>
    /// <returns>Fresh rewriter instances; empty when no requested set is supported yet.</returns>
    public static IReadOnlyCollection<HPCRewriter> GetRewriter(TargetInstructionSet instructionSet)
    {
        var rewriters = new List<HPCRewriter>();
        // TODO: Add more rewriters for different instruction sets
        if (instructionSet.HasFlag(TargetInstructionSet.AVX2))
        {
            rewriters.Add(new AVX2Rewriter());
        }

        return rewriters;
    }

    // Lane property name -> member name emitted on the rewritten receiver.
    private static readonly Dictionary<string, string> s_remapProperties = new()
    {
        ["LaneWidth"] = "Count",
    };

    // Lane static method name -> abstract instruction handed to the derived rewriter.
    private static readonly Dictionary<string, SIMDInstruction> s_remapMath = new()
    {
        ["Add"] = SIMDInstruction.Add,
        ["Subtract"] = SIMDInstruction.Subtract,
        ["Multiply"] = SIMDInstruction.Multiply,
        ["MultiplyAdd"] = SIMDInstruction.MultiplyAdd,
        ["Asin"] = SIMDInstruction.Asin,
        ["Atan2"] = SIMDInstruction.Atan2,
    };

    /// <summary>
    /// Maps each lane type-parameter name (e.g. <c>TLane0</c>) to the primitive element type text
    /// taken from its <c>ISPMDLane</c> constraint. Filled by <see cref="VisitMethodDeclaration"/>.
    /// </summary>
    protected readonly Dictionary<string, string> spmdTypes = new();

    /// <summary>Short target name used as a suffix for generated members.</summary>
    public abstract string Name
    {
        get;
    }

    /// <summary>
    /// Using directives required by the rewritten code; empty by default.
    /// </summary>
    public virtual string GetNesessaryUsing()
    {
        return string.Empty;
    }

    /// <summary>
    /// Drops every attribute whose name contains <c>HPCompute</c>; removes the whole list when
    /// nothing is left.
    /// </summary>
    public override SyntaxNode? VisitAttributeList(AttributeListSyntax node)
    {
        var filteredAttributes = SyntaxFactory.SeparatedList(
            node.Attributes.Where(a => !a.Name.ToString().Contains("HPCompute"))
        );
        if (filteredAttributes.Count == 0)
        {
            return null;
        }

        return node.WithAttributes(filteredAttributes).WithTriviaFrom(node);
    }

    /// <summary>
    /// Records the lane type parameters of the method, strips them (and their constraint clauses)
    /// from the signature, then rewrites the remainder through the other visitors.
    /// </summary>
    public override SyntaxNode? VisitMethodDeclaration(MethodDeclarationSyntax node)
    {
        var typesToRemove = new HashSet<string>();
        // 1. Analyze constraints to identify ISPMDLane generics
        foreach (var clause in node.ConstraintClauses)
        {
            var typeNameStr = clause.Name.Identifier.Text;
            foreach (var constraint in clause.Constraints.OfType<TypeConstraintSyntax>())
            {
                if (constraint.Type is GenericNameSyntax genericType &&
                    genericType.Identifier.Text == "ISPMDLane" &&
                    genericType.TypeArgumentList.Arguments.Count == 2)
                {
                    // Second type argument is the primitive element type (TNumber).
                    var primType = genericType.TypeArgumentList.Arguments[1].ToString();
                    spmdTypes[typeNameStr] = primType;
                    typesToRemove.Add(typeNameStr);
                }
            }
        }

        var methodToVisit = node;
        // 2. Strip type parameter and constraints BEFORE visiting so VisitIdentifierName doesn't touch them
        if (typesToRemove.Count > 0)
        {
            // Remove from <TLane0, ...> generics list
            if (methodToVisit.TypeParameterList != null)
            {
                var newParams = methodToVisit.TypeParameterList.Parameters
                    .Where(p => !typesToRemove.Contains(p.Identifier.Text))
                    .ToList();
                if (newParams.Any())
                {
                    methodToVisit = methodToVisit.WithTypeParameterList(
                        SyntaxFactory.TypeParameterList(SyntaxFactory.SeparatedList(newParams))
                    );
                }
                else
                {
                    methodToVisit = methodToVisit.WithTypeParameterList(null); // Removes angle brackets entirely
                }
            }

            // Remove the matching `where TLane0 : ...` clause
            var newConstraints = methodToVisit.ConstraintClauses
                .Where(c => !typesToRemove.Contains(c.Name.Identifier.Text))
                .ToList();
            methodToVisit = methodToVisit.WithConstraintClauses(
                SyntaxFactory.List(newConstraints)
            );
        }

        // 3. Fallback to base to rewrite method arguments, return types, and body via our updated visitors
        return base.VisitMethodDeclaration(methodToVisit);
    }

    /// <summary>
    /// Rewrites <c>WideLane&lt;T&gt;</c> to <c>Vector256&lt;T&gt;</c>, keeping the type argument.
    /// </summary>
    public override SyntaxNode? VisitGenericName(GenericNameSyntax node)
    {
        if (node.Identifier.Text == "WideLane" &&
            node.TypeArgumentList.Arguments.Count == 1)
        {
            return SyntaxFactory.GenericName("Vector256")
                .WithTypeArgumentList(node.TypeArgumentList)
                .WithTriviaFrom(node);
        }

        return base.VisitGenericName(node);
    }

    /// <summary>
    /// Replaces a tracked lane type parameter (e.g. <c>TLane0</c>) with <c>Vector256&lt;prim&gt;</c>.
    /// </summary>
    public override SyntaxNode? VisitIdentifierName(IdentifierNameSyntax node)
    {
        // Rewrites signature types and generic types from `TLane0` to `Vector256<float>`
        if (spmdTypes.TryGetValue(node.Identifier.Text, out var primType))
        {
            return SyntaxFactory.GenericName("Vector256")
                .WithTypeArgumentList(
                    SyntaxFactory.TypeArgumentList(
                        SyntaxFactory.SingletonSeparatedList<TypeSyntax>(
                            SyntaxFactory.IdentifierName(primType))))
                .WithTriviaFrom(node);
        }

        return base.VisitIdentifierName(node);
    }

    /// <summary>
    /// Rewrites static member accesses on a lane type: remapped properties keep the (rewritten)
    /// receiver, math members are redirected to the target chosen by the derived rewriter.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The derived rewriter returned no target for a recognized math instruction.
    /// </exception>
    public override SyntaxNode? VisitMemberAccessExpression(MemberAccessExpressionSyntax node)
    {
        if (TryGetLanePrimitive(node.Expression, out var primitiveType))
        {
            var memberName = node.Name.Identifier.Text;
            if (s_remapProperties.TryGetValue(memberName, out var remappedName))
            {
                // Keep the evaluated left-hand side (TLane0 -> Vector256<float>) but change the property
                var rewrittenExpression = (ExpressionSyntax)Visit(node.Expression);
                return SyntaxFactory.MemberAccessExpression(
                    SyntaxKind.SimpleMemberAccessExpression,
                    rewrittenExpression,
                    SyntaxFactory.IdentifierName(remappedName)
                ).WithTriviaFrom(node);
            }

            if (s_remapMath.TryGetValue(memberName, out var instruction))
            {
                var isFloatingPoint = primitiveType == "float" || primitiveType == "double";
                var rewriteResult = RewriteMathExpression(instruction, isFloatingPoint);

                // A default(MathExpression) carries null strings; fail with a clear message instead of
                // the ArgumentNullException SyntaxFactory.IdentifierName would otherwise raise.
                if (string.IsNullOrEmpty(rewriteResult.Expression) || string.IsNullOrEmpty(rewriteResult.Name))
                {
                    throw new NotSupportedException(
                        $"The {Name} rewriter does not provide a mapping for SIMD instruction '{instruction}'.");
                }

                return SyntaxFactory.MemberAccessExpression(
                    SyntaxKind.SimpleMemberAccessExpression,
                    SyntaxFactory.IdentifierName(rewriteResult.Expression),
                    SyntaxFactory.IdentifierName(rewriteResult.Name)
                ).WithTriviaFrom(node);
            }
        }

        return base.VisitMemberAccessExpression(node);
    }

    /// <summary>
    /// Rewrites invocations of recognized lane math methods: arguments are visited first, offered
    /// to <see cref="RewriteMathArguments"/>, then the callee is rewritten.
    /// </summary>
    public override SyntaxNode? VisitInvocationExpression(InvocationExpressionSyntax node)
    {
        if (node.Expression is MemberAccessExpressionSyntax memberAccess &&
            TryGetLanePrimitive(memberAccess.Expression, out _) &&
            s_remapMath.TryGetValue(memberAccess.Name.Identifier.Text, out var instruction))
        {
            var args = node.ArgumentList.Arguments;
            var argList = new ArgumentSyntax[args.Count];
            for (var i = 0; i < args.Count; i++)
            {
                argList[i] = (ArgumentSyntax)Visit(args[i]);
            }

            RewriteMathArguments(instruction, argList);
            var arguments = SyntaxFactory.ArgumentList(SyntaxFactory.SeparatedList(argList));
            var newExpression = (ExpressionSyntax)Visit(memberAccess);
            return SyntaxFactory.InvocationExpression(newExpression, arguments)
                .WithTriviaFrom(node);
        }

        // Anything else (including non-math lane calls) is handled by the default traversal.
        return base.VisitInvocationExpression(node);
    }

    /// <summary>
    /// Chooses the static type and method that implement <paramref name="instruction"/> on the target.
    /// </summary>
    protected abstract MathExpression RewriteMathExpression(SIMDInstruction instruction, bool isFloatingPoint);

    /// <summary>
    /// Lets the target adjust the already-visited call arguments in place.
    /// </summary>
    protected abstract void RewriteMathArguments(SIMDInstruction instruction, Span<ArgumentSyntax> originalArgs);

    // Returns true when `expression` names a lane type: either WideLane<T> (primitive = T) or a
    // type parameter tracked in spmdTypes (primitive = its recorded element type).
    private bool TryGetLanePrimitive(ExpressionSyntax expression, out string primitiveType)
    {
        if (expression is GenericNameSyntax genericName &&
            genericName.Identifier.Text == "WideLane" &&
            genericName.TypeArgumentList.Arguments.Count == 1)
        {
            primitiveType = genericName.TypeArgumentList.Arguments[0].ToString();
            return true;
        }

        if (expression is IdentifierNameSyntax idName &&
            spmdTypes.TryGetValue(idName.Identifier.Text, out var mappedPrimType))
        {
            primitiveType = mappedPrimType;
            return true;
        }

        primitiveType = string.Empty;
        return false;
    }
}
}

View File

@@ -0,0 +1,108 @@
using Microsoft.CodeAnalysis;
using System;
namespace Misaki.HighPerformance.HPC.Generator
{
/// <summary>
/// Generator-side mirror of the public <c>FloatPrecision</c> enum emitted by
/// <see cref="HPComputeAttributeGenerator"/>; the numeric values here are interpolated into that
/// generated enum, so both always agree.
/// </summary>
internal enum FloatPrecision
{
/// <summary>Default level; the generated documentation states 3.5 ULPs.</summary>
Standard = 0,
/// <summary>The generated documentation states 1 ULP.</summary>
High = 1,
/// <summary>The generated documentation states 3.5 ULPs or lower accuracy.</summary>
Low = 2,
}
/// <summary>
/// Generator-side mirror of the public <c>MathMode</c> enum emitted by
/// <see cref="HPComputeAttributeGenerator"/>; the numeric values are interpolated into it.
/// </summary>
internal enum MathMode
{
/// <summary>Default math mode.</summary>
Standard = 0,
/// <summary>Fast math mode (described in the generated docs as favoring performance over accuracy).</summary>
Fast = 1,
}
/// <summary>
/// Generator-side mirror of the public <c>TargetInstructionSet</c> flags enum emitted by
/// <see cref="HPComputeAttributeGenerator"/>. Each member occupies its own bit so several
/// targets can be combined.
/// </summary>
[Flags]
internal enum TargetInstructionSet
{
/// <summary>No target selected.</summary>
None = 0,
/// <summary>Bit 0.</summary>
SSE2 = 1 << 0,
/// <summary>Bit 1.</summary>
SSE4 = 1 << 1,
/// <summary>Bit 2.</summary>
AVX = 1 << 2,
/// <summary>Bit 3. Currently the only flag that <c>HPCRewriter.GetRewriter</c> acts on.</summary>
AVX2 = 1 << 3,
/// <summary>Bit 4.</summary>
AVX512 = 1 << 4,
}
/// <summary>
/// Source generator that injects the public <c>HPComputeAttribute</c> and its option enums
/// (<c>FloatPrecision</c>, <c>MathMode</c>, <c>TargetInstructionSet</c>) into the consuming compilation.
/// </summary>
/// <remarks>
/// The numeric enum values are interpolated from the generator's internal enums of the same name,
/// so the generator can cast attribute arguments back to its own types.
/// </remarks>
[Generator]
public class HPComputeAttributeGenerator : IIncrementalGenerator
{
    /// <summary>
    /// Registers the fixed <c>HPComputeAttribute.g.cs</c> source as a post-initialization output.
    /// </summary>
    /// <param name="context">Initialization context supplied by the compiler.</param>
    public void Initialize(IncrementalGeneratorInitializationContext context)
    {
        context.RegisterPostInitializationOutput(static ctx =>
        {
            // Interpolated verbatim string: literal braces of the emitted code are doubled.
            var source = @$"
using System;
namespace Misaki.HighPerformance.HPC
{{
    public enum FloatPrecision
    {{
        /// <summary>
        /// Compute with an accuracy of 3.5 ULPs (Units in the Last Place). This is the default precision level for floating-point operations.
        /// </summary>
        Standard = {(int)FloatPrecision.Standard},
        /// <summary>
        /// Compute with an accuracy of 1 ULP. This level favors precision over performance and may lead to slower computations.
        /// </summary>
        High = {(int)FloatPrecision.High},
        /// <summary>
        /// Compute with an accuracy equal to or lower than 3.5 ULPs. This level may use the most aggressive optimizations, potentially sacrificing precision for maximum performance.
        /// </summary>
        Low = {(int)FloatPrecision.Low},
    }}

    public enum MathMode
    {{
        /// <summary>
        /// Use the default math mode, which balances performance and accuracy. This mode may allow certain optimizations that can lead to faster computations while maintaining reasonable precision.
        /// </summary>
        Standard = {(int)MathMode.Standard},
        /// <summary>
        /// Use a fast math mode, which prioritizes performance over accuracy. This mode assumes there are no special cases (like NaNs or infinities) and may allow for more aggressive optimizations.
        /// </summary>
        Fast = {(int)MathMode.Fast},
    }}

    [Flags]
    public enum TargetInstructionSet
    {{
        None = {(int)TargetInstructionSet.None},
        /// <summary>
        /// Streaming SIMD Extensions 2.
        /// </summary>
        SSE2 = {(int)TargetInstructionSet.SSE2},
        /// <summary>
        /// Streaming SIMD Extensions 4.2.
        /// </summary>
        SSE4 = {(int)TargetInstructionSet.SSE4},
        /// <summary>
        /// Advanced Vector Extensions.
        /// </summary>
        AVX = {(int)TargetInstructionSet.AVX},
        /// <summary>
        /// Advanced Vector Extensions 2. Includes FMA, F16C and BMI1/2.
        /// </summary>
        AVX2 = {(int)TargetInstructionSet.AVX2},
        /// <summary>
        /// Advanced Vector Extensions 512.
        /// </summary>
        AVX512 = {(int)TargetInstructionSet.AVX512},
    }}

    [AttributeUsage(AttributeTargets.Class | AttributeTargets.Struct | AttributeTargets.Method, Inherited = false, AllowMultiple = false)]
    public sealed class HPComputeAttribute : Attribute
    {{
        public HPComputeAttribute(TargetInstructionSet instructionSet, FloatPrecision precision = FloatPrecision.Standard, MathMode mode = MathMode.Standard)
        {{
        }}
    }}
}}";
            ctx.AddSource("HPComputeAttribute.g.cs", source);
        });
    }
}
}

View File

@@ -0,0 +1,109 @@
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Text;
using System;
using System.Collections.Immutable;
using System.Linq;
using System.Text;
namespace Misaki.HighPerformance.HPC.Generator
{
/// <summary>
/// Carries everything the generator needs for one method annotated with <c>HPComputeAttribute</c>:
/// its syntax, its symbol and the three attribute arguments.
/// </summary>
internal class HPComputeMethodInfo
{
    /// <summary>Syntax node of the annotated method; this is what the rewriters visit.</summary>
    public MethodDeclarationSyntax MethodDeclaration { get; set; } = null!;

    /// <summary>Symbol of the annotated method; used to find its containing type and namespace.</summary>
    public IMethodSymbol MethodSymbol { get; set; } = null!;

    /// <summary>First attribute argument: the instruction sets to generate variants for.</summary>
    public TargetInstructionSet InstructionSet { get; set; }

    /// <summary>Second attribute argument: the requested floating-point precision.</summary>
    public FloatPrecision Precision { get; set; }

    /// <summary>Third attribute argument: the requested math mode.</summary>
    public MathMode Mode { get; set; }
}
/// <summary>
/// Source generator that, for every method annotated with <c>HPComputeAttribute</c>, emits one
/// rewritten copy per supported target instruction set into a partial declaration of the
/// containing type. The copy is named <c>{Method}_{RewriterName}</c>.
/// </summary>
/// <remarks>
/// Limitations visible in this implementation: nested and generic containing types are emitted by
/// simple name only, so they are not yet supported.
/// </remarks>
[Generator]
public class HPComputeGenerator : IIncrementalGenerator
{
    private const string AttributeMetadataName = "Misaki.HighPerformance.HPC.HPComputeAttribute";

    /// <summary>
    /// Wires the pipeline: find attributed methods, collect them, and generate the rewritten copies.
    /// </summary>
    /// <param name="context">Initialization context supplied by the compiler.</param>
    public void Initialize(IncrementalGeneratorInitializationContext context)
    {
        var methodDeclarations = context.SyntaxProvider
            .ForAttributeWithMetadataName(
                AttributeMetadataName,
                static (n, ct) => n is MethodDeclarationSyntax,
                static (ctx, ct) => CreateMethodInfo(ctx))
            .Collect();
        context.RegisterSourceOutput(methodDeclarations, GenerateHPCMethod);
    }

    // Extracts the attribute arguments for one annotated method. Returns null when the attribute is
    // missing or malformed (e.g. the user's code is mid-edit and the constructor call does not bind),
    // instead of throwing from inside the generator pipeline.
    private static HPComputeMethodInfo? CreateMethodInfo(GeneratorAttributeSyntaxContext ctx)
    {
        var attribute = ctx.Attributes.FirstOrDefault(
            a => a.AttributeClass?.ToDisplayString() == AttributeMetadataName);
        if (attribute is null || ctx.TargetSymbol is not IMethodSymbol methodSymbol)
        {
            return null;
        }

        var arguments = attribute.ConstructorArguments;
        if (arguments.Length < 3 ||
            !(arguments[0].Value is int instructionSet) ||
            !(arguments[1].Value is int precision) ||
            !(arguments[2].Value is int mode))
        {
            return null;
        }

        return new HPComputeMethodInfo
        {
            MethodDeclaration = (MethodDeclarationSyntax)ctx.TargetNode,
            MethodSymbol = methodSymbol,
            InstructionSet = (TargetInstructionSet)instructionSet,
            Precision = (FloatPrecision)precision,
            Mode = (MathMode)mode,
        };
    }

    // Emits one source file per (method, rewriter) pair.
    private void GenerateHPCMethod(SourceProductionContext context, ImmutableArray<HPComputeMethodInfo?> array)
    {
        if (array.IsEmpty)
        {
            return;
        }

        // AddSource throws on duplicate hint names, which overloads or same-named types in
        // different namespaces would otherwise produce.
        var usedHintNames = new System.Collections.Generic.HashSet<string>(StringComparer.OrdinalIgnoreCase);

        foreach (var info in array)
        {
            if (info == null)
            {
                continue;
            }

            var containingType = info.MethodSymbol.ContainingType;
            var containingNamespace = info.MethodSymbol.ContainingNamespace;
            var methodName = info.MethodDeclaration.Identifier.Text;

            var rewriters = HPCRewriter.GetRewriter(info.InstructionSet);
            foreach (var writer in rewriters)
            {
                var rewrittenMethod = (MethodDeclarationSyntax)writer.Visit(info.MethodDeclaration);
                var newMethod = rewrittenMethod
                    .WithIdentifier(SyntaxFactory.Identifier($"{methodName}_{writer.Name}"));

                var typeDeclaration = $@"partial {GetTypeKeyword(containingType)} {containingType.Name}
{{
{newMethod.NormalizeWhitespace().ToFullString()}
}}";

                // The global namespace displays as "<global namespace>", which is not valid C#;
                // emit the type at top level in that case.
                var body = containingNamespace.IsGlobalNamespace
                    ? typeDeclaration
                    : $@"namespace {containingNamespace.ToDisplayString()}
{{
{typeDeclaration}
}}";

                var source = $@"
using Misaki.HighPerformance.HPC;
{writer.GetNesessaryUsing()}
{body}";

                var baseHintName = $"{containingType.Name}_{methodName}_{writer.Name}";
                var hintName = baseHintName;
                for (var suffix = 1; !usedHintNames.Add(hintName); suffix++)
                {
                    hintName = $"{baseHintName}_{suffix}";
                }

                context.AddSource($"{hintName}.g.cs", SourceText.From(source, Encoding.UTF8));
            }
        }
    }

    // A partial declaration must use the same kind keyword as the original type declaration.
    private static string GetTypeKeyword(INamedTypeSymbol type)
    {
        if (type.IsRecord)
        {
            return type.TypeKind == TypeKind.Struct ? "record struct" : "record";
        }

        switch (type.TypeKind)
        {
            case TypeKind.Struct:
                return "struct";
            case TypeKind.Interface:
                return "interface";
            default:
                return "class";
        }
    }
}
}

View File

@@ -0,0 +1,19 @@
<!-- Project file; presumably for the Roslyn source generator assembly (Misaki.HighPerformance.HPC.Generator) — confirm against the repository layout. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Targets netstandard2.0. -->
<TargetFramework>netstandard2.0</TargetFramework>
<Nullable>enable</Nullable>
<!-- Turns on the extended analyzer rules for this project. -->
<EnforceExtendedAnalyzerRules>True</EnforceExtendedAnalyzerRules>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<!-- Language version is pinned explicitly rather than left to the target framework default. -->
<LangVersion>9.0</LangVersion>
</PropertyGroup>
<ItemGroup>
<!-- Analyzer-only package: PrivateAssets=all keeps it from flowing to consumers of this project. -->
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="5.3.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<!-- Roslyn C# compiler APIs. -->
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="5.3.0" />
</ItemGroup>
</Project>

View File

@@ -1,7 +1,7 @@
using System.Diagnostics.CodeAnalysis; using System.Diagnostics.CodeAnalysis;
using System.Numerics; using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
/// <summary> /// <summary>
/// Common marker interface for SPMD lane types. /// Common marker interface for SPMD lane types.
@@ -423,7 +423,7 @@ public unsafe interface ISPMDLane<TSelf, TNumber> : ISPMDLane, IEquatable<TSelf>
/// <remarks> /// <remarks>
/// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance. /// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
/// </remarks> /// </remarks>
static abstract TSelf MultipleAdd(TSelf a, TSelf b, TSelf c); static abstract TSelf MultiplyAdd(TSelf a, TSelf b, TSelf c);
/// <summary> /// <summary>
/// Returns the minimum of the two lane values element-wise. /// Returns the minimum of the two lane values element-wise.
/// </summary> /// </summary>

View File

@@ -35,7 +35,7 @@
<ItemGroup> <ItemGroup>
<Content Include="**\*.cs" Exclude="obj\**;bin\**"> <Content Include="**\*.cs" Exclude="obj\**;bin\**">
<Pack>true</Pack> <Pack>true</Pack>
<PackagePath>contentFiles\cs\any\Misaki.HighPerformance.Mathematics.SPMD\</PackagePath> <PackagePath>contentFiles\cs\any\Misaki.HighPerformance.HPC\</PackagePath>
<PackageCopyToOutput>false</PackageCopyToOutput> <PackageCopyToOutput>false</PackageCopyToOutput>
<BuildAction>Compile</BuildAction> <BuildAction>Compile</BuildAction>
</Content> </Content>

View File

@@ -2,7 +2,7 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
internal static unsafe class SPMDUtility internal static unsafe class SPMDUtility
{ {

View File

@@ -3,7 +3,7 @@ using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
[StructLayout(LayoutKind.Sequential)] [StructLayout(LayoutKind.Sequential)]
public readonly unsafe struct ScalarLane<TNumber> : ISPMDLane<ScalarLane<TNumber>, TNumber> public readonly unsafe struct ScalarLane<TNumber> : ISPMDLane<ScalarLane<TNumber>, TNumber>
@@ -446,7 +446,7 @@ public readonly unsafe struct ScalarLane<TNumber> : ISPMDLane<ScalarLane<TNumber
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<TNumber> MultipleAdd(ScalarLane<TNumber> a, ScalarLane<TNumber> b, ScalarLane<TNumber> c) public static ScalarLane<TNumber> MultiplyAdd(ScalarLane<TNumber> a, ScalarLane<TNumber> b, ScalarLane<TNumber> c)
{ {
return new ScalarLane<TNumber>(TNumber.MultiplyAddEstimate(a.value, b.value, c.value)); return new ScalarLane<TNumber>(TNumber.MultiplyAddEstimate(a.value, b.value, c.value));
} }

View File

@@ -1,6 +1,6 @@
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
internal static unsafe class ShuffleTableGenerator internal static unsafe class ShuffleTableGenerator
{ {

View File

@@ -1,7 +1,7 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using System.Numerics; using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
/// <summary> /// <summary>
/// A job interface for Single Program Multiple Data (SPMD) execution, allowing for efficient parallel processing of data across multiple lanes. /// A job interface for Single Program Multiple Data (SPMD) execution, allowing for efficient parallel processing of data across multiple lanes.

View File

@@ -7,7 +7,7 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using System.Numerics; using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<# <#
const string TLane = "TLane"; const string TLane = "TLane";

View File

@@ -7,10 +7,9 @@ using System.Diagnostics.CodeAnalysis;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public static unsafe partial class MathV public static unsafe partial class MathV
{ {

View File

@@ -15,7 +15,7 @@ using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<# <#
const string TLane = "TLane"; const string TLane = "TLane";
const string TNumber = "TNumber"; const string TNumber = "TNumber";

View File

@@ -3,7 +3,7 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public unsafe struct Vector2<TLane, TNumber> : IEquatable<Vector2<TLane, TNumber>> public unsafe struct Vector2<TLane, TNumber> : IEquatable<Vector2<TLane, TNumber>>
where TLane : ISPMDLane<TLane, TNumber> where TLane : ISPMDLane<TLane, TNumber>

View File

@@ -9,5 +9,5 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<#= code #> <#= code #>

View File

@@ -3,7 +3,7 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public unsafe struct Vector3<TLane, TNumber> : IEquatable<Vector3<TLane, TNumber>> public unsafe struct Vector3<TLane, TNumber> : IEquatable<Vector3<TLane, TNumber>>
where TLane : ISPMDLane<TLane, TNumber> where TLane : ISPMDLane<TLane, TNumber>

View File

@@ -9,5 +9,5 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<#= code #> <#= code #>

View File

@@ -3,7 +3,7 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public unsafe struct Vector4<TLane, TNumber> : IEquatable<Vector4<TLane, TNumber>> public unsafe struct Vector4<TLane, TNumber> : IEquatable<Vector4<TLane, TNumber>>
where TLane : ISPMDLane<TLane, TNumber> where TLane : ISPMDLane<TLane, TNumber>

View File

@@ -9,5 +9,5 @@ using System.Diagnostics;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<#= code #> <#= code #>

View File

@@ -1,7 +1,7 @@
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNumber>, TNumber> public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNumber>, TNumber>
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber> where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>

View File

@@ -7,7 +7,7 @@
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
<# <#
var conversions = new CastRoute[] var conversions = new CastRoute[]
{ {

View File

@@ -5,7 +5,7 @@ using System.Runtime.InteropServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.HPC;
public static unsafe class WideLane public static unsafe class WideLane
{ {
@@ -891,7 +891,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var floored = Vector.Floor(v); var floored = Vector.Floor(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(floored)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(floored);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -922,7 +922,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<TNumber> MultipleAdd(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> c) public static WideLane<TNumber> MultiplyAdd(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> c)
{ {
if (typeof(TNumber) == typeof(float)) if (typeof(TNumber) == typeof(float))
{ {
@@ -930,7 +930,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var vb = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(b); var vb = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(b);
var vc = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(c); var vc = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(c);
var result = Vector.FusedMultiplyAdd(va, vb, vc); var result = Vector.FusedMultiplyAdd(va, vb, vc);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -938,7 +938,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var vb = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(b); var vb = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(b);
var vc = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(c); var vc = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(c);
var result = Vector.FusedMultiplyAdd(va, vb, vc); var result = Vector.FusedMultiplyAdd(va, vb, vc);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return new WideLane<TNumber>((a.value * b.value) + c.value); return new WideLane<TNumber>((a.value * b.value) + c.value);
@@ -992,10 +992,10 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880 var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
var z2_sin = z_sin * z_sin; var z2_sin = z_sin * z_sin;
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2 var poly_sin = MultiplyAdd(z2_sin, c9, c7); // c7 + c9*z^2
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
poly_sin = z_sin * poly_sin; // z * (...) poly_sin = z_sin * poly_sin; // z * (...)
return poly_sin * sign_sin; return poly_sin * sign_sin;
@@ -1004,7 +1004,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Sin(v); var result = Vector.Sin(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -1042,10 +1042,10 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880 var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
var z2_cos = z_cos * z_cos; var z2_cos = z_cos * z_cos;
var poly_cos = MultipleAdd(z2_cos, c9, c7); var poly_cos = MultiplyAdd(z2_cos, c9, c7);
poly_cos = MultipleAdd(z2_cos, poly_cos, c5); poly_cos = MultiplyAdd(z2_cos, poly_cos, c5);
poly_cos = MultipleAdd(z2_cos, poly_cos, c3); poly_cos = MultiplyAdd(z2_cos, poly_cos, c3);
poly_cos = MultipleAdd(z2_cos, poly_cos, c1); poly_cos = MultiplyAdd(z2_cos, poly_cos, c1);
poly_cos = z_cos * poly_cos; poly_cos = z_cos * poly_cos;
return poly_cos * sign_cos; return poly_cos * sign_cos;
@@ -1054,7 +1054,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{
    // Vector<float> fast path (NOTE(review): presumably guarded by
    // typeof(TNumber) == typeof(float) just above this hunk - confirm).
    // Reinterpret the lane's bits as Vector<float>, apply Vector.Cos, then
    // reinterpret the result straight back to WideLane<TNumber>.
    var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
    var result = Vector.Cos(v);
    // Fixed: the migrated line ended with "(result));" - one closing
    // parenthesis too many after dropping the "new WideLane<TNumber>(" wrapper.
    return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
}
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -1117,17 +1117,17 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880 var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880
var z2_sin = z_sin * z_sin; var z2_sin = z_sin * z_sin;
var poly_sin = MultipleAdd(z2_sin, c9, c7); // c7 + c9*z^2 var poly_sin = MultiplyAdd(z2_sin, c9, c7); // c7 + c9*z^2
poly_sin = MultipleAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c5); // c5 + z^2*(...)
poly_sin = MultipleAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c3); // c3 + z^2*(...)
poly_sin = MultipleAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...) poly_sin = MultiplyAdd(z2_sin, poly_sin, c1); // c1 + z^2*(...)
poly_sin = z_sin * poly_sin; // z * (...) poly_sin = z_sin * poly_sin; // z * (...)
var z2_cos = z_cos * z_cos; var z2_cos = z_cos * z_cos;
var poly_cos = MultipleAdd(z2_cos, c9, c7); var poly_cos = MultiplyAdd(z2_cos, c9, c7);
poly_cos = MultipleAdd(z2_cos, poly_cos, c5); poly_cos = MultiplyAdd(z2_cos, poly_cos, c5);
poly_cos = MultipleAdd(z2_cos, poly_cos, c3); poly_cos = MultiplyAdd(z2_cos, poly_cos, c3);
poly_cos = MultipleAdd(z2_cos, poly_cos, c1); poly_cos = MultiplyAdd(z2_cos, poly_cos, c1);
poly_cos = z_cos * poly_cos; poly_cos = z_cos * poly_cos;
sin = poly_sin * sign_sin; sin = poly_sin * sign_sin;
@@ -1137,8 +1137,8 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{
    // Vector<float> fast path (NOTE(review): presumably guarded by
    // typeof(TNumber) == typeof(float) just above this hunk - confirm).
    // Vector.SinCos yields both results from one call; each is reinterpreted
    // back to WideLane<TNumber> and written to the sin / cos outputs.
    var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
    var (sinResult, cosResult) = Vector.SinCos(v);
    // Fixed: both migrated assignments ended with an unbalanced extra ")"
    // left over from removing the "new WideLane<TNumber>(" wrapper.
    sin = Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(sinResult);
    cos = Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(cosResult);
}
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -1175,9 +1175,9 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var vc2 = Create(TNumber.CreateTruncating(0.1333923995)); // 2/15 var vc2 = Create(TNumber.CreateTruncating(0.1333923995)); // 2/15
// x2 * (c1 + c2 * x2) // x2 * (c1 + c2 * x2)
var poly = MultipleAdd(x2, vc2, vc1); var poly = MultiplyAdd(x2, vc2, vc1);
// value * (1 + x2 * poly) // value * (1 + x2 * poly)
return MultipleAdd(x, MultipleAdd(x2, poly, One), Zero); return MultiplyAdd(x, MultiplyAdd(x2, poly, One), Zero);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1202,9 +1202,9 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c2 = Create(TNumber.CreateTruncating(0.0742610f)); var c2 = Create(TNumber.CreateTruncating(0.0742610f));
var c3 = Create(TNumber.CreateTruncating(-0.0187293f)); var c3 = Create(TNumber.CreateTruncating(-0.0187293f));
var term1 = MultipleAdd(x, c3, c2); var term1 = MultiplyAdd(x, c3, c2);
var term2 = MultipleAdd(x, term1, c1); var term2 = MultiplyAdd(x, term1, c1);
var poly = MultipleAdd(x, term2, c0); var poly = MultiplyAdd(x, term2, c0);
var sqrtTerm = Sqrt(One - x); var sqrtTerm = Sqrt(One - x);
var result = poly * sqrtTerm; var result = poly * sqrtTerm;
@@ -1224,7 +1224,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c2 = Create(TNumber.CreateTruncating(-0.19194795f)); var c2 = Create(TNumber.CreateTruncating(-0.19194795f));
var x2 = value * value; var x2 = value * value;
var poly = MultipleAdd(x2, c2, c1); var poly = MultiplyAdd(x2, c2, c1);
return value * poly; return value * poly;
} }
@@ -1251,7 +1251,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
var c2 = Create(TNumber.CreateTruncating(-0.19194795f)); var c2 = Create(TNumber.CreateTruncating(-0.19194795f));
// (c1 + c2 * t2) // (c1 + c2 * t2)
var poly = MultipleAdd(c2, t2, c1); var poly = MultiplyAdd(c2, t2, c1);
// result = t * poly // result = t * poly
var result = t * poly; var result = t * poly;
@@ -1290,7 +1290,7 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Exp(v); var result = Vector.Exp(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
@@ -1315,13 +1315,13 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Log(v); var result = Vector.Log(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value);
var result = Vector.Log(v); var result = Vector.Log(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return value; return value;
@@ -1334,13 +1334,13 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Log2(v); var result = Vector.Log2(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value);
var result = Vector.Log2(v); var result = Vector.Log2(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return value; return value;
@@ -1353,13 +1353,13 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Ceiling(v); var result = Vector.Ceiling(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value);
var result = Vector.Ceiling(v); var result = Vector.Ceiling(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return value; return value;
@@ -1372,13 +1372,13 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Round(v); var result = Vector.Round(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value);
var result = Vector.Round(v); var result = Vector.Round(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return value; return value;
@@ -1391,13 +1391,13 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<float>>(value);
var result = Vector.Truncate(v); var result = Vector.Truncate(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<float>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<float>, WideLane<TNumber>>(result);
} }
else if (typeof(TNumber) == typeof(double)) else if (typeof(TNumber) == typeof(double))
{ {
var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value); var v = Unsafe.BitCast<WideLane<TNumber>, Vector<double>>(value);
var result = Vector.Truncate(v); var result = Vector.Truncate(v);
return new WideLane<TNumber>(Unsafe.BitCast<Vector<double>, Vector<TNumber>>(result)); return Unsafe.BitCast<Vector<double>, WideLane<TNumber>>(result);
} }
return value; return value;
@@ -1544,6 +1544,12 @@ public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNu
ifFalse.value)); ifFalse.value));
} }
/// <summary>
/// Overload of Select that takes the condition as a packed <see cref="byte"/> mask
/// instead of a lane-typed condition.
/// </summary>
/// <remarks>
/// Placeholder only: the body unconditionally throws, so no selection is performed.
/// NOTE(review): presumably each bit of <paramref name="conditionMask"/> is meant to pick
/// the matching element from <paramref name="ifTrue"/> or <paramref name="ifFalse"/> -
/// confirm the intended bit-to-lane mapping (and how lane counts above 8 are handled)
/// before implementing.
/// </remarks>
/// <exception cref="NotImplementedException">Always thrown.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<TNumber> Select(byte conditionMask, WideLane<TNumber> ifTrue, WideLane<TNumber> ifFalse)
{
throw new NotImplementedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<TNumber> GreaterThan(WideLane<TNumber> a, WideLane<TNumber> b) public static WideLane<TNumber> GreaterThan(WideLane<TNumber> a, WideLane<TNumber> b)
{ {

View File

@@ -405,20 +405,54 @@ public static unsafe partial class MemoryUtility
} }
var i = 0; var i = 0;
if (Vector.IsHardwareAccelerated && a.Length >= Vector<byte>.Count) if (Vector512.IsHardwareAccelerated && a.Length >= Vector512<byte>.Count)
{ {
ref var ptrA = ref MemoryMarshal.GetReference(a); ref var ptrA = ref MemoryMarshal.GetReference(a);
ref var ptrB = ref MemoryMarshal.GetReference(b); ref var ptrB = ref MemoryMarshal.GetReference(b);
var limit = a.Length - Vector<byte>.Count; var limit = a.Length - Vector512<byte>.Count;
for (; i <= limit; i += Vector<byte>.Count) for (; i <= limit; i += Vector512<byte>.Count)
{ {
var vecA = Vector.LoadUnsafe(ref ptrA, (nuint)i); var vecA = Vector512.LoadUnsafe(ref ptrA, (nuint)i);
var vecB = Vector.LoadUnsafe(ref ptrB, (nuint)i); var vecB = Vector512.LoadUnsafe(ref ptrB, (nuint)i);
var mask = Vector.Equals(vecA, Vector<byte>.Zero); var mask = Vector512.Equals(vecA, Vector512<byte>.Zero);
var result = Vector.ConditionalSelect(mask, vecB, vecA); var result = Vector512.ConditionalSelect(mask, vecB, vecA);
result.StoreUnsafe(ref ptrA, (nuint)i);
}
}
else if (Vector256.IsHardwareAccelerated && a.Length >= Vector256<byte>.Count)
{
ref var ptrA = ref MemoryMarshal.GetReference(a);
ref var ptrB = ref MemoryMarshal.GetReference(b);
var limit = a.Length - Vector256<byte>.Count;
for (; i <= limit; i += Vector256<byte>.Count)
{
var vecA = Vector256.LoadUnsafe(ref ptrA, (nuint)i);
var vecB = Vector256.LoadUnsafe(ref ptrB, (nuint)i);
var mask = Vector256.Equals(vecA, Vector256<byte>.Zero);
var result = Vector256.ConditionalSelect(mask, vecB, vecA);
result.StoreUnsafe(ref ptrA, (nuint)i);
}
}
else if (Vector128.IsHardwareAccelerated && a.Length >= Vector128<byte>.Count)
{
ref var ptrA = ref MemoryMarshal.GetReference(a);
ref var ptrB = ref MemoryMarshal.GetReference(b);
var limit = a.Length - Vector128<byte>.Count;
for (; i <= limit; i += Vector128<byte>.Count)
{
var vecA = Vector128.LoadUnsafe(ref ptrA, (nuint)i);
var vecB = Vector128.LoadUnsafe(ref ptrB, (nuint)i);
var mask = Vector128.Equals(vecA, Vector128<byte>.Zero);
var result = Vector128.ConditionalSelect(mask, vecB, vecA);
result.StoreUnsafe(ref ptrA, (nuint)i); result.StoreUnsafe(ref ptrA, (nuint)i);
} }
} }
@@ -440,18 +474,48 @@ public static unsafe partial class MemoryUtility
nuint i = 0u; nuint i = 0u;
if (Vector.IsHardwareAccelerated && length >= (nuint)Vector<byte>.Count) if (Vector512.IsHardwareAccelerated && length >= (nuint)Vector512<byte>.Count)
{ {
var vectorSize = (nuint)Vector<byte>.Count; var vectorSize = (nuint)Vector512<byte>.Count;
var limit = length - vectorSize; var limit = length - vectorSize;
for (; i <= limit; i += vectorSize) for (; i <= limit; i += vectorSize)
{ {
var vecA = Vector.Load(ptrA + i); var vecA = Vector512.Load(ptrA + i);
var vecB = Vector.Load(ptrB + i); var vecB = Vector512.Load(ptrB + i);
var mask = Vector.Equals(vecA, Vector<byte>.Zero); var mask = Vector512.Equals(vecA, Vector512<byte>.Zero);
var result = Vector.ConditionalSelect(mask, vecB, vecA); var result = Vector512.ConditionalSelect(mask, vecB, vecA);
result.Store(ptrA + i);
}
}
else if (Vector256.IsHardwareAccelerated && length >= (nuint)Vector256<byte>.Count)
{
var vectorSize = (nuint)Vector256<byte>.Count;
var limit = length - vectorSize;
for (; i <= limit; i += vectorSize)
{
var vecA = Vector256.Load(ptrA + i);
var vecB = Vector256.Load(ptrB + i);
var mask = Vector256.Equals(vecA, Vector256<byte>.Zero);
var result = Vector256.ConditionalSelect(mask, vecB, vecA);
result.Store(ptrA + i);
}
}
else if (Vector128.IsHardwareAccelerated && length >= (nuint)Vector128<byte>.Count)
{
var vectorSize = (nuint)Vector128<byte>.Count;
var limit = length - vectorSize;
for (; i <= limit; i += vectorSize)
{
var vecA = Vector128.Load(ptrA + i);
var vecB = Vector128.Load(ptrB + i);
var mask = Vector128.Equals(vecA, Vector128<byte>.Zero);
var result = Vector128.ConditionalSelect(mask, vecB, vecA);
result.Store(ptrA + i); result.Store(ptrA + i);
} }
} }

View File

@@ -3,17 +3,17 @@
<PropertyGroup> <PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework> <TargetFramework>netstandard2.0</TargetFramework>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<EnforceExtendedAnalyzerRules>true</EnforceExtendedAnalyzerRules> <EnforceExtendedAnalyzerRules>True</EnforceExtendedAnalyzerRules>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks> <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<LangVersion>9.0</LangVersion> <LangVersion>9.0</LangVersion>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="4.14.0"> <PackageReference Include="Microsoft.CodeAnalysis.Analyzers" Version="5.3.0">
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference> </PackageReference>
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="4.14.0" /> <PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="5.3.0" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@@ -1,9 +1,8 @@
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines; using BenchmarkDotNet.Engines;
using Misaki.HighPerformance.HPC;
using Misaki.HighPerformance.Image; using Misaki.HighPerformance.Image;
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using SkiaSharp; using SkiaSharp;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
@@ -23,6 +22,7 @@ internal unsafe struct MipLevel
public float roughness; public float roughness;
} }
[HPCompute(TargetInstructionSet.AVX2)]
internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int> internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int>
{ {
public ImageResultFloat image; public ImageResultFloat image;
@@ -47,7 +47,7 @@ internal unsafe struct GGXMipGenerationJobSPMD : IJobSPMD<float, int>
var phi = 2.0f * PI * Xi.x; var phi = 2.0f * PI * Xi.x;
var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultipleAdd(a * a - 1.0f, Xi.y, 1.0f)); var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultiplyAdd(a * a - 1.0f, Xi.y, 1.0f));
var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta); var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta);
// Spherical to Cartesian coordinates (Halfway vector) // Spherical to Cartesian coordinates (Halfway vector)
@@ -198,7 +198,7 @@ internal unsafe struct GGXMipGenerationJobSPMD<TFloat, TInt> : IJobParallelFor
var phi = 2.0f * PI * Xi.x; var phi = 2.0f * PI * Xi.x;
var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultipleAdd(a * a - 1.0f, Xi.y, 1.0f)); var cosTheta = TFloat.Sqrt((1.0f - Xi.y) / TFloat.MultiplyAdd(a * a - 1.0f, Xi.y, 1.0f));
var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta); var sinTheta = TFloat.Sqrt(1.0f - cosTheta * cosTheta);
// Spherical to Cartesian coordinates (Halfway vector) // Spherical to Cartesian coordinates (Halfway vector)

View File

@@ -1,6 +1,6 @@
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.HPC;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Test.Benchmark; namespace Misaki.HighPerformance.Test.Benchmark;

View File

@@ -1,6 +1,6 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.HPC;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;

View File

@@ -31,7 +31,8 @@
<ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.HPC.Generator\Misaki.HighPerformance.HPC.Generator.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />
<ProjectReference Include="..\Misaki.HighPerformance.HPC\Misaki.HighPerformance.HPC.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" /> <ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />

View File

@@ -1,3 +1,4 @@
using Misaki.HighPerformance.HPC;
using Misaki.HighPerformance.LowLevel.Buffer; using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections; using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.Test.Benchmark; using Misaki.HighPerformance.Test.Benchmark;
@@ -7,50 +8,50 @@ using System.Buffers;
//BenchmarkRunner.Run<GGXMipGenerationBenchmark>(); //BenchmarkRunner.Run<GGXMipGenerationBenchmark>();
//const int count = 16; const int count = 16;
//var bench = new GGXMipGenerationBenchmark(); var bench = new GGXMipGenerationBenchmark();
//bench.Setup(); bench.Setup();
//for (var i = 0; i < count; i++) for (var i = 0; i < count; i++)
//{ {
// bench.JobGGX(); bench.JobGGX();
//} }
//var sw = System.Diagnostics.Stopwatch.StartNew(); var sw = System.Diagnostics.Stopwatch.StartNew();
//for (var i = 0; i < count; i++) for (var i = 0; i < count; i++)
//{ {
// bench.JobGGX(); bench.JobGGX();
//} }
//sw.Stop(); sw.Stop();
//var avgTime = sw.Elapsed.TotalMilliseconds / count; var avgTime = sw.Elapsed.TotalMilliseconds / count;
//Console.WriteLine($"GGX Mip Generation (Inline): {avgTime} ms"); Console.WriteLine($"GGX Mip Generation (Inline): {avgTime} ms");
//bench.Cleanup(); bench.Cleanup();
//GlobalSetup.GlobalInitialize(null!); //GlobalSetup.GlobalInitialize(null!);
//TestJobSystem.Initialize(null!); //TestJobSystem.Initialize(null!);
AllocationManager.Initialize(); //AllocationManager.Initialize();
Console.WriteLine(0); //Console.WriteLine(0);
for (var i = 0; i < 64; i++) //for (var i = 0; i < 64; i++)
{ //{
var size = Random.Shared.Next(2048, 8192 * 2); // var size = Random.Shared.Next(2048, 8192 * 2);
var arr = new UnsafeArray<Guid>(size, AllocationHandle.TLSF); // AllocationHandle.FreeList // var arr = new UnsafeArray<Guid>(size, AllocationHandle.TLSF); // AllocationHandle.FreeList
arr.Dispose(); // arr.Dispose();
} //}
Thread.Sleep(1000); //Thread.Sleep(1000);
Console.WriteLine(1); //Console.WriteLine(1);
for (var i = 0; i < 64; i++) //for (var i = 0; i < 64; i++)
{ //{
var size = Random.Shared.Next(2048, 8192 * 2); // var size = Random.Shared.Next(2048, 8192 * 2);
var arr = new UnsafeArray<Guid>(size, AllocationHandle.TLSF); // AllocationHandle.FreeList // var arr = new UnsafeArray<Guid>(size, AllocationHandle.TLSF); // AllocationHandle.FreeList
arr.Dispose(); // arr.Dispose();
} //}
AllocationManager.Dispose(); //AllocationManager.Dispose();
Console.Read(); //Console.Read();

View File

@@ -1,5 +1,6 @@
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.HPC;
using System.Numerics; using System.Numerics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs; namespace Misaki.HighPerformance.Test.UnitTest.Jobs;

View File

@@ -1,6 +1,6 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.HPC;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs; namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
@@ -116,8 +116,26 @@ internal struct DistanceJob : IJobSPMD<float>
} }
[TestClass] [TestClass]
public class SPMDTest public partial class SPMDTest
{ {
// Non-generic method tagged with [HPCompute] for the AVX2 target; it simply forwards
// its three operands to WideLane<float>.MultiplyAdd and returns the result.
// NOTE(review): presumably exists only as input for the HPC source generator - confirm.
[HPCompute(TargetInstructionSet.AVX2)]
private static WideLane<float> Test(WideLane<float> a, WideLane<float> b, WideLane<float> c)
{
return WideLane<float>.MultiplyAdd(a, b, c);
}
// Generic-lane method tagged with [HPCompute] for the AVX2 target.
// Returns the pair (u, v) where
//   u = Atan2(a, b) / (2 * pi) + 0.5
//   v = Asin(c) / pi + 0.5
// NOTE(review): this looks like a direction-to-equirectangular-UV mapping and is
// presumably here as input for the HPC source generator - confirm both.
[HPCompute(TargetInstructionSet.AVX2)]
private static (TFloat, TFloat) Test_SPMD<TFloat>(TFloat a, TFloat b, TFloat c)
where TFloat : unmanaged, ISPMDLane<TFloat, float>
{
var u = TFloat.Atan2(a, b);
var v = TFloat.Asin(c);
// Scale the raw angles by 1/(2*pi) and 1/pi respectively, then offset by 0.5.
u = u / (2.0f * 3.14159265358979323846f) + 0.5f;
v = v / 3.14159265358979323846f + 0.5f;
return (u, v);
}
[TestMethod] [TestMethod]
public unsafe void TestSPMDVectorDot() public unsafe void TestSPMDVectorDot()
{ {

View File

@@ -2,7 +2,7 @@ using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Buffer; using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections; using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.LowLevel.Utilities; using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.HPC;
using Misaki.HighPerformance.Test.Jobs; using Misaki.HighPerformance.Test.Jobs;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs; namespace Misaki.HighPerformance.Test.UnitTest.Jobs;

View File

@@ -8,7 +8,8 @@
<Project Path="Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj" /> <Project Path="Misaki.HighPerformance.Jobs/Misaki.HighPerformance.Jobs.csproj" />
<Project Path="Misaki.HighPerformance.LowLevel/Misaki.HighPerformance.LowLevel.csproj" /> <Project Path="Misaki.HighPerformance.LowLevel/Misaki.HighPerformance.LowLevel.csproj" />
<Project Path="Misaki.HighPerformance.Mathematics.CodeGen/Misaki.HighPerformance.Mathematics.CodeGen.csproj" /> <Project Path="Misaki.HighPerformance.Mathematics.CodeGen/Misaki.HighPerformance.Mathematics.CodeGen.csproj" />
<Project Path="Misaki.HighPerformance.Mathematics.SPMD/Misaki.HighPerformance.Mathematics.SPMD.csproj" /> <Project Path="Misaki.HighPerformance.HPC/Misaki.HighPerformance.HPC.csproj" />
<Project Path="Misaki.HighPerformance.HPC.Generator/Misaki.HighPerformance.HPC.Generator.csproj" Id="2b8a9c0d-ce6d-4064-8bcb-517001f631d3" />
<Project Path="Misaki.HighPerformance.Mathematics/Misaki.HighPerformance.Mathematics.csproj" /> <Project Path="Misaki.HighPerformance.Mathematics/Misaki.HighPerformance.Mathematics.csproj" />
<Project Path="Misaki.HighPerformance.Test/Misaki.HighPerformance.Test.csproj" /> <Project Path="Misaki.HighPerformance.Test/Misaki.HighPerformance.Test.csproj" />
<Project Path="Misaki.HighPerformance/Misaki.HighPerformance.csproj" /> <Project Path="Misaki.HighPerformance/Misaki.HighPerformance.csproj" />