Refactor SPMD to HPC; add SIMD source generators

Major namespace migration from SPMD to HPC across all code, templates, and projects. Introduced Misaki.HighPerformance.HPC.Generator with Roslyn-based source generators for SIMD code (e.g., AVX2), including attribute and method generators. Renamed MultipleAdd to MultiplyAdd in all lanes and updated usages. Added AVX2 utility methods via codegen. Updated tests, benchmarks, and project references to use the new framework. Improved SIMD memory utilities and modernized project files. Removed legacy SPMD project from the solution.
This commit is contained in:
2026-05-06 13:43:58 +09:00
parent d3e497c7d8
commit c8f78f9d02
36 changed files with 895 additions and 130 deletions

View File

@@ -0,0 +1,164 @@
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using System;
namespace Misaki.HighPerformance.HPC.Generator
{
/// <summary>
/// Incremental source generator that injects a fixed <c>AVX2Utility</c> helper class
/// (vectorized <c>Asin</c>, <c>Acos</c> and <c>Atan2</c> approximations over <c>Vector256&lt;float&gt;</c>)
/// into the consuming compilation as <c>AVX2Utility.g.cs</c>.
/// </summary>
[Generator]
internal class AVX2UtilityGenerator : IIncrementalGenerator
{
    /// <summary>
    /// Registers a post-initialization output that adds the constant helper source.
    /// The emitted text does not depend on any input from the compilation.
    /// </summary>
    /// <param name="context">Initialization context supplied by the generator driver.</param>
    public void Initialize(IncrementalGeneratorInitializationContext context)
    {
        context.RegisterPostInitializationOutput(static ctx =>
        {
            // NOTE: Avx/Avx2 BlendVariable(left, right, mask) returns `right` in lanes whose mask
            // sign bit is set and `left` in all other lanes. Every blend in the emitted code is
            // written as BlendVariable(valueWhenMaskClear, valueWhenMaskSet, mask).
            var source = @"
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.HPC
{
    public static class AVX2Utility
    {
        /// <summary>Approximates asin per lane via asin(v) = pi/2 - acos(v).</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Asin(Vector256<float> value)
        {
            // asin(value) = pi/2 - acos(value)
            var piOver2 = Vector256.Create(MathF.PI / 2.0f);
            return Avx2.Subtract(piOver2, Acos(value));
        }

        /// <summary>Approximates acos per lane with a cubic polynomial times sqrt(1 - |v|).</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Acos(Vector256<float> value)
        {
            // 0 <= value <= 1 : acos(value) = sqrt(1 - value) * (c0 + c1*value + c2*value^2 + c3*value^3)
            // value < 0       : acos(value) = pi - acos(-value)
            var x = Vector256.Abs(value);
            var c0 = Vector256.Create(1.5707288f); // approximately pi/2
            var c1 = Vector256.Create(-0.2121144f);
            var c2 = Vector256.Create(0.0742610f);
            var c3 = Vector256.Create(-0.0187293f);
            // Horner evaluation of c0 + x*(c1 + x*(c2 + x*c3))
            var term1 = Fma.MultiplyAdd(x, c3, c2);
            var term2 = Fma.MultiplyAdd(x, term1, c1);
            var poly = Fma.MultiplyAdd(x, term2, c0);
            var sqrtTerm = Avx2.Sqrt(Avx2.Subtract(Vector256<float>.One, x));
            var result = Avx2.Multiply(poly, sqrtTerm);
            var pi = Vector256.Create(MathF.PI);
            var isNegative = Avx2.CompareLessThan(value, Vector256<float>.Zero);
            // BlendVariable picks the second operand where the mask is set:
            // negative lanes get pi - result, all other lanes keep result.
            return Avx2.BlendVariable(result, Avx2.Subtract(pi, result), isNegative);
        }

        /// <summary>Approximates atan2(y, x) per lane using a low-order odd polynomial for atan on [0, 1].</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector256<float> Atan2(Vector256<float> y, Vector256<float> x)
        {
            var absX = Vector256.Abs(x);
            var absY = Vector256.Abs(y);

            // 1. Determine the ratio (input to atan)
            // If |x| >= |y| we are in the shallow region, ratio = |y|/|x|
            // If |y| >  |x| we are in the steep region,   ratio = |x|/|y| (and the result is transformed below)
            var yGtX = Avx2.CompareGreaterThan(absY, absX);

            // Select numerator and denominator so the ratio is always in [0, 1].
            // BlendVariable(left, right, mask) returns right where the mask is set.
            var num = Avx2.BlendVariable(absY, absX, yGtX); // smaller magnitude
            var den = Avx2.BlendVariable(absX, absY, yGtX); // larger magnitude
            var t = Avx2.Multiply(num, Avx2.Reciprocal(den)); // t is now in [0, 1]

            // When both inputs are zero the reciprocal is infinite and 0 * inf is NaN;
            // force the ratio to 0 in those lanes so that Atan2(0, 0) yields 0.
            var denIsZero = Avx2.CompareEqual(den, Vector256<float>.Zero);
            t = Avx2.AndNot(denIsZero, t);

            var t2 = Avx2.Multiply(t, t);

            // 2. Polynomial approximation (odd function: t * (c1 + c2*t^2))
            var c1 = Vector256.Create(0.97239411f);
            var c2 = Vector256.Create(-0.19194795f);
            var poly = Fma.MultiplyAdd(c2, t2, c1);
            var result = Avx2.Multiply(t, poly);

            // 3. Reconstruct the angle
            // In the steep region we computed atan(|x|/|y|); use atan(|y|/|x|) = pi/2 - atan(|x|/|y|)
            var halfPi = Vector256.Create(1.570796327f);
            result = Avx2.BlendVariable(result, halfPi - result, yGtX);

            // 4. Adjust for quadrants (signs)
            // If x < 0 we are in quadrant 2 or 3, so the unsigned angle is pi - result
            var pi = Vector256.Create(3.141592654f);
            var xLtZero = Avx2.CompareLessThan(x, Vector256<float>.Zero);
            result = Avx2.BlendVariable(result, pi - result, xLtZero);

            // If y < 0 the result is negative (standard atan2 convention).
            // This works because the steps above computed the angle from |y| and |x| only.
            var yLtZero = Avx2.CompareLessThan(y, Vector256<float>.Zero);
            var negativeResult = Avx2.Subtract(Vector256<float>.Zero, result);
            return Avx2.BlendVariable(result, negativeResult, yLtZero);
        }
    }
}";
            ctx.AddSource("AVX2Utility.g.cs", source);
        });
    }
}
/// <summary>
/// <see cref="HPCRewriter"/> implementation named "AVX2". It maps a subset of
/// <see cref="SIMDInstruction"/> values onto the <c>Fma</c> intrinsics class or the
/// <c>AVX2Utility</c> helper class.
/// </summary>
internal class AVX2Rewriter : HPCRewriter
{
    /// <summary>Identifier of this rewriter.</summary>
    public override string Name => "AVX2";

    /// <summary>Returns the using directives for the x86 intrinsics namespaces as a single string.</summary>
    public override string GetNesessaryUsing() =>
        "using System.Runtime.Intrinsics;\nusing System.Runtime.Intrinsics.X86;";

    /// <summary>
    /// Maps an instruction to the type expression and method name that implement it.
    /// </summary>
    /// <param name="instruction">Instruction to translate.</param>
    /// <param name="isFloatingPoint">Not consulted by this rewriter.</param>
    /// <returns>
    /// A populated <see cref="MathExpression"/> for MultiplyAdd, Asin and Atan2;
    /// <c>default</c> for every other instruction (including Add, Subtract and Multiply).
    /// </returns>
    protected override MathExpression RewriteMathExpression(SIMDInstruction instruction, bool isFloatingPoint) =>
        instruction switch
        {
            SIMDInstruction.MultiplyAdd => new MathExpression { Expression = "Fma", Name = "MultiplyAdd" },
            SIMDInstruction.Asin => new MathExpression { Expression = "AVX2Utility", Name = "Asin" },
            SIMDInstruction.Atan2 => new MathExpression { Expression = "AVX2Utility", Name = "Atan2" },
            // No AVX2-specific mapping is provided here for the remaining instructions.
            _ => default,
        };

    /// <summary>Leaves the argument list untouched for every instruction.</summary>
    /// <param name="instruction">Instruction whose arguments are being visited.</param>
    /// <param name="originalArgs">Arguments of the original call; not modified.</param>
    protected override void RewriteMathArguments(SIMDInstruction instruction, Span<ArgumentSyntax> originalArgs)
    {
        // Intentionally empty: no argument rewriting is performed.
    }
}
}