using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace Misaki.HighPerformance.Mathematics.SPMD;

/// <summary>
/// Non-generic companion of <see cref="WideLane{TNumber}"/>. Holds the shuffle-control lookup tables
/// that <see cref="WideLane{TNumber}.CompressStore(WideLane{TNumber}, TNumber*)"/> indexes by move mask.
/// </summary>
public static unsafe class WideLane
{
    // Each table is indexed as (moveMask * elementsPerVector); one entry is a full shuffle-control vector.
    internal static readonly uint* s_shuffleTable512_32bit;
    internal static readonly ulong* s_shuffleTable512_64bit;
    internal static readonly uint* s_shuffleTable256_32bit;
    internal static readonly ulong* s_shuffleTable256_64bit;
    internal static readonly uint* s_shuffleTable128_32bit;
    internal static readonly ulong* s_shuffleTable128_64bit;

    /// <summary>
    /// Gets whether WideLane is supported on the current hardware.
    /// </summary>
    public static bool IsSupported => Vector.IsHardwareAccelerated;

    static WideLane()
    {
        s_shuffleTable512_32bit = ShuffleTableGenerator.ComputeShuffleTable512_32Bit();
        s_shuffleTable512_64bit = ShuffleTableGenerator.ComputeShuffleTable512_64Bit();
        s_shuffleTable256_32bit = ShuffleTableGenerator.ComputeShuffleTable256_32Bit();
        s_shuffleTable256_64bit = ShuffleTableGenerator.ComputeShuffleTable256_64Bit();
        s_shuffleTable128_32bit = ShuffleTableGenerator.ComputeShuffleTable128_32Bit();
        s_shuffleTable128_64bit = ShuffleTableGenerator.ComputeShuffleTable128_64Bit();
    }
}

/// <summary>
/// SPMD lane backed by a single <see cref="Vector{T}"/>; every operation is applied element-wise.
/// Comparison results ("masks") are lanes whose elements are either all-bits-set or zero.
/// </summary>
/// <typeparam name="TNumber">Element type stored in each lane slot.</typeparam>
[StructLayout(LayoutKind.Sequential)]
public readonly unsafe partial struct WideLane<TNumber> : ISPMDLane<WideLane<TNumber>, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    // Holds 0, 1, 2, ... LaneWidth - 1; used by Sequence.
    private static readonly Vector<TNumber> s_indices;

    /// <summary>The underlying hardware vector.</summary>
    public readonly Vector<TNumber> value;

    /// <summary>Gets the number of elements in one lane (<c>Vector&lt;TNumber&gt;.Count</c>).</summary>
    public static int LaneWidth
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => Vector<TNumber>.Count;
    }

    /// <summary>Gets a lane with every element set to zero.</summary>
    public static WideLane<TNumber> Zero
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new WideLane<TNumber>(Vector<TNumber>.Zero);
    }

    /// <summary>Gets a lane with every element set to one.</summary>
    public static WideLane<TNumber> One
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new WideLane<TNumber>(Vector<TNumber>.One);
    }

    /// <summary>Gets a lane with every element set to <c>TNumber.MinValue</c>.</summary>
    public static WideLane<TNumber> MinValue
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => Create(TNumber.MinValue);
    }

    /// <summary>Gets a lane with every element set to <c>TNumber.MaxValue</c>.</summary>
    public static WideLane<TNumber> MaxValue
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => Create(TNumber.MaxValue);
    }

    /// <summary>Gets the element at <paramref name="index"/>.</summary>
    public readonly TNumber this[int index]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => value[index];
    }

    static WideLane()
    {
        var pValues = stackalloc TNumber[LaneWidth];

        for (var i = 0; i < LaneWidth; i++)
        {
            pValues[i] = TNumber.CreateTruncating(i);
        }

        s_indices = Vector.Load(pValues);
    }

    /// <summary>Wraps an existing vector.</summary>
    public WideLane(Vector<TNumber> value)
    {
        this.value = value;
    }

    /// <summary>
    /// Element-wise floor for <see cref="float"/> and <see cref="double"/>; any other element type is returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector<TNumber> VectorFloor(Vector<TNumber> vector)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<Vector<TNumber>, Vector<float>>(ref vector);
            var floored = Vector.Floor(v);
            return Unsafe.As<Vector<float>, Vector<TNumber>>(ref floored);
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<Vector<TNumber>, Vector<double>>(ref vector);
            var floored = Vector.Floor(v);
            return Unsafe.As<Vector<double>, Vector<TNumber>>(ref floored);
        }

        return vector;
    }

    /// <summary>
    /// Element-wise truncation toward zero for <see cref="float"/> and <see cref="double"/>; any other element type is returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static Vector<TNumber> VectorTruncate(Vector<TNumber> vector)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<Vector<TNumber>, Vector<float>>(ref vector);
            var truncated = Vector.Truncate(v);
            return Unsafe.As<Vector<float>, Vector<TNumber>>(ref truncated);
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<Vector<TNumber>, Vector<double>>(ref vector);
            var truncated = Vector.Truncate(v);
            return Unsafe.As<Vector<double>, Vector<TNumber>>(ref truncated);
        }

        return vector;
    }

    /// <summary>Creates a lane with every element set to <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Create(TNumber value)
    {
        return new WideLane<TNumber>(Vector.Create(value));
    }

    /// <summary>Creates a lane from the given elements via <c>Vector.Create(values)</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Create(params ReadOnlySpan<TNumber> values)
    {
        return new WideLane<TNumber>(Vector.Create(values));
    }

    /// <summary>Creates a lane that wraps <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Create(Vector<TNumber> value)
    {
        return new WideLane<TNumber>(value);
    }

    /// <summary>
    /// Creates the lane <c>start + step * i</c> for element index <c>i</c> in <c>[0, LaneWidth)</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Sequence(TNumber start, TNumber step)
    {
        return new WideLane<TNumber>(Vector.Create(start) + (Vector.Create(step) * s_indices));
    }

    /// <summary>Loads <see cref="LaneWidth"/> consecutive elements starting at <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Load(ref TNumber value)
    {
        return new WideLane<TNumber>(Vector.LoadUnsafe(ref value));
    }

    /// <summary>Loads <see cref="LaneWidth"/> consecutive elements starting at <paramref name="pValue"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Load(TNumber* pValue)
    {
        return new WideLane<TNumber>(Vector.Load(pValue));
    }

    /// <summary>
    /// Loads a full vector starting at <paramref name="value"/> and zeroes the bits not selected by <paramref name="mask"/>.
    /// The whole vector is read regardless of the mask, so all <see cref="LaneWidth"/> elements must be readable.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> MaskLoad(WideLane<TNumber> mask, ref TNumber value)
    {
        // Load straight through the managed reference: converting an unpinned ref to a raw pointer
        // would be invalid if the GC relocated the underlying object.
        var vector = Vector.LoadUnsafe(ref value);
        return new WideLane<TNumber>(Vector.ConditionalSelect(mask.value, vector, Vector<TNumber>.Zero));
    }

    /// <summary>
    /// Loads a full vector from <paramref name="pValue"/> and zeroes the bits not selected by <paramref name="mask"/>.
    /// The whole vector is read regardless of the mask, so all <see cref="LaneWidth"/> elements must be readable.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> MaskLoad(WideLane<TNumber> mask, TNumber* pValue)
    {
        var vector = Vector.Load(pValue);
        return new WideLane<TNumber>(Vector.ConditionalSelect(mask.value, vector, Vector<TNumber>.Zero));
    }

    /// <summary>
    /// Scalar gather: element <c>i</c> is read from <c>pData[indices[i] * scale / sizeof(TNumber)]</c>.
    /// </summary>
    /// <param name="pData">Base pointer of the source data.</param>
    /// <param name="indices">Per-element indices; each is converted with <c>int.CreateTruncating</c>.</param>
    /// <param name="scale">Multiplier applied to each index before dividing by the element size.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Gather(TNumber* pData, WideLane<TNumber> indices, int scale)
    {
        var buffer = stackalloc TNumber[LaneWidth];

        for (var i = 0; i < LaneWidth; i++)
        {
            buffer[i] = pData[int.CreateTruncating(indices[i]) * scale / sizeof(TNumber)];
        }

        return new WideLane<TNumber>(Vector.Load(buffer));
    }

    /// <summary>
    /// Scalar gather: element <c>i</c> is read from <c>pData[pIndices[i] * scale / sizeof(TNumber)]</c>.
    /// </summary>
    /// <param name="pData">Base pointer of the source data.</param>
    /// <param name="pIndices">Pointer to at least <see cref="LaneWidth"/> indices.</param>
    /// <param name="scale">Multiplier applied to each index before dividing by the element size.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Gather(TNumber* pData, int* pIndices, int scale)
    {
        var buffer = stackalloc TNumber[LaneWidth];

        for (var i = 0; i < LaneWidth; i++)
        {
            buffer[i] = pData[pIndices[i] * scale / sizeof(TNumber)];
        }

        return new WideLane<TNumber>(Vector.Load(buffer));
    }

    /// <summary>
    /// Scalar gather relative to a managed reference: element <c>i</c> is read at element offset
    /// <c>indices[i] * scale / sizeof(TNumber)</c> from <paramref name="baseAddress"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Gather(ref TNumber baseAddress, WideLane<TNumber> indices, int scale)
    {
        var buffer = stackalloc TNumber[LaneWidth];

        for (var i = 0; i < LaneWidth; i++)
        {
            buffer[i] = Unsafe.Add(ref baseAddress, int.CreateTruncating(indices[i]) * scale / sizeof(TNumber));
        }

        return new WideLane<TNumber>(Vector.Load(buffer));
    }

    /// <summary>
    /// Scalar gather relative to managed references: the <c>i</c>-th index is read at offset <c>i</c> from
    /// <paramref name="baseIndex"/>, and the element is read at element offset <c>index * scale / sizeof(TNumber)</c>
    /// from <paramref name="baseAddress"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Gather(ref TNumber baseAddress, ref int baseIndex, int scale)
    {
        var buffer = stackalloc TNumber[LaneWidth];

        for (var i = 0; i < LaneWidth; i++)
        {
            buffer[i] = Unsafe.Add(ref baseAddress, Unsafe.Add(ref baseIndex, i) * scale / sizeof(TNumber));
        }

        return new WideLane<TNumber>(Vector.Load(buffer));
    }

    /// <summary>Stores all <see cref="LaneWidth"/> elements starting at <paramref name="destination"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Store(ref TNumber destination)
    {
        value.StoreUnsafe(ref destination);
    }

    /// <summary>Stores all <see cref="LaneWidth"/> elements starting at <paramref name="pDestination"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Store(TNumber* pDestination)
    {
        value.Store(pDestination);
    }

    /// <summary>
    /// Writes the elements selected by <paramref name="mask"/> contiguously starting at <paramref name="destination"/>.
    /// See <see cref="CompressStore(WideLane{TNumber}, TNumber*)"/> for the destination size requirement.
    /// </summary>
    /// <returns>The number of selected elements.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int CompressStore(WideLane<TNumber> mask, ref TNumber destination)
    {
        // Pin the destination for the duration of the store: the pointer overload writes through a raw
        // pointer, which is only valid if the GC cannot move the target in the meantime.
        fixed (TNumber* pDestination = &destination)
        {
            return CompressStore(mask, pDestination);
        }
    }

    /// <summary>
    /// Writes the elements whose mask element has its most significant bit set contiguously to
    /// <paramref name="pDestination"/>, preserving their order.
    /// </summary>
    /// <remarks>
    /// The hardware-accelerated paths store a complete vector, so the destination must have room for
    /// <see cref="LaneWidth"/> elements even when fewer are selected. The scalar fallback writes only the
    /// selected elements.
    /// </remarks>
    /// <returns>The number of selected elements.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int CompressStore(WideLane<TNumber> mask, TNumber* pDestination)
    {
        var size = sizeof(TNumber);

        if (LaneWidth == Vector512<TNumber>.Count && Vector512.IsHardwareAccelerated)
        {
            if (size == 4)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector512<uint>>(ref mask);
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 16) because each control vector has 16 elements
                var shuffle = Vector512.Load(WideLane.s_shuffleTable512_32bit + (moveMask * 16));
                var compressed = Vector512.Shuffle(vec, shuffle);
                compressed.Store((uint*)pDestination);

                return BitOperations.PopCount(moveMask);
            }

            if (size == 8)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector512<ulong>>(ref mask);
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 8) because each control vector has 8 elements
                var shuffle = Vector512.Load(WideLane.s_shuffleTable512_64bit + (moveMask * 8));
                var compressed = Vector512.Shuffle(vec, shuffle);
                compressed.Store((ulong*)pDestination);

                return BitOperations.PopCount(moveMask);
            }
        }
        else if (LaneWidth == Vector256<TNumber>.Count && Vector256.IsHardwareAccelerated)
        {
            if (size == 4)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector256<uint>>(ref mask);
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 8) because each control vector has 8 elements
                var shuffle = Vector256.Load(WideLane.s_shuffleTable256_32bit + (moveMask * 8));
                var compressed = Vector256.Shuffle(vec, shuffle);
                compressed.Store((uint*)pDestination);

                return BitOperations.PopCount(moveMask);
            }

            if (size == 8)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector256<ulong>>(ref mask);

                // For 64-bit elements ExtractMostSignificantBits only populates 4 bits (values 0-15)
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 4) because each control vector has 4 elements
                var shuffle = Vector256.Load(WideLane.s_shuffleTable256_64bit + (moveMask * 4));
                var compressed = Vector256.Shuffle(vec, shuffle);
                compressed.Store((ulong*)pDestination);

                return BitOperations.PopCount(moveMask);
            }
        }
        else if (LaneWidth == Vector128<TNumber>.Count && Vector128.IsHardwareAccelerated)
        {
            if (size == 4)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector128<uint>>(ref mask);
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 4) because each control vector has 4 elements
                var shuffle = Vector128.Load(WideLane.s_shuffleTable128_32bit + (moveMask * 4));
                var compressed = Vector128.Shuffle(vec, shuffle);
                compressed.Store((uint*)pDestination);

                return BitOperations.PopCount(moveMask);
            }

            if (size == 8)
            {
                ref var vec = ref Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref Unsafe.AsRef(in this));
                var m = Unsafe.As<WideLane<TNumber>, Vector128<ulong>>(ref mask);
                var moveMask = m.ExtractMostSignificantBits();

                // Offset is (moveMask * 2) because each control vector has 2 elements
                var shuffle = Vector128.Load(WideLane.s_shuffleTable128_64bit + (moveMask * 2));
                var compressed = Vector128.Shuffle(vec, shuffle);
                compressed.Store((ulong*)pDestination);

                return BitOperations.PopCount(moveMask);
            }
        }

        // Scalar fallback: slow, but correct on ANY hardware and for any element size.
        // Test the sign (most significant) bit of each mask element, matching ExtractMostSignificantBits
        // in the vector paths. A numeric comparison against ~0 cannot be used: for float/double the
        // all-bits-set pattern is NaN, which never compares equal to anything.
        var signByteOffset = BitConverter.IsLittleEndian ? size - 1 : 0;
        var pMaskBytes = (byte*)&mask;
        var count = 0;

        for (var i = 0; i < LaneWidth; i++)
        {
            if ((pMaskBytes[(i * size) + signByteOffset] & 0x80) != 0)
            {
                pDestination[count++] = value[i];
            }
        }

        return count;
    }

    /// <summary>Returns the underlying vector.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Vector<TNumber> AsVector()
    {
        return value;
    }

    /// <summary>
    /// Returns a raw pointer to this instance's <see cref="value"/> field.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the pointer is obtained without pinning, so it is presumably only meaningful while this
    /// instance lives in storage that cannot move (e.g. a stack local) - confirm against callers.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly TNumber* GetUnsafePtr()
    {
        return (TNumber*)Unsafe.AsPointer(ref Unsafe.AsRef(in value));
    }

    /// <summary>Reinterprets the bits of this lane as <typeparamref name="TOther"/> without conversion.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public TOther BitCast<TOther, TOtherNumber>()
        where TOther : ISPMDLane<TOther, TOtherNumber>
        where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>
    {
        return Unsafe.BitCast<WideLane<TNumber>, TOther>(this);
    }

    /// <summary>Element-wise addition.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator +(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value + b.value);
    }

    /// <summary>Element-wise subtraction.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator -(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value - b.value);
    }

    /// <summary>Element-wise multiplication.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator *(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value * b.value);
    }

    /// <summary>Element-wise division.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator /(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value / b.value);
    }

    /// <summary>
    /// Element-wise remainder computed as <c>a - floor(a / b) * b</c>.
    /// For floating-point elements this is a floored modulo (the result takes the sign of <paramref name="b"/>),
    /// which differs from the scalar C# <c>%</c> operator when the operands have opposite signs.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator %(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value - VectorFloor(a.value / b.value) * b.value);
    }

    /// <summary>Element-wise negation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator -(WideLane<TNumber> a)
    {
        return new WideLane<TNumber>(-a.value);
    }

    /// <summary>Bitwise AND.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator &(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value & b.value);
    }

    /// <summary>Bitwise OR.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator |(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value | b.value);
    }

    /// <summary>Bitwise XOR.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator ^(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(a.value ^ b.value);
    }

    /// <summary>Bitwise complement.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator ~(WideLane<TNumber> a)
    {
        return new WideLane<TNumber>(~a.value);
    }

    /// <summary>Element-wise equality; returns a mask, not a <see cref="bool"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator ==(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return Equal(a, b);
    }

    /// <summary>Element-wise inequality; returns a mask, not a <see cref="bool"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator !=(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return ~Equal(a, b);
    }

    /// <summary>Element-wise greater-than; returns a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator >(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return GreaterThan(a, b);
    }

    /// <summary>Element-wise greater-than-or-equal; returns a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator >=(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return GreaterThanOrEqual(a, b);
    }

    /// <summary>Element-wise less-than; returns a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator <(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return LessThan(a, b);
    }

    /// <summary>Element-wise less-than-or-equal; returns a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> operator <=(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return LessThanOrEqual(a, b);
    }

    /// <summary>Broadcasts a scalar to every element.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static implicit operator WideLane<TNumber>(TNumber value)
    {
        return Create(value);
    }

    /// <summary>Element-wise absolute value.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Abs(WideLane<TNumber> value)
    {
        return new WideLane<TNumber>(Vector.Abs(value.value));
    }

    /// <summary>
    /// Element-wise floor for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Floor(WideLane<TNumber> value)
    {
        return new WideLane<TNumber>(VectorFloor(value.value));
    }

    /// <summary>Element-wise fractional part, computed as <c>value - floor(value)</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Frac(WideLane<TNumber> value)
    {
        return new WideLane<TNumber>(value.value - VectorFloor(value.value));
    }

    /// <summary>Element-wise square root.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Sqrt(WideLane<TNumber> value)
    {
        return new WideLane<TNumber>(Vector.SquareRoot(value.value));
    }

    /// <summary>Element-wise linear interpolation <c>a + (b - a) * t</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Lerp(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> t)
    {
        return new WideLane<TNumber>(a.value + (b.value - a.value) * t.value);
    }

    /// <summary>
    /// Element-wise <c>a * b + c</c>. Uses <c>Vector.FusedMultiplyAdd</c> for <see cref="float"/> and
    /// <see cref="double"/>, and a separate multiply and add for every other element type.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> MultipleAdd(WideLane<TNumber> a, WideLane<TNumber> b, WideLane<TNumber> c)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var va = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref a);
            ref var vb = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref b);
            ref var vc = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref c);
            var result = Vector.FusedMultiplyAdd(va, vb, vc);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var va = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref a);
            ref var vb = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref b);
            ref var vc = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref c);
            var result = Vector.FusedMultiplyAdd(va, vb, vc);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }
        else
        {
            return new WideLane<TNumber>((a.value * b.value) + c.value);
        }
    }

    /// <summary>Element-wise minimum.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Min(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.Min(a.value, b.value));
    }

    /// <summary>Element-wise maximum.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Max(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.Max(a.value, b.value));
    }

    /// <summary>Element-wise clamp of <paramref name="value"/> to [<paramref name="min"/>, <paramref name="max"/>].</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Clamp(WideLane<TNumber> value, WideLane<TNumber> min, WideLane<TNumber> max)
    {
        return new WideLane<TNumber>(Vector.Clamp(value.value, min.value, max.value));
    }

    /// <summary>Element-wise clamp of <paramref name="value"/> to [0, 1].</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Saturate(WideLane<TNumber> value)
    {
        return Clamp(value, Create(TNumber.Zero), Create(TNumber.One));
    }

    /// <summary>
    /// Shared sine approximation used by <see cref="Sin"/>, <see cref="Cos"/> and <see cref="SinCos"/>.
    /// </summary>
    /// <param name="angle">Angle in radians.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static WideLane<TNumber> SinCore(WideLane<TNumber> angle)
    {
        // 1. Range reduction: y = x * (1 / PI), k = Round(y), z = y - k.
        //    z lies in [-0.5, 0.5], corresponding to an actual angle in [-PI/2, PI/2].
        var invPi = Create(TNumber.CreateTruncating(0.318309886f)); // 1 / PI
        var y = angle * invPi;
        var k = Round(y);
        var z = y - k;

        // 2. Branchless sign flip: Sin(x + k*PI) = Sin(x) * (-1)^k.
        //    (-1)^k is computed arithmetically as 1 - 2 * Abs(k - 2 * Round(k * 0.5)).
        var half = Create(TNumber.CreateTruncating(0.5f));
        var two = Create(TNumber.CreateTruncating(2.0f));
        var kEven = Round(k * half) * two;
        var sign = One - two * Abs(k - kEven);

        // 3. Odd polynomial for sin(PI * z) on [-0.5, 0.5], in Horner form:
        //    z * (C1 + z^2 * (C3 + z^2 * (C5 + z^2 * (C7 + z^2 * C9))))
        var c1 = Create(TNumber.CreateTruncating(3.14159265f)); // PI
        var c3 = Create(TNumber.CreateTruncating(-5.16771278f)); // -PI^3 / 6
        var c5 = Create(TNumber.CreateTruncating(2.55016404f)); // PI^5 / 120
        var c7 = Create(TNumber.CreateTruncating(-0.59926453f)); // -PI^7 / 5040
        var c9 = Create(TNumber.CreateTruncating(0.08214589f)); // PI^9 / 362880

        var z2 = z * z;
        var poly = MultipleAdd(z2, c9, c7); // c7 + c9*z^2
        poly = MultipleAdd(z2, poly, c5); // c5 + z^2*(...)
        poly = MultipleAdd(z2, poly, c3); // c3 + z^2*(...)
        poly = MultipleAdd(z2, poly, c1); // c1 + z^2*(...)
        poly = z * poly; // z * (...)

        return poly * sign;
    }

    /// <summary>Element-wise sine approximation of an angle in radians.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Sin(WideLane<TNumber> value)
    {
        return SinCore(value);
    }

    /// <summary>Element-wise cosine approximation, evaluated as the sine of <c>value + PI/2</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Cos(WideLane<TNumber> value)
    {
        var halfPi = Create(TNumber.CreateTruncating(1.570796327f));
        return SinCore(value + halfPi);
    }

    /// <summary>Computes the sine and cosine approximations of <paramref name="value"/> together.</summary>
    /// <returns>The tuple <c>(sin, cos)</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static (WideLane<TNumber> sin, WideLane<TNumber> cos) SinCos(WideLane<TNumber> value)
    {
        var halfPi = Create(TNumber.CreateTruncating(1.570796327f));
        return (SinCore(value), SinCore(value + halfPi));
    }

    /// <summary>Element-wise tangent approximation of an angle in radians.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Tan(WideLane<TNumber> value)
    {
        // 1. Range reduction
        // x = value - PI * Round(value / PI) maps the input into [-PI/2, PI/2].
        // Doing this exactly is complex (Payne-Hanek); this simple reduction is the approximation used here.
        var pi = Create(TNumber.CreateTruncating(Math.PI));
        var x = value - pi * Round(value / pi);

        // 2. Polynomial approximation
        // tan(x) ~= x + c1*x^3 + c2*x^5
        // Factored (Horner's method) for fewer ops: x * (1 + x^2 * (c1 + c2*x^2))
        var x2 = x * x;
        var vc1 = Create(TNumber.CreateTruncating(0.3333314036)); // ~1/3
        var vc2 = Create(TNumber.CreateTruncating(0.1333923995)); // ~2/15

        // c1 + c2 * x2
        var poly = MultipleAdd(x2, vc2, vc1);

        // x * (1 + x2 * poly)
        return MultipleAdd(x, MultipleAdd(x2, poly, One), Zero);
    }

    /// <summary>Element-wise arcsine approximation, computed as <c>PI/2 - Acos(value)</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Asin(WideLane<TNumber> value)
    {
        // asin(value) = pi/2 - acos(value)
        var piOver2 = Create(TNumber.CreateTruncating(Math.PI / 2));
        return piOver2 - Acos(value);
    }

    /// <summary>Element-wise arccosine approximation.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Acos(WideLane<TNumber> value)
    {
        // 0 <= value <= 1 : acos(value) = sqrt(1 - value) * (c0 + c1*value + c2*value^2 + c3*value^3)
        // value < 0       : acos(value) = pi - acos(-value)
        var x = Abs(value);

        var c0 = Create(TNumber.CreateTruncating(1.5707288f)); // ~pi/2
        var c1 = Create(TNumber.CreateTruncating(-0.2121144f));
        var c2 = Create(TNumber.CreateTruncating(0.0742610f));
        var c3 = Create(TNumber.CreateTruncating(-0.0187293f));

        var term1 = MultipleAdd(x, c3, c2);
        var term2 = MultipleAdd(x, term1, c1);
        var poly = MultipleAdd(x, term2, c0);

        var sqrtTerm = Sqrt(One - x);
        var result = poly * sqrtTerm;

        var pi = Create(TNumber.CreateTruncating(Math.PI));
        var isNegative = LessThan(value, Zero);

        return Select(isNegative, pi - result, result);
    }

    /// <summary>
    /// Element-wise arctangent approximation <c>value * (c1 + c2 * value^2)</c>. No range reduction is applied.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Atan(WideLane<TNumber> value)
    {
        // atan(value) = value * (c1 + c2*value^2)
        var c1 = Create(TNumber.CreateTruncating(0.97239411f));
        var c2 = Create(TNumber.CreateTruncating(-0.19194795f));

        var x2 = value * value;
        var poly = MultipleAdd(x2, c2, c1);

        return value * poly;
    }

    /// <summary>Element-wise two-argument arctangent approximation of <c>y / x</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Atan2(WideLane<TNumber> y, WideLane<TNumber> x)
    {
        var absX = Abs(x);
        var absY = Abs(y);

        // 1. Determine the ratio (input to the polynomial)
        // If |x| >= |y| we are in the "shallow" region: ratio = |y| / |x|
        // If |y| >  |x| we are in the "steep" region:   ratio = |x| / |y| (and the result is transformed below)
        var yGtX = GreaterThan(absY, absX);

        // Select numerator and denominator so that the ratio is always in [0, 1]
        var num = Select(yGtX, absX, absY);
        var den = Select(yGtX, absY, absX);

        var t = num / den; // t is now in [0, 1]
        var t2 = t * t;

        // 2. Polynomial approximation (odd function: t * (c1 + c2*t^2))
        var c1 = Create(TNumber.CreateTruncating(0.97239411f));
        var c2 = Create(TNumber.CreateTruncating(-0.19194795f));

        // (c1 + c2 * t2)
        var poly = MultipleAdd(c2, t2, c1);

        // result = t * poly
        var result = t * poly;

        // 3. Reconstruct the angle
        // If the operands were swapped (yGtX), use the identity atan(x/y) = PI/2 - atan(y/x)
        var halfPi = Create(TNumber.CreateTruncating(1.570796327f));
        result = Select(yGtX, halfPi - result, result);

        // 4. Adjust for quadrants (signs)
        // If x < 0 we are in quadrant 2 or 3, so the angle is mirrored: PI - result
        var pi = Create(TNumber.CreateTruncating(3.141592654f));
        var xLtZero = LessThan(x, Zero);
        result = Select(xLtZero, pi - result, result);

        // If y < 0 the result must be negative (standard atan2 convention).
        // Negating works because the steps above computed the angle from |y| and |x|.
        var yLtZero = LessThan(y, Zero);
        var negativeResult = -result;

        return Select(yLtZero, negativeResult, result);
    }

    /// <summary>Element-wise power, computed as <c>Exp(y * Log(x))</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Pow(WideLane<TNumber> x, WideLane<TNumber> y)
    {
        return Exp(y * Log(x));
    }

    /// <summary>
    /// Element-wise <c>e^value</c> for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Exp(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
            var result = Vector.Exp(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
            var result = Vector.Exp(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }

        return value;
    }

    /// <summary>Element-wise <c>2^value</c>, computed as <c>Pow(2, value)</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Exp2(WideLane<TNumber> value)
    {
        return Pow(Create(TNumber.CreateTruncating(2)), value);
    }

    /// <summary>
    /// Element-wise natural logarithm for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Log(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
            var result = Vector.Log(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
            var result = Vector.Log(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }

        return value;
    }

    /// <summary>
    /// Element-wise base-2 logarithm for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Log2(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
            var result = Vector.Log2(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
            var result = Vector.Log2(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }

        return value;
    }

    /// <summary>
    /// Element-wise ceiling for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Ceil(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
            var result = Vector.Ceiling(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
            var result = Vector.Ceiling(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }

        return value;
    }

    /// <summary>
    /// Element-wise rounding via <c>Vector.Round</c> for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Round(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<float>>(ref value);
            var result = Vector.Round(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<float>, Vector<TNumber>>(ref result));
        }
        else if (typeof(TNumber) == typeof(double))
        {
            ref var v = ref Unsafe.As<WideLane<TNumber>, Vector<double>>(ref value);
            var result = Vector.Round(v);
            return new WideLane<TNumber>(Unsafe.As<Vector<double>, Vector<TNumber>>(ref result));
        }

        return value;
    }

    /// <summary>
    /// Element-wise truncation toward zero for <see cref="float"/> and <see cref="double"/>; other element types are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Trunc(WideLane<TNumber> value)
    {
        return new WideLane<TNumber>(VectorTruncate(value.value));
    }

    /// <summary>Element-wise sign: 1 for positive, -1 for negative and 0 otherwise.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Sign(WideLane<TNumber> value)
    {
        // Use an arithmetic -1 for negative lanes. The bit pattern ~0 equals -1 only for integers;
        // for float/double it is NaN.
        return Select(
            GreaterThan(value, Zero),
            One,
            Select(
                LessThan(value, Zero),
                -One,
                Zero));
    }

    /// <summary>Element-wise: the magnitude of <paramref name="magnitude"/> with the sign of <paramref name="sign"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> CopySign(WideLane<TNumber> magnitude, WideLane<TNumber> sign)
    {
        return new WideLane<TNumber>(Vector.CopySign(magnitude.value, sign.value));
    }

    /// <summary>
    /// Element-wise reciprocal. For <see cref="float"/> lanes of 128 or 256 bits the SSE/AVX reciprocal
    /// instruction is used when supported; every other case computes <c>1 / value</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Rcp(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            if (Sse.IsSupported && LaneWidth == Vector128<float>.Count)
            {
                var vf = Unsafe.As<WideLane<TNumber>, Vector128<float>>(ref value);
                var result = Sse.Reciprocal(vf);
                return Unsafe.As<Vector128<float>, WideLane<TNumber>>(ref result);
            }
            else if (Avx.IsSupported && LaneWidth == Vector256<float>.Count)
            {
                var vf = Unsafe.As<WideLane<TNumber>, Vector256<float>>(ref value);
                var result = Avx.Reciprocal(vf);
                return Unsafe.As<Vector256<float>, WideLane<TNumber>>(ref result);
            }
        }

        return Create(TNumber.One) / value;
    }

    /// <summary>
    /// Element-wise reciprocal square root. For <see cref="float"/> lanes of 128 or 256 bits the SSE/AVX
    /// instruction is used when supported; every other case computes <c>1 / Sqrt(value)</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Rsqrt(WideLane<TNumber> value)
    {
        if (typeof(TNumber) == typeof(float))
        {
            if (Sse.IsSupported && LaneWidth == Vector128<float>.Count)
            {
                var vf = Unsafe.As<WideLane<TNumber>, Vector128<float>>(ref value);
                var result = Sse.ReciprocalSqrt(vf);
                return Unsafe.As<Vector128<float>, WideLane<TNumber>>(ref result);
            }
            else if (Avx.IsSupported && LaneWidth == Vector256<float>.Count)
            {
                var vf = Unsafe.As<WideLane<TNumber>, Vector256<float>>(ref value);
                var result = Avx.ReciprocalSqrt(vf);
                return Unsafe.As<Vector256<float>, WideLane<TNumber>>(ref result);
            }
        }

        return Create(TNumber.One) / Sqrt(value);
    }

    /// <summary>
    /// Bitwise select: takes bits from <paramref name="ifTrue"/> where <paramref name="conditionMask"/> has a
    /// set bit and from <paramref name="ifFalse"/> elsewhere.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Select(WideLane<TNumber> conditionMask, WideLane<TNumber> ifTrue, WideLane<TNumber> ifFalse)
    {
        return new WideLane<TNumber>(Vector.ConditionalSelect(
            conditionMask.value,
            ifTrue.value,
            ifFalse.value));
    }

    /// <summary>Element-wise <c>a &gt; b</c> as a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> GreaterThan(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.GreaterThan(a.value, b.value));
    }

    /// <summary>Element-wise <c>a &gt;= b</c> as a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> GreaterThanOrEqual(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.GreaterThanOrEqual(a.value, b.value));
    }

    /// <summary>Element-wise <c>a &lt; b</c> as a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> LessThan(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.LessThan(a.value, b.value));
    }

    /// <summary>Element-wise <c>a &lt;= b</c> as a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> LessThanOrEqual(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.LessThanOrEqual(a.value, b.value));
    }

    /// <summary>Element-wise <c>a == b</c> as a mask.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static WideLane<TNumber> Equal(WideLane<TNumber> a, WideLane<TNumber> b)
    {
        return new WideLane<TNumber>(Vector.Equals(a.value, b.value));
    }

    /// <summary>Returns <see langword="true"/> when at least one element of <paramref name="mask"/> is not equal to zero.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool Any(WideLane<TNumber> mask)
    {
        return !Vector.EqualsAll(mask.value, Vector<TNumber>.Zero);
    }

    /// <summary>Returns <see langword="true"/> when every bit of <paramref name="mask"/> is set.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool All(WideLane<TNumber> mask)
    {
        // Compare the raw bytes rather than TNumber values: for float/double the all-bits-set pattern is NaN,
        // and a numeric comparison of NaN with NaN is always false.
        return Vector.EqualsAll(Vector.AsVectorByte(mask.value), Vector<byte>.AllBitsSet);
    }

    /// <summary>Returns <see langword="true"/> when every element of <paramref name="mask"/> equals zero.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool None(WideLane<TNumber> mask)
    {
        return Vector.EqualsAll(mask.value, Vector<TNumber>.Zero);
    }

    /// <summary>Whole-lane equality, delegating to <c>Vector&lt;TNumber&gt;.Equals</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool Equals(WideLane<TNumber> other)
    {
        return value.Equals(other.value);
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override bool Equals(object? obj)
    {
        return obj is WideLane<TNumber> other && Equals(other);
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override int GetHashCode()
    {
        return value.GetHashCode();
    }

    /// <inheritdoc/>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override string ToString()
    {
        return value.ToString();
    }
}