using System.Diagnostics.CodeAnalysis;
using System.Numerics;

namespace Misaki.HighPerformance.Mathematics.SPMD;

/// <summary>
/// Common marker interface for SPMD lane types.
/// </summary>
public interface ISPMDLane
{
    /// <summary>
    /// Gets the number of lanes (vector width) for the SPMD implementation.
    /// </summary>
    static abstract int LaneWidth { get; }
}

/// <summary>
/// Represents a single-lane or multi-lane (vectorized) SPMD value and the operations supported on it.
/// </summary>
/// <typeparam name="TSelf">The concrete SPMD lane type implementing this interface.</typeparam>
/// <typeparam name="TNumber">The underlying numeric element type.</typeparam>
/// <remarks>
/// Comparison operators and comparison methods return a lane-shaped mask (<typeparamref name="TSelf"/>)
/// rather than a <see cref="bool"/>; use <c>Any</c>, <c>All</c> or <c>None</c> to collapse a mask to a scalar.
/// </remarks>
public unsafe interface ISPMDLane<TSelf, TNumber> : ISPMDLane, IEquatable<TSelf>
    where TSelf : ISPMDLane<TSelf, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Gets a lane value where all lanes are set to numeric zero.
    /// </summary>
    static abstract TSelf Zero { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to numeric one.
    /// </summary>
    static abstract TSelf One { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the minimum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MinValue { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the maximum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MaxValue { get; }

    /// <summary>
    /// Gets a lane value where all bits are set to 1 for each lane.
    /// </summary>
    static abstract TSelf AllBitsSet { get; }

    /// <summary>
    /// Gets the element value for the specified lane index.
    /// </summary>
    /// <param name="index">The zero-based lane index.</param>
    TNumber this[int index] { get; }

    /// <summary>
    /// Creates a lane value where all lanes are set to the specified value.
    /// </summary>
    /// <param name="value">The value to set for all lanes.</param>
    /// <returns>The created lane value.</returns>
    static abstract TSelf Create(TNumber value);

    /// <summary>
    /// Creates a new instance of the type from the specified sequence of numeric values.
    /// </summary>
    /// <param name="values">A read-only span (or inline argument list) containing the numeric values to use for initialization.</param>
    /// <returns>A new instance of the type initialized with the provided numeric values.</returns>
    static abstract TSelf Create(params ReadOnlySpan<TNumber> values);

    /// <summary>
    /// Creates a lane value from the specified vector.
    /// </summary>
    /// <param name="value">The vector to create the lane value from.</param>
    /// <returns>The lane value built from the vector.</returns>
    static abstract TSelf Create(Vector<TNumber> value);

    /// <summary>
    /// Creates a lane value with a sequence starting from the specified value with the given step.
    /// </summary>
    /// <param name="start">The starting value.</param>
    /// <param name="step">The step value for the sequence.</param>
    /// <returns>The lane value containing the arithmetic sequence.</returns>
    /// <remarks>
    /// Implementations may rely on vector creation helpers and assume that the resulting sequence length matches <see cref="ISPMDLane.LaneWidth"/>.
    /// </remarks>
    static abstract TSelf Sequence(TNumber start, TNumber step);

    /// <summary>
    /// Loads a lane value from the specified reference.
    /// </summary>
    /// <param name="value">The reference to load from.</param>
    /// <returns>The loaded lane value.</returns>
    static abstract TSelf Load(ref TNumber value);

    /// <summary>
    /// Loads a lane value from the specified pointer.
    /// </summary>
    /// <param name="pValue">The pointer to load from.</param>
    /// <returns>The loaded lane value.</returns>
    /// <remarks>
    /// Unsafe pointer overloads are provided for scenarios where sequential lane data is already contiguous in memory.
    /// </remarks>
    static abstract TSelf Load(TNumber* pValue);

    /// <summary>
    /// Uses the specified mask to conditionally load lane values from the given reference, returning a lane value
    /// where masked lanes are loaded and unmasked lanes are set to zero.
    /// </summary>
    /// <param name="value">The reference to load from.</param>
    /// <param name="mask">The mask to use for conditional loading.</param>
    /// <returns>The loaded lane value.</returns>
    static abstract TSelf MaskLoad(ref TNumber value, TSelf mask);

    /// <summary>
    /// Uses the specified mask to conditionally load lane values from the given pointer, returning a lane value
    /// where masked lanes are loaded and unmasked lanes are set to zero.
    /// </summary>
    /// <param name="pValue">The pointer to load from.</param>
    /// <param name="mask">The mask to use for conditional loading.</param>
    /// <returns>The loaded lane value.</returns>
    static abstract TSelf MaskLoad(TNumber* pValue, TSelf mask);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base address.
    /// </summary>
    /// <param name="pData">The base address from which to gather values.</param>
    /// <param name="indices">The indices of the values to gather.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf Gather(TNumber* pData, TSelf indices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base address.
    /// </summary>
    /// <param name="pData">The base address from which to gather values.</param>
    /// <param name="pIndices">The pointer to the indices of the values to gather.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf Gather(TNumber* pData, int* pIndices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base address.
    /// </summary>
    /// <param name="baseAddress">The base address from which to gather values.</param>
    /// <param name="indices">The indices of the values to gather.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf Gather(ref TNumber baseAddress, TSelf indices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base address.
    /// </summary>
    /// <param name="baseAddress">The base address from which to gather values.</param>
    /// <param name="baseIndex">The reference to the base index.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf Gather(ref TNumber baseAddress, ref int baseIndex, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base
    /// address, but only for lanes where the corresponding mask bit is set; other lanes are set to zero.
    /// </summary>
    /// <param name="pData">The base address from which to gather values.</param>
    /// <param name="indices">The indices of the values to gather.</param>
    /// <param name="mask">The mask value that determines which elements are included in the gathering operation.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf MaskGather(TNumber* pData, TSelf indices, TSelf mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Gathers lane values from the specified base address and indices, returning a lane value where each lane is
    /// loaded from the address computed by adding the corresponding index (multiplied by the scale) to the base
    /// address, but only for lanes where the corresponding mask bit is set; other lanes are set to zero.
    /// </summary>
    /// <param name="pData">The base address from which to gather values.</param>
    /// <param name="pIndices">The pointer to the indices of the values to gather.</param>
    /// <param name="mask">The mask value that determines which elements are included in the gathering operation.</param>
    /// <param name="scale">The scale factor for the indices; expected to be a constant in the range 1 to 8.</param>
    /// <returns>The gathered lane value.</returns>
    static abstract TSelf MaskGather(TNumber* pData, int* pIndices, TSelf mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale);

    /// <summary>
    /// Stores the lane value to the specified reference.
    /// </summary>
    /// <param name="destination">The reference to store to.</param>
    void Store(ref TNumber destination);

    /// <summary>
    /// Stores the lane value to the specified pointer.
    /// </summary>
    /// <param name="pDestination">The pointer to store to.</param>
    void Store(TNumber* pDestination);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result in the provided destination
    /// variable.
    /// </summary>
    /// <param name="destination">A reference to the variable where the compressed data will be stored.</param>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    int CompressStore(ref TNumber destination, TSelf mask);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result at the provided destination
    /// pointer.
    /// </summary>
    /// <param name="pDestination">A pointer to the location where the compressed data will be stored.</param>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    int CompressStore(TNumber* pDestination, TSelf mask);

    /// <summary>
    /// Masks the lane value with the specified mask and stores the result to the given pointer, where masked lanes
    /// are stored and unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="pDestination">A pointer to the location where the masked data will be stored.</param>
    /// <param name="mask">A mask value that determines which elements are included in the masking operation.</param>
    void MaskStore(TNumber* pDestination, TSelf mask);

    /// <summary>
    /// Masks the lane value with the specified mask and stores the result to the given reference, where masked lanes
    /// are stored and unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="destination">A reference to the variable where the masked data will be stored.</param>
    /// <param name="mask">A mask value that determines which elements are included in the masking operation.</param>
    void MaskStore(ref TNumber destination, TSelf mask);

    /// <summary>
    /// Scatters the lane value to the specified base address and indices, where each lane is stored to the address
    /// computed from the base address and the corresponding index.
    /// </summary>
    /// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
    /// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void Scatter(TNumber* pDst, TSelf indices);

    /// <summary>
    /// Scatters the lane value to the specified base address and indices, where each lane is stored to the address
    /// computed from the base address and the corresponding index.
    /// </summary>
    /// <param name="destination">A reference to the variable where the scattered data will be stored.</param>
    /// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void Scatter(ref TNumber destination, TSelf indices);

    /// <summary>
    /// Scatters the lane value to the specified base address and indices, where each lane is stored to the address
    /// computed from the base address and the corresponding index.
    /// </summary>
    /// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
    /// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void Scatter(TNumber* pDst, int* pIndices);

    /// <summary>
    /// Scatters the lane value to the specified base address and indices, where each lane is stored to the address
    /// computed from the base address and the corresponding index.
    /// </summary>
    /// <param name="destination">A reference to the variable where the scattered data will be stored.</param>
    /// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void Scatter(ref TNumber destination, int* pIndices);

    /// <summary>
    /// Masks the lane value with the specified mask and scatters the result to the given base address and indices,
    /// where masked lanes are stored to the address computed from the base address and the corresponding index, and
    /// unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
    /// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
    /// <param name="mask">A mask value that determines which lanes to scatter.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void MaskScatter(TNumber* pDst, TSelf indices, TSelf mask);

    /// <summary>
    /// Masks the lane value with the specified mask and scatters the result to the given base address and indices,
    /// where masked lanes are stored to the address computed from the base address and the corresponding index, and
    /// unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="destination">A reference to the variable where the scattered data will be stored.</param>
    /// <param name="indices">A vector of indices that determine the destinations of each lane.</param>
    /// <param name="mask">A mask value that determines which lanes to scatter.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void MaskScatter(ref TNumber destination, TSelf indices, TSelf mask);

    /// <summary>
    /// Masks the lane value with the specified mask and scatters the result to the given base address and indices,
    /// where masked lanes are stored to the address computed from the base address and the corresponding index, and
    /// unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="pDst">A pointer to the base address where the data will be scattered.</param>
    /// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
    /// <param name="mask">A mask value that determines which lanes to scatter.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void MaskScatter(TNumber* pDst, int* pIndices, TSelf mask);

    /// <summary>
    /// Masks the lane value with the specified mask and scatters the result to the given base address and indices,
    /// where masked lanes are stored to the address computed from the base address and the corresponding index, and
    /// unmasked lanes are left unchanged in the destination.
    /// </summary>
    /// <param name="destination">A reference to the variable where the scattered data will be stored.</param>
    /// <param name="pIndices">A pointer to the array of indices that determine the destinations of each lane.</param>
    /// <param name="mask">A mask value that determines which lanes to scatter.</param>
    /// <remarks>
    /// Unlike the gather operations, this signature takes no scale parameter; index scaling is decided by the implementation.
    /// </remarks>
    void MaskScatter(ref TNumber destination, int* pIndices, TSelf mask);

    /// <summary>
    /// Converts the lane value to a vector.
    /// </summary>
    /// <returns>The backing vector representation.</returns>
    Vector<TNumber> AsVector();

    /// <summary>
    /// Gets a pointer to the lane's underlying data.
    /// </summary>
    /// <returns>A pointer to the lane's underlying data.</returns>
    TNumber* GetUnsafePtr();

    /// <summary>
    /// Casts the lane value to another SPMD lane type with a different underlying numeric type.
    /// </summary>
    /// <typeparam name="TOther">The type of the other SPMD lane.</typeparam>
    /// <typeparam name="TOtherNumber">The underlying numeric type of the other SPMD lane.</typeparam>
    /// <returns>The casted lane value.</returns>
    TOther Cast<TOther, TOtherNumber>()
        where TOther : ISPMDLane<TOther, TOtherNumber>
        where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>;

    /// <summary>
    /// Bitwise reinterprets the lane value as another SPMD lane type with a different underlying numeric type.
    /// </summary>
    /// <typeparam name="TOther">The type of the other SPMD lane.</typeparam>
    /// <typeparam name="TOtherNumber">The underlying numeric type of the other SPMD lane.</typeparam>
    /// <returns>The bit-cast lane value.</returns>
    TOther BitCast<TOther, TOtherNumber>()
        where TOther : ISPMDLane<TOther, TOtherNumber>
        where TOtherNumber : unmanaged, INumber<TOtherNumber>, IBinaryNumber<TOtherNumber>, IMinMaxValue<TOtherNumber>, IBitwiseOperators<TOtherNumber, TOtherNumber, TOtherNumber>;

    /// <summary>
    /// Adds two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise sum.</returns>
    static abstract TSelf operator +(TSelf a, TSelf b);

    /// <summary>
    /// Subtracts two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise difference.</returns>
    static abstract TSelf operator -(TSelf a, TSelf b);

    /// <summary>
    /// Multiplies two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise product.</returns>
    static abstract TSelf operator *(TSelf a, TSelf b);

    /// <summary>
    /// Divides two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise quotient.</returns>
    static abstract TSelf operator /(TSelf a, TSelf b);

    /// <summary>
    /// Computes the modulus of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise modulus.</returns>
    static abstract TSelf operator %(TSelf a, TSelf b);

    /// <summary>
    /// Negates the lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value to negate.</param>
    /// <returns>The negated lane value.</returns>
    static abstract TSelf operator -(TSelf a);

    /// <summary>
    /// Computes the bitwise AND of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise AND.</returns>
    static abstract TSelf operator &(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise OR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise OR.</returns>
    static abstract TSelf operator |(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise XOR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise XOR.</returns>
    static abstract TSelf operator ^(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise NOT of a lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <returns>The bitwise complement of the lane value.</returns>
    static abstract TSelf operator ~(TSelf a);

    /// <summary>
    /// Determines whether two instances of the type are equal component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the elements are equal; otherwise, all bits cleared.</returns>
    static abstract TSelf operator ==(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether two instances of the type are not equal component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the elements are not equal; otherwise, all bits cleared.</returns>
    static abstract TSelf operator !=(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether one instance of the type is greater than another instance component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is greater than the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator >(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether the first operand is greater than or equal to the second operand component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is greater than or equal to the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator >=(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether one instance of the type is less than another instance component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is less than the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator <(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether the first operand is less than or equal to the second operand component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is less than or equal to the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator <=(TSelf a, TSelf b);

    /// <summary>
    /// Implicitly converts a scalar numeric value to a lane value where all lanes are set to that value.
    /// </summary>
    /// <param name="value">The scalar numeric value to convert.</param>
    static abstract implicit operator TSelf(TNumber value);

    /// <summary>
    /// Computes the absolute value of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The absolute lane value.</returns>
    static abstract TSelf Abs(TSelf value);

    /// <summary>
    /// Computes the floor of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The lane value with each element rounded toward negative infinity.</returns>
    static abstract TSelf Floor(TSelf value);

    /// <summary>
    /// Computes the fractional part of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The fractional lane value.</returns>
    static abstract TSelf Frac(TSelf value);

    /// <summary>
    /// Computes the square root of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The square root lane value.</returns>
    static abstract TSelf Sqrt(TSelf value);

    /// <summary>
    /// Performs linear interpolation between two lane values.
    /// </summary>
    /// <param name="a">The start lane value.</param>
    /// <param name="b">The end lane value.</param>
    /// <param name="t">The interpolation factor.</param>
    /// <returns>The interpolated lane value.</returns>
    static abstract TSelf Lerp(TSelf a, TSelf b, TSelf t);

    /// <summary>
    /// Computes <c>a * b + c</c> element-wise.
    /// </summary>
    /// <param name="a">The first multiplier.</param>
    /// <param name="b">The second multiplier.</param>
    /// <param name="c">The addend.</param>
    /// <returns>The result of the multiply-add operation.</returns>
    /// <remarks>
    /// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
    /// </remarks>
    static abstract TSelf MultiplyAdd(TSelf a, TSelf b, TSelf c);

    /// <summary>
    /// Returns the minimum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the minimum of each element.</returns>
    static abstract TSelf Min(TSelf a, TSelf b);

    /// <summary>
    /// Returns the maximum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the maximum of each element.</returns>
    static abstract TSelf Max(TSelf a, TSelf b);

    /// <summary>
    /// Clamps each element of the lane value between the specified minimum and maximum values.
    /// </summary>
    /// <param name="value">The lane value to clamp.</param>
    /// <param name="min">The inclusive minimum.</param>
    /// <param name="max">The inclusive maximum.</param>
    /// <returns>The clamped lane value.</returns>
    static abstract TSelf Clamp(TSelf value, TSelf min, TSelf max);

    /// <summary>
    /// Saturates each element in the lane value to the 0..1 range.
    /// </summary>
    /// <param name="value">The lane value to saturate.</param>
    /// <returns>The saturated lane value.</returns>
    static abstract TSelf Saturate(TSelf value);

    /// <summary>
    /// Computes the sine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The sine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Sin(TSelf value);

    /// <summary>
    /// Computes the cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The cosine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Cos(TSelf value);

    /// <summary>
    /// Computes both sine and cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <param name="sin">When this method returns, contains the sine of each lane element.</param>
    /// <param name="cos">When this method returns, contains the cosine of each lane element.</param>
    /// <remarks>
    /// Implementations returning both sin and cos simultaneously can reuse intermediate values for better performance.
    /// </remarks>
    static abstract void SinCos(TSelf value, out TSelf sin, out TSelf cos);

    /// <summary>
    /// Computes the tangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The tangent of each lane element.</returns>
    /// <remarks>
    /// Many implementations use polynomial approximations and assume the input is reduced to [-pi/4, pi/4] for accuracy.
    /// </remarks>
    static abstract TSelf Tan(TSelf value);

    /// <summary>
    /// Computes the arcsine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arcsine of each lane element.</returns>
    /// <remarks>
    /// Implementations typically assume input is within [-1, 1] and may use polynomial approximations for performance.
    /// </remarks>
    static abstract TSelf Asin(TSelf value);

    /// <summary>
    /// Computes the arccosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arccosine of each lane element.</returns>
    /// <remarks>
    /// Input is expected to be in [-1, 1]; implementations often rely on approximation polynomials combined with range reduction.
    /// </remarks>
    static abstract TSelf Acos(TSelf value);

    /// <summary>
    /// Computes the arctangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arctangent of each lane element.</returns>
    /// <remarks>
    /// Polynomial approximations with restricted input ranges are commonly used for performance-sensitive implementations.
    /// </remarks>
    static abstract TSelf Atan(TSelf value);

    /// <summary>
    /// Computes the arctangent of y/x for each lane element.
    /// </summary>
    /// <param name="y">The numerator lane value.</param>
    /// <param name="x">The denominator lane value.</param>
    /// <returns>The arctangent of each lane pair.</returns>
    /// <remarks>
    /// Implementations often rely on quadrant-aware polynomial routines and assume inputs are finite to avoid NaNs.
    /// </remarks>
    static abstract TSelf Atan2(TSelf y, TSelf x);

    /// <summary>
    /// Raises each lane element to the specified power.
    /// </summary>
    /// <param name="x">The base lane value.</param>
    /// <param name="y">The exponent lane value. Cannot be negative.</param>
    /// <returns>The power result for each lane.</returns>
    static abstract TSelf Pow(TSelf x, TSelf y);

    /// <summary>
    /// Computes the exponential of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The exponential of each lane element.</returns>
    /// <remarks>
    /// Float and double implementations typically call into vectorized exp intrinsics; other types may fall back to scalar paths.
    /// </remarks>
    static abstract TSelf Exp(TSelf value);

    /// <summary>
    /// Computes 2 raised to each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 exponential of each lane element.</returns>
    /// <remarks>
    /// This can be implemented via a general power or exponential routine when no dedicated base-2 intrinsic exists.
    /// </remarks>
    static abstract TSelf Exp2(TSelf value);

    /// <summary>
    /// Computes the natural logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The natural logarithm of each lane element.</returns>
    /// <remarks>
    /// Vectorized logarithm instructions may only exist for floating-point types; other types should mimic the scalar behavior.
    /// </remarks>
    static abstract TSelf Log(TSelf value);

    /// <summary>
    /// Computes the base-2 logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 logarithm of each lane element.</returns>
    /// <remarks>
    /// If a dedicated base-2 intrinsic is unavailable, the implementation may compute <c>Log(value) / Log(2)</c>.
    /// </remarks>
    static abstract TSelf Log2(TSelf value);

    /// <summary>
    /// Computes the ceiling of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The smallest integral value greater than or equal to each element.</returns>
    /// <remarks>
    /// Implementations should use vectorized ceiling helpers for floating-point types when available.
    /// </remarks>
    static abstract TSelf Ceil(TSelf value);

    /// <summary>
    /// Rounds each lane element to the nearest integer value.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The rounded lane value.</returns>
    /// <remarks>
    /// Implementations should prefer vectorized round intrinsics for floating-point implementations.
    /// </remarks>
    static abstract TSelf Round(TSelf value);

    /// <summary>
    /// Truncates each lane element toward zero.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The truncated lane value.</returns>
    /// <remarks>
    /// Floating-point truncation typically maps to a vectorized truncate helper.
    /// </remarks>
    static abstract TSelf Trunc(TSelf value);

    /// <summary>
    /// Returns the sign of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>-1, 0, or 1 per lane.</returns>
    static abstract TSelf Sign(TSelf value);

    /// <summary>
    /// Copies the sign of the second lane value to the magnitude of the first.
    /// </summary>
    /// <param name="magnitude">The magnitude lane value.</param>
    /// <param name="sign">The sign lane value.</param>
    /// <returns>The result of merging magnitude with sign.</returns>
    static abstract TSelf CopySign(TSelf magnitude, TSelf sign);

    /// <summary>
    /// Computes the reciprocal of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal lane value.</returns>
    /// <remarks>
    /// Fast paths may use <c>Sse.Reciprocal</c> or <c>Avx.Reciprocal</c> when <typeparamref name="TNumber"/> is <see cref="float"/>.
    /// </remarks>
    static abstract TSelf Rcp(TSelf value);

    /// <summary>
    /// Computes the reciprocal square root of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal square root lane value.</returns>
    /// <remarks>
    /// Float implementations may prefer hardware reciprocal-sqrt intrinsics and fall back to <c>Create(TNumber.One) / Sqrt(x)</c> otherwise.
    /// </remarks>
    static abstract TSelf Rsqrt(TSelf value);

    /// <summary>
    /// Reduces the lane value to a single scalar by adding all lanes together.
    /// </summary>
    /// <param name="value">The lane value to reduce.</param>
    /// <returns>The reduced scalar value.</returns>
    static abstract TNumber ReduceAdd(TSelf value);

    /// <summary>
    /// Reduces the lane value to a single scalar by finding the maximum element.
    /// </summary>
    /// <param name="value">The lane value to reduce.</param>
    /// <returns>The reduced scalar value.</returns>
    static abstract TNumber ReduceMax(TSelf value);

    /// <summary>
    /// Reduces the lane value to a single scalar by finding the minimum element.
    /// </summary>
    /// <param name="value">The lane value to reduce.</param>
    /// <returns>The reduced scalar value.</returns>
    static abstract TNumber ReduceMin(TSelf value);

    /// <summary>
    /// Selects values from two lane values based on a condition mask.
    /// </summary>
    /// <param name="conditionMask">The condition mask.</param>
    /// <param name="ifTrue">The value to select if true.</param>
    /// <param name="ifFalse">The value to select if false.</param>
    /// <returns>The selected lane value.</returns>
    static abstract TSelf Select(TSelf conditionMask, TSelf ifTrue, TSelf ifFalse);

    /// <summary>
    /// Compares two lane values for greater than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than comparison result.</returns>
    static abstract TSelf GreaterThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for greater than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than or equal comparison result.</returns>
    static abstract TSelf GreaterThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than comparison result.</returns>
    static abstract TSelf LessThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than or equal comparison result.</returns>
    static abstract TSelf LessThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for equality element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the equality comparison result.</returns>
    static abstract TSelf Equal(TSelf a, TSelf b);

    /// <summary>
    /// Checks if any lane in the mask is true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if any lane is true; otherwise, false.</returns>
    static abstract bool Any(TSelf mask);

    /// <summary>
    /// Checks if all lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if all lanes are true; otherwise, false.</returns>
    static abstract bool All(TSelf mask);

    /// <summary>
    /// Checks if no lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if no lanes are true; otherwise, false.</returns>
    static abstract bool None(TSelf mask);
}