using System.Numerics;
namespace Misaki.HighPerformance.Mathematics.SPMD;
/// <summary>
/// Non-generic marker contract shared by every SPMD lane type.
/// </summary>
public interface ISPMD
{
    /// <summary>
    /// Gets the number of lanes (the vector width) of the SPMD implementation.
    /// </summary>
    static abstract int LaneWidth { get; }
}
// TODO:
// - ReduceAdd
// - ReduceMin
// - ReduceMax
// - LeadingZeroCount
// - TrailingZeroCount
// - PopCount
/// <summary>
/// Represents a single-lane or multi-lane (vectorized) SPMD value and the operations supported on it.
/// </summary>
/// <typeparam name="TSelf">The concrete SPMD lane type implementing this interface.</typeparam>
/// <typeparam name="TNumber">The underlying numeric element type.</typeparam>
public interface ISPMD<TSelf, TNumber> : ISPMD, IEquatable<TSelf>
    where TSelf : ISPMD<TSelf, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Gets a lane value where all lanes are set to numeric zero.
    /// </summary>
    static abstract TSelf Zero { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to numeric one.
    /// </summary>
    static abstract TSelf One { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the minimum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MinValue { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the maximum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MaxValue { get; }

    /// <summary>
    /// Gets the element value for the specified lane index.
    /// </summary>
    /// <param name="index">The zero-based lane index.</param>
    TNumber this[int index] { get; }

    /// <summary>
    /// Creates a lane value where all lanes are set to the specified value.
    /// </summary>
    /// <param name="value">The value to set for all lanes.</param>
    /// <returns>The created lane value.</returns>
    static abstract TSelf Create(TNumber value);

    /// <summary>
    /// Creates a new instance of the type from the specified sequence of numeric values.
    /// </summary>
    /// <param name="values">A read-only span (or an inline argument list) containing the numeric values to use for initialization.</param>
    /// <returns>A new instance of the type initialized with the provided numeric values.</returns>
    static abstract TSelf Create(params ReadOnlySpan<TNumber> values);

    /// <summary>
    /// Creates a lane value from the specified vector.
    /// </summary>
    /// <param name="value">The vector to create the lane value from.</param>
    /// <returns>The lane value built from the vector.</returns>
    static abstract TSelf Create(Vector<TNumber> value);

    /// <summary>
    /// Creates a lane value with a sequence starting from the specified value with the given step.
    /// </summary>
    /// <param name="start">The starting value.</param>
    /// <param name="step">The step value for the sequence.</param>
    /// <returns>The lane value containing the arithmetic sequence.</returns>
    /// <remarks>
    /// Implementations may rely on vector creation helpers and assume that the resulting sequence length matches <see cref="ISPMD.LaneWidth"/>.
    /// </remarks>
    static abstract TSelf Sequence(TNumber start, TNumber step);

    /// <summary>
    /// Loads a lane value from the specified reference.
    /// </summary>
    /// <param name="value">The reference to load from.</param>
    /// <returns>The loaded lane value.</returns>
    static abstract TSelf Load(ref TNumber value);

    /// <summary>
    /// Loads a lane value from the specified pointer.
    /// </summary>
    /// <param name="pValue">The pointer to load from.</param>
    /// <returns>The loaded lane value.</returns>
    /// <remarks>
    /// Unsafe pointer overloads are provided for scenarios where sequential lane data is already contiguous in memory.
    /// </remarks>
    static abstract unsafe TSelf Load(TNumber* pValue);

    /// <summary>
    /// Stores the lane value to the specified reference.
    /// </summary>
    /// <param name="destination">The reference to store to.</param>
    void Store(ref TNumber destination);

    /// <summary>
    /// Stores the lane value to the specified pointer.
    /// </summary>
    /// <param name="pDestination">The pointer to store to.</param>
    unsafe void Store(TNumber* pDestination);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result in the provided destination
    /// variable.
    /// </summary>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <param name="destination">A reference to the variable where the compressed data will be stored.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    int CompressStore(TSelf mask, ref TNumber destination);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result in the provided destination
    /// variable.
    /// </summary>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <param name="pDestination">A pointer to the variable where the compressed data will be stored.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    unsafe int CompressStore(TSelf mask, TNumber* pDestination);

    /// <summary>
    /// Converts the lane value to a vector.
    /// </summary>
    /// <returns>The backing vector representation.</returns>
    Vector<TNumber> AsVector();

    /// <summary>
    /// Adds two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise sum.</returns>
    static abstract TSelf operator +(TSelf a, TSelf b);

    /// <summary>
    /// Subtracts two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise difference.</returns>
    static abstract TSelf operator -(TSelf a, TSelf b);

    /// <summary>
    /// Multiplies two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise product.</returns>
    static abstract TSelf operator *(TSelf a, TSelf b);

    /// <summary>
    /// Divides two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise quotient.</returns>
    static abstract TSelf operator /(TSelf a, TSelf b);

    /// <summary>
    /// Computes the modulus of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise modulus.</returns>
    static abstract TSelf operator %(TSelf a, TSelf b);

    /// <summary>
    /// Negates the lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value to negate.</param>
    /// <returns>The negated lane value.</returns>
    static abstract TSelf operator -(TSelf a);

    /// <summary>
    /// Computes the bitwise AND of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise AND.</returns>
    static abstract TSelf operator &(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise OR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise OR.</returns>
    static abstract TSelf operator |(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise XOR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise XOR.</returns>
    static abstract TSelf operator ^(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise NOT of a lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <returns>The bitwise complement of the lane value.</returns>
    static abstract TSelf operator ~(TSelf a);

    /// <summary>
    /// Determines whether two instances of the type are equal component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the elements are equal; otherwise, all bits cleared.</returns>
    static abstract TSelf operator ==(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether two instances of the type are not equal component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the elements are not equal; otherwise, all bits cleared.</returns>
    static abstract TSelf operator !=(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether one instance of the type is greater than another instance component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is greater than the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator >(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether the first operand is greater than or equal to the second operand component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is greater than or equal to the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator >=(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether one instance of the type is less than another instance component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is less than the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator <(TSelf a, TSelf b);

    /// <summary>
    /// Determines whether the first operand is less than or equal to the second operand component-wise.
    /// </summary>
    /// <param name="a">The first value to compare.</param>
    /// <param name="b">The second value to compare.</param>
    /// <returns>All bits set where the first parameter is less than or equal to the second parameter; otherwise, all bits cleared.</returns>
    static abstract TSelf operator <=(TSelf a, TSelf b);

    /// <summary>
    /// Implicitly converts a scalar numeric value to a lane value where all lanes are set to that value.
    /// </summary>
    /// <param name="value">The scalar numeric value to convert.</param>
    static abstract implicit operator TSelf(TNumber value);

    /// <summary>
    /// Computes the absolute value of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The absolute lane value.</returns>
    static abstract TSelf Abs(TSelf value);

    /// <summary>
    /// Computes the floor of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The lane value with each element rounded toward negative infinity.</returns>
    static abstract TSelf Floor(TSelf value);

    /// <summary>
    /// Computes the fractional part of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The fractional lane value.</returns>
    static abstract TSelf Frac(TSelf value);

    /// <summary>
    /// Computes the square root of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The square root lane value.</returns>
    static abstract TSelf Sqrt(TSelf value);

    /// <summary>
    /// Performs linear interpolation between two lane values.
    /// </summary>
    /// <param name="a">The start lane value.</param>
    /// <param name="b">The end lane value.</param>
    /// <param name="t">The interpolation factor.</param>
    /// <returns>The interpolated lane value.</returns>
    static abstract TSelf Lerp(TSelf a, TSelf b, TSelf t);

    /// <summary>
    /// Computes <c>a * b + c</c> element-wise.
    /// </summary>
    /// <param name="a">The first multiplier.</param>
    /// <param name="b">The second multiplier.</param>
    /// <param name="c">The addend.</param>
    /// <returns>The result of the fused multiply-add operation.</returns>
    /// <remarks>
    /// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
    /// </remarks>
    // NOTE(review): the name reads like a misspelling of "MultiplyAdd"; it is kept unchanged so existing implementers and callers keep compiling.
    static abstract TSelf MultipleAdd(TSelf a, TSelf b, TSelf c);

    /// <summary>
    /// Returns the minimum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the minimum of each element.</returns>
    static abstract TSelf Min(TSelf a, TSelf b);

    /// <summary>
    /// Returns the maximum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the maximum of each element.</returns>
    static abstract TSelf Max(TSelf a, TSelf b);

    /// <summary>
    /// Clamps each element of the lane value between the specified minimum and maximum values.
    /// </summary>
    /// <param name="value">The lane value to clamp.</param>
    /// <param name="min">The inclusive minimum.</param>
    /// <param name="max">The inclusive maximum.</param>
    /// <returns>The clamped lane value.</returns>
    static abstract TSelf Clamp(TSelf value, TSelf min, TSelf max);

    /// <summary>
    /// Saturates each element in the lane value to the 0..1 range.
    /// </summary>
    /// <param name="value">The lane value to saturate.</param>
    /// <returns>The saturated lane value.</returns>
    static abstract TSelf Saturate(TSelf value);

    /// <summary>
    /// Computes the sine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The sine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Sin(TSelf value);

    /// <summary>
    /// Computes the cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The cosine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Cos(TSelf value);

    /// <summary>
    /// Computes both sine and cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>A tuple containing sine and cosine lane values.</returns>
    /// <remarks>
    /// Implementations returning both sin and cos simultaneously can reuse intermediate values for better performance.
    /// </remarks>
    static abstract (TSelf sin, TSelf cos) SinCos(TSelf value);

    /// <summary>
    /// Computes the tangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The tangent of each lane element.</returns>
    /// <remarks>
    /// Many implementations use polynomial approximations and assume the input is reduced to [-pi/4, pi/4] for accuracy.
    /// </remarks>
    static abstract TSelf Tan(TSelf value);

    /// <summary>
    /// Computes the arcsine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arcsine of each lane element.</returns>
    /// <remarks>
    /// Implementations typically assume input is within [-1, 1] and may use polynomial approximations for performance.
    /// </remarks>
    static abstract TSelf Asin(TSelf value);

    /// <summary>
    /// Computes the arccosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arccosine of each lane element.</returns>
    /// <remarks>
    /// Input is expected to be in [-1, 1]; implementations often rely on approximation polynomials combined with range reduction.
    /// </remarks>
    static abstract TSelf Acos(TSelf value);

    /// <summary>
    /// Computes the arctangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arctangent of each lane element.</returns>
    /// <remarks>
    /// Polynomial approximations with restricted input ranges are commonly used for performance-sensitive implementations.
    /// </remarks>
    static abstract TSelf Atan(TSelf value);

    /// <summary>
    /// Computes the arctangent of y/x for each lane element.
    /// </summary>
    /// <param name="y">The numerator lane value.</param>
    /// <param name="x">The denominator lane value.</param>
    /// <returns>The arctangent of each lane pair.</returns>
    /// <remarks>
    /// Implementations often rely on quadrant-aware polynomial routines and assume inputs are finite to avoid NaNs.
    /// </remarks>
    static abstract TSelf Atan2(TSelf y, TSelf x);

    /// <summary>
    /// Raises each lane element to the specified power.
    /// </summary>
    /// <param name="x">The base lane value.</param>
    /// <param name="y">The exponent lane value. Cannot be negative.</param>
    /// <returns>The power result for each lane.</returns>
    static abstract TSelf Pow(TSelf x, TSelf y);

    /// <summary>
    /// Computes the exponential of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The exponential of each lane element.</returns>
    /// <remarks>
    /// Float and double implementations typically call into vectorized exp intrinsics; other types may fall back to scalar paths.
    /// </remarks>
    static abstract TSelf Exp(TSelf value);

    /// <summary>
    /// Computes 2 raised to each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 exponential of each lane element.</returns>
    /// <remarks>
    /// When no dedicated base-2 intrinsic exists, this can be expressed through <see cref="Pow(TSelf, TSelf)"/> with a base of two,
    /// or through <see cref="Exp(TSelf)"/> applied to the value scaled by ln(2).
    /// </remarks>
    static abstract TSelf Exp2(TSelf value);

    /// <summary>
    /// Computes the natural logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The natural logarithm of each lane element.</returns>
    /// <remarks>
    /// Vectorized logarithm instructions may only exist for floating-point types; other types should mimic the scalar behavior.
    /// </remarks>
    static abstract TSelf Log(TSelf value);

    /// <summary>
    /// Computes the base-2 logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 logarithm of each lane element.</returns>
    /// <remarks>
    /// If a dedicated base-2 intrinsic is unavailable, the implementation may compute <c>Log(value) / Log(2)</c>.
    /// </remarks>
    static abstract TSelf Log2(TSelf value);

    /// <summary>
    /// Computes the ceiling of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The smallest integral value greater than or equal to each element.</returns>
    /// <remarks>
    /// Implementations should use vectorized ceiling helpers (such as <c>Vector.Ceiling</c>) for floating-point types when available.
    /// </remarks>
    static abstract TSelf Ceil(TSelf value);

    /// <summary>
    /// Rounds each lane element to the nearest integer value.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The rounded lane value.</returns>
    /// <remarks>
    /// Implementations should prefer vectorized round intrinsics for floating-point implementations.
    /// </remarks>
    static abstract TSelf Round(TSelf value);

    /// <summary>
    /// Truncates each lane element toward zero.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The truncated lane value.</returns>
    /// <remarks>
    /// Floating-point truncation typically maps to a vectorized truncate helper (such as <c>Vector.Truncate</c>).
    /// </remarks>
    static abstract TSelf Trunc(TSelf value);

    /// <summary>
    /// Returns the sign of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>-1, 0, or 1 per lane.</returns>
    static abstract TSelf Sign(TSelf value);

    /// <summary>
    /// Copies the sign of the second lane value to the magnitude of the first.
    /// </summary>
    /// <param name="magnitude">The magnitude lane value.</param>
    /// <param name="sign">The sign lane value.</param>
    /// <returns>The result of merging magnitude with sign.</returns>
    static abstract TSelf CopySign(TSelf magnitude, TSelf sign);

    /// <summary>
    /// Computes the reciprocal of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal lane value.</returns>
    /// <remarks>
    /// Fast paths may use <c>Sse.Reciprocal</c> or <c>Avx.Reciprocal</c> when <typeparamref name="TNumber"/> is float.
    /// </remarks>
    static abstract TSelf Rcp(TSelf value);

    /// <summary>
    /// Computes the reciprocal square root of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal square root lane value.</returns>
    /// <remarks>
    /// Float implementations may prefer hardware reciprocal-sqrt intrinsics and fall back to <c>Create(TNumber.One) / Sqrt(x)</c> otherwise.
    /// </remarks>
    static abstract TSelf Rsqrt(TSelf value);

    /// <summary>
    /// Selects values from two lane values based on a condition mask.
    /// </summary>
    /// <param name="conditionMask">The condition mask.</param>
    /// <param name="ifTrue">The value to select if true.</param>
    /// <param name="ifFalse">The value to select if false.</param>
    /// <returns>The selected lane value.</returns>
    static abstract TSelf Select(TSelf conditionMask, TSelf ifTrue, TSelf ifFalse);

    /// <summary>
    /// Compares two lane values for greater than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than comparison result.</returns>
    static abstract TSelf GreaterThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for greater than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than or equal comparison result.</returns>
    static abstract TSelf GreaterThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than comparison result.</returns>
    static abstract TSelf LessThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than or equal comparison result.</returns>
    static abstract TSelf LessThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for equality element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the equality comparison result.</returns>
    static abstract TSelf Equal(TSelf a, TSelf b);

    /// <summary>
    /// Checks if any lane in the mask is true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if any lane is true; otherwise, false.</returns>
    static abstract bool Any(TSelf mask);

    /// <summary>
    /// Checks if all lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if all lanes are true; otherwise, false.</returns>
    static abstract bool All(TSelf mask);

    /// <summary>
    /// Checks if no lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if no lanes are true; otherwise, false.</returns>
    static abstract bool None(TSelf mask);
}