SPMD SIMD math library & lock-free job system integration
- Add new SPMD SIMD math project with scalar/vector lanes
- Integrate SPMD jobs and scheduling into the job system
- Implement lock-free job dependency management
- Update math functions for .NET 10 and SIMD performance
- Add SPMD benchmarks, compress-store tests, and race tests
- Introduce generic Result<T> error handling utilities
- Update solution/project files and clean up code
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
namespace Misaki.HighPerformance.Jobs;
|
namespace Misaki.HighPerformance.Jobs;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Represents a job that performs a single unit of work.
|
/// Represents a job that performs a single unit of work.
|
||||||
@@ -24,3 +24,24 @@ public interface IJobParallelFor
|
|||||||
/// <param name="threadIndex">The index of the thread executing the job, useful for thread-specific operations.</param>
|
/// <param name="threadIndex">The index of the thread executing the job, useful for thread-specific operations.</param>
|
||||||
void Execute(int loopIndex, int threadIndex);
|
void Execute(int loopIndex, int threadIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods for executing an <see cref="IJob"/> directly, without going through a scheduler.
/// </summary>
public static class IJobExtensions
{
    /// <summary>
    /// Executes <paramref name="job"/> immediately by calling its <c>Execute</c> method once.
    /// </summary>
    /// <typeparam name="T">The unmanaged job type.</typeparam>
    /// <param name="job">The job to execute. It is taken by reference, so the call operates on the caller's instance rather than a copy.</param>
    /// <param name="threadIndex">The thread index forwarded to the job.</param>
    public static void Run<T>(this ref T job, int threadIndex)
        where T : unmanaged, IJob
        => job.Execute(threadIndex);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods for executing an <see cref="IJobParallelFor"/> directly, without going through a scheduler.
/// </summary>
public static class IJobParallelForExtensions
{
    /// <summary>
    /// Executes every iteration of <paramref name="job"/> sequentially, in ascending index order.
    /// </summary>
    /// <typeparam name="T">The unmanaged job type.</typeparam>
    /// <param name="job">The job to execute. It is taken by reference, so the call operates on the caller's instance rather than a copy.</param>
    /// <param name="totalIterations">The number of iterations to run; indices <c>0</c> through <c>totalIterations - 1</c> are passed to the job. Zero or negative values run nothing.</param>
    /// <param name="threadIndex">The thread index forwarded to every iteration.</param>
    public static void Run<T>(this ref T job, int totalIterations, int threadIndex)
        where T : unmanaged, IJobParallelFor
    {
        var loopIndex = 0;

        while (loopIndex < totalIterations)
        {
            job.Execute(loopIndex, threadIndex);
            loopIndex++;
        }
    }
}
|
||||||
@@ -1,18 +1,21 @@
|
|||||||
namespace Misaki.HighPerformance.Jobs;
|
namespace Misaki.HighPerformance.Jobs;
|
||||||
|
|
||||||
public readonly struct JobHandle : IEquatable<JobHandle>
|
public readonly struct JobHandle : IEquatable<JobHandle>
|
||||||
{
|
{
|
||||||
internal readonly int _id;
|
private readonly int _id;
|
||||||
internal readonly int _generation;
|
private readonly int _generation;
|
||||||
|
|
||||||
public static JobHandle Invalid => new(-1, -1);
|
public int ID => _id - 1;
|
||||||
|
public int Generation => _generation - 1;
|
||||||
|
|
||||||
|
public static JobHandle Invalid => default;
|
||||||
|
|
||||||
public bool IsValid => this != Invalid;
|
public bool IsValid => this != Invalid;
|
||||||
|
|
||||||
/// <summary>
/// Creates a handle for the given slot id and generation.
/// </summary>
/// <remarks>
/// Both values are stored with a +1 bias, which the <c>ID</c> and <c>Generation</c> properties subtract again.
/// Because of the bias, an id/generation pair of <c>-1</c>/<c>-1</c> is stored as zeros and is therefore
/// indistinguishable from <c>default(JobHandle)</c>.
/// </remarks>
/// <param name="id">The unbiased slot id.</param>
/// <param name="generation">The unbiased slot generation.</param>
internal JobHandle(int id, int generation)
{
    (_id, _generation) = (id + 1, generation + 1);
}
|
||||||
|
|
||||||
public bool Equals(JobHandle other)
|
public bool Equals(JobHandle other)
|
||||||
|
|||||||
@@ -87,7 +87,7 @@ public interface IJobScheduler
|
|||||||
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
||||||
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
||||||
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
||||||
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex)
|
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex)
|
||||||
where T : unmanaged, IJobParallelFor;
|
where T : unmanaged, IJobParallelFor;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -100,7 +100,7 @@ public interface IJobScheduler
|
|||||||
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
||||||
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
||||||
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
||||||
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency)
|
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency)
|
||||||
where T : unmanaged, IJobParallelFor;
|
where T : unmanaged, IJobParallelFor;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -113,7 +113,7 @@ public interface IJobScheduler
|
|||||||
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
|
||||||
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
|
||||||
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
|
||||||
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize)
|
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize)
|
||||||
where T : unmanaged, IJobParallelFor;
|
where T : unmanaged, IJobParallelFor;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -122,7 +122,7 @@ public interface IJobScheduler
|
|||||||
/// <param name="dependencies">A collection of <see cref="JobHandle"/> instances representing the dependencies to combine.</param>
|
/// <param name="dependencies">A collection of <see cref="JobHandle"/> instances representing the dependencies to combine.</param>
|
||||||
/// <returns>A <see cref="JobHandle"/> that represents the combined dependencies. The returned handle can be used to ensure
|
/// <returns>A <see cref="JobHandle"/> that represents the combined dependencies. The returned handle can be used to ensure
|
||||||
/// that all specified dependencies are completed before proceeding.</returns>
|
/// that all specified dependencies are completed before proceeding.</returns>
|
||||||
public JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies);
|
JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Retrieves the current status of a job identified by the specified handle.
|
/// Retrieves the current status of a job identified by the specified handle.
|
||||||
@@ -130,13 +130,13 @@ public interface IJobScheduler
|
|||||||
/// <param name="handle">The handle representing the job whose status is to be retrieved. The handle must be valid.</param>
|
/// <param name="handle">The handle representing the job whose status is to be retrieved. The handle must be valid.</param>
|
||||||
/// <returns>The current status of the job as a <see cref="JobState"/> value.
|
/// <returns>The current status of the job as a <see cref="JobState"/> value.
|
||||||
/// Returns <see cref="JobState.Invalid"/> if the handle is invalid or the job does not exist.</returns>
|
/// Returns <see cref="JobState.Invalid"/> if the handle is invalid or the job does not exist.</returns>
|
||||||
public JobState GetJobStatus(JobHandle handle);
|
JobState GetJobStatus(JobHandle handle);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Blocks the calling thread until the specified job is completed.
|
/// Blocks the calling thread until the specified job is completed.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="handle">The handle of the job to wait for.</param>
|
/// <param name="handle">The handle of the job to wait for.</param>
|
||||||
public void WaitComplete(JobHandle handle);
|
void WaitComplete(JobHandle handle);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Blocks the calling thread until all specified job handles have completed.
|
/// Blocks the calling thread until all specified job handles have completed.
|
||||||
@@ -147,7 +147,7 @@ public interface IJobScheduler
|
|||||||
/// concurrently from multiple threads.</remarks>
|
/// concurrently from multiple threads.</remarks>
|
||||||
/// <param name="handles">A collection of job handles to wait for. Each handle represents an asynchronous job whose completion is awaited.
|
/// <param name="handles">A collection of job handles to wait for. Each handle represents an asynchronous job whose completion is awaited.
|
||||||
/// The collection must not be empty.</param>
|
/// The collection must not be empty.</param>
|
||||||
public void WaitAll(params ReadOnlySpan<JobHandle> handles);
|
void WaitAll(params ReadOnlySpan<JobHandle> handles);
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Waits until any of the specified job handles has completed and returns the first completed handle.
|
/// Waits until any of the specified job handles has completed and returns the first completed handle.
|
||||||
@@ -158,12 +158,14 @@ public interface IJobScheduler
|
|||||||
/// <param name="handles">A read-only span containing the job handles to monitor for completion. Each handle represents a job whose
|
/// <param name="handles">A read-only span containing the job handles to monitor for completion. Each handle represents a job whose
|
||||||
/// completion status will be checked.</param>
|
/// completion status will be checked.</param>
|
||||||
/// <returns>The first job handle from the provided collection that has completed.</returns>
|
/// <returns>The first job handle from the provided collection that has completed.</returns>
|
||||||
public JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles);
|
JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles);
|
||||||
}
|
}
|
||||||
|
|
||||||
public unsafe partial class JobScheduler
|
public unsafe partial class JobScheduler
|
||||||
{
|
{
|
||||||
public static readonly TempJobAllocator* pTempAllocator;
|
public static int MainThreadIndex => -1;
|
||||||
|
|
||||||
|
public static TempJobAllocator* pTempAllocator;
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Gets the allocation handle for the temporary job allocator.
|
/// Gets the allocation handle for the temporary job allocator.
|
||||||
@@ -173,7 +175,7 @@ public unsafe partial class JobScheduler
|
|||||||
/// </remarks>
|
/// </remarks>
|
||||||
public static AllocationHandle TempAllocatorHandle => pTempAllocator->Handle;
|
public static AllocationHandle TempAllocatorHandle => pTempAllocator->Handle;
|
||||||
|
|
||||||
static JobScheduler()
|
public static void InitTempAllocator()
|
||||||
{
|
{
|
||||||
pTempAllocator = (TempJobAllocator*)MemoryUtility.Malloc((nuint)sizeof(TempJobAllocator));
|
pTempAllocator = (TempJobAllocator*)MemoryUtility.Malloc((nuint)sizeof(TempJobAllocator));
|
||||||
pTempAllocator->Init();
|
pTempAllocator->Init();
|
||||||
@@ -194,14 +196,18 @@ public unsafe partial class JobScheduler
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
||||||
{
|
{
|
||||||
private const int _SLEEP_THRESHOLD = 100;
|
// -1 disables SpinWait's Sleep(1) fallback. Sleeping can stretch a wait for a ~1 ms job to ~15 ms.
|
||||||
|
private const int _SLEEP_THRESHOLD = -1;
|
||||||
|
|
||||||
|
// Lock-Free constants: State mask (low 16 bits) and RC unit (1 << 16)
|
||||||
|
private const int _STATE_MASK = 0xFFFF;
|
||||||
|
private const int _RC_ONE = 0x10000;
|
||||||
|
|
||||||
private FreeList _jobDataAllocator;
|
private FreeList _jobDataAllocator;
|
||||||
private readonly ConcurrentSlotMap<JobInfo> _jobInfoPool;
|
private readonly ConcurrentSlotMap<JobInfo> _jobInfoPool;
|
||||||
private readonly ConcurrentQueue<JobHandle> _jobQueue;
|
private readonly ConcurrentQueue<JobHandle> _jobQueue;
|
||||||
private readonly WorkerThread[] _workerThreads;
|
private readonly WorkerThread[] _workerThreads;
|
||||||
|
|
||||||
private readonly Lock _lock;
|
|
||||||
private readonly SemaphoreSlim _workSignal;
|
private readonly SemaphoreSlim _workSignal;
|
||||||
private readonly CancellationTokenSource _cts;
|
private readonly CancellationTokenSource _cts;
|
||||||
|
|
||||||
@@ -221,7 +227,6 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
_jobInfoPool = new();
|
_jobInfoPool = new();
|
||||||
_jobQueue = new();
|
_jobQueue = new();
|
||||||
|
|
||||||
_lock = new();
|
|
||||||
_workSignal = new(0);
|
_workSignal = new(0);
|
||||||
_cts = new();
|
_cts = new();
|
||||||
|
|
||||||
@@ -246,10 +251,11 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
|
|
||||||
private void EnqueueJobIfReady(JobHandle handle)
|
private void EnqueueJobIfReady(JobHandle handle)
|
||||||
{
|
{
|
||||||
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist);
|
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
|
||||||
|
|
||||||
if (exist && Volatile.Read(ref jobInfo.dependencyCount) == 0)
|
if (exist && Volatile.Read(ref jobInfo.dependencyCount) == 0)
|
||||||
{
|
{
|
||||||
|
// Note: JobState.Created is 0, JobState.Scheduled is 1. We assume RC logic doesn't touch initial state (RC=0).
|
||||||
if (Interlocked.CompareExchange(ref jobInfo.state, JobState.Scheduled, JobState.Created) != JobState.Created)
|
if (Interlocked.CompareExchange(ref jobInfo.state, JobState.Scheduled, JobState.Created) != JobState.Created)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
@@ -293,29 +299,75 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
lock (_lock)
|
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(dependency.ID, dependency.Generation, out var exist);
|
||||||
{
|
if (!exist)
|
||||||
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(dependency._id, dependency._generation, out var exist);
|
|
||||||
if (!exist || Volatile.Read(ref Unsafe.As<JobState, int>(ref depJobInfo.state)) == (int)JobState.Completed)
|
|
||||||
{
|
{
|
||||||
|
// Dependency does not exist (likely completed already)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (depJobInfo.dependentCount >= JobInfo.MAX_DEPENDENTS)
|
// Lock-free registration: Try to acquire "Reader Lock" by incrementing RC in high bits.
|
||||||
|
// If state is already Completed, we skip (dependency met).
|
||||||
|
var registered = false;
|
||||||
|
var completed = false;
|
||||||
|
var spin = new SpinWait();
|
||||||
|
|
||||||
|
while (true)
|
||||||
{
|
{
|
||||||
// Too many dependents
|
var stateVal = Volatile.Read(ref Unsafe.As<JobState, int>(ref depJobInfo.state));
|
||||||
// TODO: Handle this case properly
|
var state = (JobState)(stateVal & _STATE_MASK);
|
||||||
|
|
||||||
|
if (state == JobState.Completed)
|
||||||
|
{
|
||||||
|
completed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempt to increment RC (Reader Count)
|
||||||
|
if (Interlocked.CompareExchange(ref Unsafe.As<JobState, int>(ref depJobInfo.state), stateVal + _RC_ONE, stateVal) == stateVal)
|
||||||
|
{
|
||||||
|
// RC acquired. We are safe from "Remove" and state change.
|
||||||
|
var count = Interlocked.Increment(ref depJobInfo.dependentCount);
|
||||||
|
if (count <= JobInfo.MAX_DEPENDENTS)
|
||||||
|
{
|
||||||
|
// Safely write to the fixed buffer
|
||||||
|
depJobInfo.dependentsID[count - 1] = id;
|
||||||
|
depJobInfo.dependentsGeneration[count - 1] = generation;
|
||||||
|
registered = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Release RC
|
||||||
|
Interlocked.Add(ref Unsafe.As<JobState, int>(ref depJobInfo.state), -_RC_ONE);
|
||||||
|
|
||||||
|
if (!registered)
|
||||||
|
{
|
||||||
|
// Failed to register because MAX_DEPENDENTS reached.
|
||||||
|
// Backtrack the counter increment.
|
||||||
|
Interlocked.Decrement(ref depJobInfo.dependentCount);
|
||||||
|
|
||||||
|
// Cleanup and fail
|
||||||
_jobDataAllocator.Free(jobInfo.pJobData);
|
_jobDataAllocator.Free(jobInfo.pJobData);
|
||||||
return JobHandle.Invalid;
|
return JobHandle.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
depJobInfo.dependentsID[depJobInfo.dependentCount] = id;
|
break;
|
||||||
depJobInfo.dependentsGeneration[depJobInfo.dependentCount] = generation;
|
|
||||||
depJobInfo.dependentCount++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin.SpinOnce(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!registered && !completed)
|
||||||
|
{
|
||||||
|
// Should not happen if logic is correct, unless loop logic changed
|
||||||
Interlocked.Increment(ref infoInPool.dependencyCount);
|
Interlocked.Increment(ref infoInPool.dependencyCount);
|
||||||
}
|
}
|
||||||
|
else if (registered)
|
||||||
|
{
|
||||||
|
// Successfully added dependency
|
||||||
|
Interlocked.Increment(ref infoInPool.dependencyCount);
|
||||||
|
}
|
||||||
|
// else: completed is true, registered is false -> Dependency is already done, so we don't increment our dependencyCount.
|
||||||
|
}
|
||||||
|
|
||||||
EnqueueJobIfReady(handle);
|
EnqueueJobIfReady(handle);
|
||||||
|
|
||||||
@@ -325,7 +377,20 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
/// <summary>
/// Reports whether the shared job queue or any worker's local queue currently contains a job.
/// </summary>
/// <returns><c>true</c> if at least one of the queues is non-empty; otherwise <c>false</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool HasWork()
{
    if (_jobQueue.IsEmpty)
    {
        // Shared queue is empty; fall back to scanning each worker's local queue.
        foreach (var worker in _workerThreads)
        {
            if (!worker.LocalQueue.IsEmpty)
            {
                return true;
            }
        }

        return false;
    }

    return true;
}
|
||||||
|
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
@@ -360,7 +425,7 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
return ref Unsafe.NullRef<JobInfo>();
|
return ref Unsafe.NullRef<JobInfo>();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out exist);
|
return ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out exist);
|
||||||
}
|
}
|
||||||
|
|
||||||
internal void MarkJobComplete(JobHandle handle)
|
internal void MarkJobComplete(JobHandle handle)
|
||||||
@@ -370,37 +435,77 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref var info = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist);
|
ref var info = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
|
||||||
if (!exist)
|
if (!exist)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Interlocked.CompareExchange(ref info.state, JobState.Completed, JobState.Running) != JobState.Running)
|
// Lock-free Completion:
|
||||||
|
// 1. Transition State to Completed (preserving or setting upper bits?).
|
||||||
|
// Actually, we want to block new Readers. Setting state to Completed blocks new Readers.
|
||||||
|
// 2. Wait for existing Readers (RC == 0).
|
||||||
|
var spin = new SpinWait();
|
||||||
|
while (true)
|
||||||
{
|
{
|
||||||
return;
|
var stateVal = Volatile.Read(ref Unsafe.As<JobState, int>(ref info.state));
|
||||||
|
var state = (JobState)(stateVal & _STATE_MASK);
|
||||||
|
|
||||||
|
if (state == JobState.Completed)
|
||||||
|
{
|
||||||
|
return; // Already completed (shouldn't happen for single-execution jobs)
|
||||||
}
|
}
|
||||||
|
|
||||||
var dependentsToNotify = stackalloc JobHandle[JobInfo.MAX_DEPENDENTS];
|
if (state != JobState.Running)
|
||||||
var dependentCount = 0;
|
|
||||||
|
|
||||||
lock (_lock)
|
|
||||||
{
|
{
|
||||||
dependentCount = info.dependentCount;
|
// If in valid state (e.g. Scheduled?), we still assume we can complete it.
|
||||||
|
// Usually it should be Running.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct new value: State=Completed, preserve RC (temporarily) or strictly replace only low bits?
|
||||||
|
// We set low bits to Completed. High bits (RC) remain.
|
||||||
|
var newState = (stateVal & ~_STATE_MASK) | (int)JobState.Completed;
|
||||||
|
|
||||||
|
if (Interlocked.CompareExchange(ref Unsafe.As<JobState, int>(ref info.state), newState, stateVal) == stateVal)
|
||||||
|
{
|
||||||
|
// Successfully set State to Completed. New readers will see Completed and back off.
|
||||||
|
// Now we must wait for existing readers to finish (RC to become 0).
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
var current = Volatile.Read(ref Unsafe.As<JobState, int>(ref info.state));
|
||||||
|
if (((uint)current >> 16) == 0)
|
||||||
|
{
|
||||||
|
break; // RC is 0. Safe to proceed.
|
||||||
|
}
|
||||||
|
|
||||||
|
spin.SpinOnce(-1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin.SpinOnce(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We now have exclusive access to dependentsID (no new readers, old readers finished).
|
||||||
|
// Safely capture dependents.
|
||||||
|
var dependentCount = info.dependentCount;
|
||||||
|
dependentCount = Math.Min(dependentCount, JobInfo.MAX_DEPENDENTS); // Safety cap
|
||||||
|
|
||||||
|
// Use stackalloc to avoid allocation, but we'll copy to notify after freeing parent.
|
||||||
|
var dependentsToNotify = stackalloc JobHandle[dependentCount];
|
||||||
for (var i = 0; i < dependentCount; i++)
|
for (var i = 0; i < dependentCount; i++)
|
||||||
{
|
{
|
||||||
dependentsToNotify[i] = new JobHandle(info.dependentsID[i], info.dependentsGeneration[i]);
|
dependentsToNotify[i] = new JobHandle(info.dependentsID[i], info.dependentsGeneration[i]);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
_jobDataAllocator.Free(info.pJobData);
|
_jobDataAllocator.Free(info.pJobData);
|
||||||
_jobInfoPool.Remove(handle._id, handle._generation);
|
_jobInfoPool.Remove(handle.ID, handle.Generation);
|
||||||
|
|
||||||
for (var i = 0; i < dependentCount; i++)
|
for (var i = 0; i < dependentCount; i++)
|
||||||
{
|
{
|
||||||
var depHandle = dependentsToNotify[i];
|
var depHandle = dependentsToNotify[i];
|
||||||
|
|
||||||
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(depHandle._id, depHandle._generation, out var depExist);
|
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(depHandle.ID, depHandle.Generation, out var depExist);
|
||||||
if (depExist && Interlocked.Decrement(ref depJobInfo.dependencyCount) == 0)
|
if (depExist && Interlocked.Decrement(ref depJobInfo.dependencyCount) == 0)
|
||||||
{
|
{
|
||||||
EnqueueJobIfReady(depHandle);
|
EnqueueJobIfReady(depHandle);
|
||||||
@@ -519,13 +624,14 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
return JobState.Invalid;
|
return JobState.Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist);
|
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
|
||||||
if (!exist)
|
if (!exist)
|
||||||
{
|
{
|
||||||
return JobState.Completed; // We assume completed if not found. Invalid state is reserved for error.
|
return JobState.Completed; // We assume completed if not found. Invalid state is reserved for error.
|
||||||
}
|
}
|
||||||
|
|
||||||
return (JobState)Volatile.Read(ref Unsafe.As<JobState, int>(ref jobInfo.state));
|
// Mask out the Reader Count (upper 16 bits) to return the actual State
|
||||||
|
return (JobState)(Volatile.Read(ref Unsafe.As<JobState, int>(ref jobInfo.state)) & _STATE_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void WaitComplete(JobHandle handle)
|
public void WaitComplete(JobHandle handle)
|
||||||
@@ -536,9 +642,10 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
}
|
}
|
||||||
|
|
||||||
var spin = new SpinWait();
|
var spin = new SpinWait();
|
||||||
while (_jobInfoPool.TryGetElement(handle._id, handle._generation, out var jobInfo))
|
while (_jobInfoPool.TryGetElement(handle.ID, handle.Generation, out var jobInfo))
|
||||||
{
|
{
|
||||||
if (jobInfo.state == JobState.Completed)
|
// Mask out RC
|
||||||
|
if ((jobInfo.state & (JobState)_STATE_MASK) == JobState.Completed)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -549,7 +656,6 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
|
|
||||||
public void WaitAll(params ReadOnlySpan<JobHandle> handles)
|
public void WaitAll(params ReadOnlySpan<JobHandle> handles)
|
||||||
{
|
{
|
||||||
var sleepThreshold = _SLEEP_THRESHOLD * handles.Length;
|
|
||||||
var spin = new SpinWait();
|
var spin = new SpinWait();
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
@@ -557,7 +663,7 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
var completedCount = 0;
|
var completedCount = 0;
|
||||||
foreach (var handle in handles)
|
foreach (var handle in handles)
|
||||||
{
|
{
|
||||||
if (!_jobInfoPool.Contains(handle._id, handle._generation))
|
if (!_jobInfoPool.Contains(handle.ID, handle.Generation))
|
||||||
{
|
{
|
||||||
completedCount++;
|
completedCount++;
|
||||||
}
|
}
|
||||||
@@ -568,26 +674,25 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin.SpinOnce(sleepThreshold);
|
spin.SpinOnce(_SLEEP_THRESHOLD);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Spins until at least one of <paramref name="handles"/> is no longer present in the job info pool,
/// then returns that handle.
/// </summary>
/// <param name="handles">The job handles to monitor.</param>
/// <returns>
/// The first handle, in span order, that is no longer tracked by the job info pool;
/// or <see cref="JobHandle.Invalid"/> when <paramref name="handles"/> is empty.
/// </returns>
public JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles)
{
    // With nothing to wait on, the polling loop below could never find a finished handle
    // and would spin forever, so return immediately instead.
    if (handles.IsEmpty)
    {
        return JobHandle.Invalid;
    }

    var spin = new SpinWait();

    while (true)
    {
        foreach (var handle in handles)
        {
            // A handle that is absent from the pool is treated as finished.
            if (!_jobInfoPool.Contains(handle.ID, handle.Generation))
            {
                return handle;
            }
        }

        spin.SpinOnce(_SLEEP_THRESHOLD);
    }
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
using System.Collections.Concurrent;
|
using System.Collections.Concurrent;
|
||||||
|
|
||||||
namespace Misaki.HighPerformance.Jobs;
|
namespace Misaki.HighPerformance.Jobs;
|
||||||
|
|
||||||
internal class WorkerThread : IDisposable
|
internal class WorkerThread : IDisposable
|
||||||
{
|
{
|
||||||
|
private const int _MAX_STEAL_ATTEMPTS = 8;
|
||||||
|
|
||||||
private readonly int _index;
|
private readonly int _index;
|
||||||
private readonly Thread _thread;
|
private readonly Thread _thread;
|
||||||
private readonly ConcurrentQueue<JobHandle> _localQueue;
|
private readonly ConcurrentQueue<JobHandle> _localQueue;
|
||||||
@@ -29,54 +31,55 @@ internal class WorkerThread : IDisposable
|
|||||||
|
|
||||||
public void Start() => _thread.Start();
|
public void Start() => _thread.Start();
|
||||||
|
|
||||||
/// <summary>
/// Attempts to obtain a job for this worker without blocking.
/// </summary>
/// <remarks>
/// Sources are tried in order: this worker's local queue, the scheduler queue addressed by index <c>-1</c>,
/// and then up to <c>_MAX_STEAL_ATTEMPTS</c> randomly chosen workers.
/// </remarks>
/// <param name="handle">The job that was found, or <see cref="JobHandle.Invalid"/> when the method returns <c>false</c>.</param>
/// <returns><c>true</c> if a job was obtained; otherwise <c>false</c>.</returns>
private bool TryFindJob(out JobHandle handle)
{
    // Local queue first, then the queue the scheduler exposes under index -1.
    if (_localQueue.TryDequeue(out handle) || _scheduler.TryStealJob(-1, out handle))
    {
        return true;
    }

    // Bounded random stealing: give up after a fixed number of picks instead of looping until a job shows up.
    var attempt = 0;
    while (attempt++ < _MAX_STEAL_ATTEMPTS)
    {
        var victim = _random.Next(0, _scheduler.WorkerCount);
        if (victim == _index)
        {
            // Picked ourselves; our own queue was already checked above.
            continue;
        }

        if (_scheduler.TryStealJob(victim, out handle))
        {
            return true;
        }
    }

    handle = JobHandle.Invalid;
    return false;
}
|
||||||
|
|
||||||
private unsafe void WorkLoop()
|
private unsafe void WorkLoop()
|
||||||
{
|
{
|
||||||
while (!_scheduler.IsCancellationRequested)
|
while (!_scheduler.IsCancellationRequested)
|
||||||
{
|
{
|
||||||
var spinner = new SpinWait();
|
// Wait for work signal directly — the semaphore already acts as
|
||||||
for (var i = 0; i < 25; i++)
|
// both a notification and a count of available work items.
|
||||||
{
|
|
||||||
spinner.SpinOnce(-1);
|
|
||||||
|
|
||||||
if (_scheduler.HasWork())
|
|
||||||
{
|
|
||||||
// Instead of goto, we still need to go through the WaitForWork to claim a release.
|
|
||||||
// This causes lock and lots of branches inside the SemaphoreSlim, which lost 0.03ms.
|
|
||||||
// goto DoWork;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
_scheduler.WaitForWork();
|
_scheduler.WaitForWork();
|
||||||
}
|
}
|
||||||
catch (OperationCanceledException)
|
catch (OperationCanceledException)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// After being signaled, try to find and execute a job.
|
||||||
|
if (!TryFindJob(out var handle))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//DoWork:
|
|
||||||
var handle = FindJob();
|
|
||||||
ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist);
|
ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist);
|
||||||
|
|
||||||
if (exist)
|
if (exist)
|
||||||
|
|||||||
80
Misaki.HighPerformance.Mathematics.SPMD/IJobSPMD.cs
Normal file
80
Misaki.HighPerformance.Mathematics.SPMD/IJobSPMD.cs
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
using Misaki.HighPerformance.Jobs;
|
||||||
|
using System.Numerics;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
|
||||||
|
/// <summary>
/// A job whose body is written once against a generic lane type and can be invoked with different lane implementations.
/// </summary>
/// <typeparam name="TNumber">The numeric element type processed by the job.</typeparam>
public interface IJobSPMD<TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Executes the job body for the lane starting at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">The lane implementation to execute with.</typeparam>
    /// <param name="baseIndex">The index of the first element covered by this invocation.</param>
    /// <param name="threadIndex">The index of the thread executing the job.</param>
    void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, TNumber>;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/>: each parallel-for iteration
/// covers one lane-width batch of elements.
/// </summary>
/// <typeparam name="T">The wrapped SPMD job type.</typeparam>
/// <typeparam name="TNumber">The numeric element type processed by the job.</typeparam>
internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    // The wrapped SPMD job.
    public T innerJob;

    // Total number of elements to process across all iterations.
    public int totalCount;

    /// <summary>
    /// Runs the batch identified by <paramref name="loopIndex"/>: one wide-lane call when a full batch is
    /// available, otherwise one scalar-lane call per remaining element.
    /// </summary>
    /// <param name="loopIndex">The batch index; the batch starts at element <c>loopIndex * LaneWidth</c>.</param>
    /// <param name="threadIndex">The thread index forwarded to the wrapped job.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Execute(int loopIndex, int threadIndex)
    {
        var laneWidth = WideLane<TNumber>.LaneWidth;
        var batchStart = loopIndex * laneWidth;
        var elementsLeft = totalCount - batchStart;

        if (elementsLeft >= laneWidth)
        {
            innerJob.Execute<WideLane<TNumber>>(batchStart, threadIndex);
            return;
        }

        // Partial tail batch: process the leftover elements one at a time.
        for (var offset = 0; offset < elementsLeft; offset++)
        {
            innerJob.Execute<ScalarLane<TNumber>>(batchStart + offset, threadIndex);
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extension methods that drive <see cref="IJobSPMD{TNumber}"/> jobs, either inline on the calling thread
/// or through a <see cref="JobScheduler"/>.
/// </summary>
/// <remarks>
/// <c>TNumber</c> only appears in generic constraints, which C# type inference does not consider, so call
/// sites have to spell out both type arguments (for example <c>job.Run&lt;MyJob, float&gt;(count, 0)</c>).
/// </remarks>
public static class IJobParallelForSPMDExtensions
{
    /// <summary>
    /// Executes the job synchronously over <paramref name="totalCount"/> elements: full lane-width groups go
    /// through <c>WideLane&lt;TNumber&gt;</c>, and the elements of a trailing partial group go one by one
    /// through <c>ScalarLane&lt;TNumber&gt;</c>.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type of the lanes.</typeparam>
    /// <param name="job">The job to run; it is invoked in place through the reference.</param>
    /// <param name="totalCount">Number of elements to process. Zero or negative values run nothing.</param>
    /// <param name="threadIndex">Thread index handed to every <c>Execute</c> call.</param>
    public static void Run<T, TNumber>(this ref T job, int totalCount, int threadIndex)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        var laneWidth = WideLane<TNumber>.LaneWidth;
        var iterations = GetIterationCount(totalCount, laneWidth);

        for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
        {
            // loopIndex < iterations guarantees baseIndex < totalCount, so this product cannot overflow.
            var baseIndex = loopIndex * laneWidth;
            var remaining = totalCount - baseIndex;

            if (remaining >= laneWidth)
            {
                job.Execute<WideLane<TNumber>>(baseIndex, threadIndex);
            }
            else
            {
                for (var i = 0; i < remaining; i++)
                {
                    job.Execute<ScalarLane<TNumber>>(baseIndex + i, threadIndex);
                }
            }
        }
    }

    /// <summary>
    /// Wraps the job in an <see cref="SPMDJobWrapper{T, TNumber}"/> and schedules it as a parallel-for job
    /// with one iteration per lane-width group.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type of the lanes.</typeparam>
    /// <param name="jobScheduler">The scheduler that receives the wrapped job.</param>
    /// <param name="job">The job to schedule. Its current value is copied into the wrapper when this method is called.</param>
    /// <param name="totalCount">Number of elements to process. Zero or negative values schedule zero iterations.</param>
    /// <param name="batchSize">
    /// Forwarded unchanged to <c>ScheduleParallel</c>.
    /// NOTE(review): presumably counted in lane-width groups rather than elements, because the scheduled
    /// iteration count is in groups — confirm against <c>ScheduleParallel</c>.
    /// </param>
    /// <param name="threadIndex">Forwarded unchanged to <c>ScheduleParallel</c>.</param>
    /// <param name="dependency">Forwarded unchanged to <c>ScheduleParallel</c>.</param>
    /// <returns>The handle returned by <c>ScheduleParallel</c>.</returns>
    public static JobHandle ScheduleParallelSPMD<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, int threadIndex, JobHandle dependency)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        var wrapper = new SPMDJobWrapper<T, TNumber>
        {
            innerJob = job,
            totalCount = totalCount,
        };

        var iterations = GetIterationCount(totalCount, WideLane<TNumber>.LaneWidth);
        return jobScheduler.ScheduleParallel(ref wrapper, iterations, batchSize, threadIndex, dependency);
    }

    /// <summary>
    /// Original, misspelled name of <see cref="ScheduleParallelSPMD{T, TNumber}"/>; kept so existing callers
    /// continue to compile. It forwards every argument unchanged.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type of the lanes.</typeparam>
    /// <param name="jobScheduler">The scheduler that receives the wrapped job.</param>
    /// <param name="job">The job to schedule.</param>
    /// <param name="totalCount">Number of elements to process.</param>
    /// <param name="batchSize">Forwarded unchanged.</param>
    /// <param name="threadIndex">Forwarded unchanged.</param>
    /// <param name="dependency">Forwarded unchanged.</param>
    /// <returns>The handle returned by <see cref="ScheduleParallelSPMD{T, TNumber}"/>.</returns>
    public static JobHandle ScheduleParallelSPDM<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, int threadIndex, JobHandle dependency)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        return jobScheduler.ScheduleParallelSPMD<T, TNumber>(ref job, totalCount, batchSize, threadIndex, dependency);
    }

    /// <summary>
    /// Returns how many lane-width groups are needed to cover <paramref name="totalCount"/> elements,
    /// rounding up for a partial trailing group.
    /// </summary>
    /// <param name="totalCount">Number of elements; zero or negative yields zero groups.</param>
    /// <param name="laneWidth">Number of elements per group.</param>
    /// <returns>The rounded-up group count, never negative.</returns>
    private static int GetIterationCount(int totalCount, int laneWidth)
    {
        if (totalCount <= 0)
        {
            return 0;
        }

        // Quotient plus remainder check instead of (totalCount + laneWidth - 1) / laneWidth:
        // the latter wraps to a negative value when totalCount is close to int.MaxValue.
        var fullGroups = totalCount / laneWidth;
        return totalCount % laneWidth == 0 ? fullGroups : fullGroups + 1;
    }
}
|
||||||
589
Misaki.HighPerformance.Mathematics.SPMD/ISPMD.cs
Normal file
589
Misaki.HighPerformance.Mathematics.SPMD/ISPMD.cs
Normal file
@@ -0,0 +1,589 @@
|
|||||||
|
using System.Numerics;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using static System.Runtime.InteropServices.JavaScript.JSType;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
|
||||||
|
/// <summary>
/// Non-generic base contract shared by every SPMD lane type.
/// </summary>
public interface ISPMD
{
    /// <summary>
    /// Gets the vector width of the implementation, i.e. how many lanes one value holds.
    /// </summary>
    static abstract int LaneWidth { get; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Represents a single-lane or multi-lane (vectorized) SPMD value and the operations supported on it.
/// </summary>
/// <typeparam name="TSelf">The concrete SPMD lane type implementing this interface.</typeparam>
/// <typeparam name="TNumber">The underlying numeric element type.</typeparam>
public interface ISPMD<TSelf, TNumber> : ISPMD
    where TSelf : ISPMD<TSelf, TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Gets a lane value where all lanes are set to numeric zero.
    /// </summary>
    static abstract TSelf Zero { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to numeric one.
    /// </summary>
    static abstract TSelf One { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the minimum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MinValue { get; }

    /// <summary>
    /// Gets a lane value where all lanes are set to the maximum representable value of the underlying numeric type.
    /// </summary>
    static abstract TSelf MaxValue { get; }

    /// <summary>
    /// Gets the element value for the specified lane index.
    /// </summary>
    /// <param name="index">The zero-based lane index.</param>
    TNumber this[int index] { get; }

    /// <summary>
    /// Creates a lane value where all lanes are set to the specified value.
    /// </summary>
    /// <param name="value">The value to set for all lanes.</param>
    /// <returns>The created lane value.</returns>
    static abstract TSelf Create(TNumber value);

    /// <summary>
    /// Creates a new instance of the type from the specified sequence of numeric values.
    /// </summary>
    /// <param name="values">The numeric values used to initialize the lanes.</param>
    /// <returns>A new instance of the type initialized with the provided numeric values.</returns>
    static abstract TSelf Create(params ReadOnlySpan<TNumber> values);

    /// <summary>
    /// Creates a lane value from the specified vector.
    /// </summary>
    /// <param name="value">The vector to create the lane value from.</param>
    /// <returns>The lane value built from the vector.</returns>
    static abstract TSelf Create(Vector<TNumber> value);

    /// <summary>
    /// Creates a lane value with a sequence starting from the specified value with the given step.
    /// </summary>
    /// <param name="start">The starting value.</param>
    /// <param name="step">The step value for the sequence.</param>
    /// <returns>The lane value containing the arithmetic sequence.</returns>
    /// <remarks>
    /// Implementations may rely on vector creation helpers and assume that the resulting sequence length matches <see cref="LaneWidth"/>.
    /// </remarks>
    static abstract TSelf Sequence(TNumber start, TNumber step);

    /// <summary>
    /// Loads a lane value from the specified reference.
    /// </summary>
    /// <param name="value">The reference to load from.</param>
    /// <returns>The loaded lane value.</returns>
    static abstract TSelf Load(ref TNumber value);

    /// <summary>
    /// Loads a lane value from the specified pointer.
    /// </summary>
    /// <param name="pValue">The pointer to load from.</param>
    /// <returns>The loaded lane value.</returns>
    /// <remarks>
    /// Unsafe pointer overloads are provided for scenarios where sequential lane data is already contiguous in memory.
    /// </remarks>
    static abstract unsafe TSelf Load(TNumber* pValue);

    /// <summary>
    /// Stores the lane value to the specified reference.
    /// </summary>
    /// <param name="destination">The reference to store to.</param>
    void Store(ref TNumber destination);

    /// <summary>
    /// Stores the lane value to the specified pointer.
    /// </summary>
    /// <param name="pDestination">The pointer to store to.</param>
    unsafe void Store(TNumber* pDestination);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result in the provided destination
    /// variable.
    /// </summary>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <param name="destination">A reference to the variable where the compressed data will be stored.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    int CompressStore(TSelf mask, ref TNumber destination);

    /// <summary>
    /// Compresses the data specified by the given mask and stores the compressed result in the provided destination
    /// variable.
    /// </summary>
    /// <param name="mask">A mask value that determines which elements are included in the compression operation.</param>
    /// <param name="pDestination">A pointer to the variable where the compressed data will be stored.</param>
    /// <returns>The number of elements written to the destination as a result of the compression. Returns 0 if no elements are compressed.</returns>
    /// <remarks>
    /// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
    /// </remarks>
    unsafe int CompressStore(TSelf mask, TNumber* pDestination);

    /// <summary>
    /// Converts the lane value to a vector.
    /// </summary>
    /// <returns>The backing vector representation.</returns>
    Vector<TNumber> AsVector();

    /// <summary>
    /// Adds two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise sum.</returns>
    static abstract TSelf operator +(TSelf a, TSelf b);

    /// <summary>
    /// Adds a lane value and a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The lane value with the scalar added to each element.</returns>
    static abstract TSelf operator +(TSelf a, TNumber b);

    /// <summary>
    /// Subtracts two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise difference.</returns>
    static abstract TSelf operator -(TSelf a, TSelf b);

    /// <summary>
    /// Subtracts a scalar from a lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The lane value with the scalar subtracted from each element.</returns>
    static abstract TSelf operator -(TSelf a, TNumber b);

    /// <summary>
    /// Multiplies two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise product.</returns>
    static abstract TSelf operator *(TSelf a, TSelf b);

    /// <summary>
    /// Multiplies a lane value by a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The lane value scaled by the scalar.</returns>
    static abstract TSelf operator *(TSelf a, TNumber b);

    /// <summary>
    /// Divides two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise quotient.</returns>
    static abstract TSelf operator /(TSelf a, TSelf b);

    /// <summary>
    /// Divides a lane value by a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The lane value divided by the scalar.</returns>
    static abstract TSelf operator /(TSelf a, TNumber b);

    /// <summary>
    /// Computes the modulus of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane-wise modulus.</returns>
    static abstract TSelf operator %(TSelf a, TSelf b);

    /// <summary>
    /// Computes the modulus of a lane value and a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The lane value modulus scalar.</returns>
    static abstract TSelf operator %(TSelf a, TNumber b);

    /// <summary>
    /// Negates the lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value to negate.</param>
    /// <returns>The negated lane value.</returns>
    static abstract TSelf operator -(TSelf a);

    /// <summary>
    /// Computes the bitwise AND of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise AND.</returns>
    static abstract TSelf operator &(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise AND of a lane value and a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The result of the bitwise AND.</returns>
    static abstract TSelf operator &(TSelf a, TNumber b);

    /// <summary>
    /// Computes the bitwise OR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise OR.</returns>
    static abstract TSelf operator |(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise OR of a lane value and a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The result of the bitwise OR.</returns>
    static abstract TSelf operator |(TSelf a, TNumber b);

    /// <summary>
    /// Computes the bitwise XOR of two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The result of the bitwise XOR.</returns>
    static abstract TSelf operator ^(TSelf a, TSelf b);

    /// <summary>
    /// Computes the bitwise XOR of a lane value and a scalar element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <param name="b">The scalar value.</param>
    /// <returns>The result of the bitwise XOR.</returns>
    static abstract TSelf operator ^(TSelf a, TNumber b);

    /// <summary>
    /// Computes the bitwise NOT of a lane value element-wise.
    /// </summary>
    /// <param name="a">The lane value.</param>
    /// <returns>The bitwise complement of the lane value.</returns>
    static abstract TSelf operator ~(TSelf a);

    /// <summary>
    /// Computes the absolute value of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The absolute lane value.</returns>
    static abstract TSelf Abs(TSelf value);

    /// <summary>
    /// Computes the floor of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The lane value with each element rounded toward negative infinity.</returns>
    static abstract TSelf Floor(TSelf value);

    /// <summary>
    /// Computes the fractional part of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The fractional lane value.</returns>
    static abstract TSelf Frac(TSelf value);

    /// <summary>
    /// Computes the square root of the lane value element-wise.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The square root lane value.</returns>
    static abstract TSelf Sqrt(TSelf value);

    /// <summary>
    /// Performs linear interpolation between two lane values.
    /// </summary>
    /// <param name="a">The start lane value.</param>
    /// <param name="b">The end lane value.</param>
    /// <param name="t">The interpolation factor.</param>
    /// <returns>The interpolated lane value.</returns>
    static abstract TSelf Lerp(TSelf a, TSelf b, TSelf t);

    /// <summary>
    /// Computes a * b + c element-wise.
    /// </summary>
    /// <param name="a">The first multiplier.</param>
    /// <param name="b">The second multiplier.</param>
    /// <param name="c">The addend.</param>
    /// <returns>The result of the fused multiply-add operation.</returns>
    /// <remarks>
    /// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
    /// </remarks>
    static abstract TSelf MultipleAdd(TSelf a, TSelf b, TSelf c);

    /// <summary>
    /// Returns the minimum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the minimum of each element.</returns>
    static abstract TSelf Min(TSelf a, TSelf b);

    /// <summary>
    /// Returns the maximum of the two lane values element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The lane value containing the maximum of each element.</returns>
    static abstract TSelf Max(TSelf a, TSelf b);

    /// <summary>
    /// Clamps each element of the lane value between the specified minimum and maximum values.
    /// </summary>
    /// <param name="value">The lane value to clamp.</param>
    /// <param name="min">The inclusive minimum.</param>
    /// <param name="max">The inclusive maximum.</param>
    /// <returns>The clamped lane value.</returns>
    static abstract TSelf Clamp(TSelf value, TSelf min, TSelf max);

    /// <summary>
    /// Saturates each element in the lane value to the 0..1 range.
    /// </summary>
    /// <param name="value">The lane value to saturate.</param>
    /// <returns>The saturated lane value.</returns>
    static abstract TSelf Saturate(TSelf value);

    /// <summary>
    /// Computes the sine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The sine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Sin(TSelf value);

    /// <summary>
    /// Computes the cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The cosine of each lane element.</returns>
    /// <remarks>
    /// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
    /// </remarks>
    static abstract TSelf Cos(TSelf value);

    /// <summary>
    /// Computes both sine and cosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>A tuple containing sine and cosine lane values.</returns>
    /// <remarks>
    /// Implementations returning both sin and cos simultaneously can reuse intermediate values for better performance.
    /// </remarks>
    static abstract (TSelf sin, TSelf cos) SinCos(TSelf value);

    /// <summary>
    /// Computes the tangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The tangent of each lane element.</returns>
    /// <remarks>
    /// Many implementations use polynomial approximations and assume the input is reduced to [-pi/4, pi/4] for accuracy.
    /// </remarks>
    static abstract TSelf Tan(TSelf value);

    /// <summary>
    /// Computes the arcsine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arcsine of each lane element.</returns>
    /// <remarks>
    /// Implementations typically assume input is within [-1, 1] and may use polynomial approximations for performance.
    /// </remarks>
    static abstract TSelf Asin(TSelf value);

    /// <summary>
    /// Computes the arccosine of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arccosine of each lane element.</returns>
    /// <remarks>
    /// Input is expected to be in [-1, 1]; implementations often rely on approximation polynomials combined with range reduction.
    /// </remarks>
    static abstract TSelf Acos(TSelf value);

    /// <summary>
    /// Computes the arctangent of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The arctangent of each lane element.</returns>
    /// <remarks>
    /// Polynomial approximations with restricted input ranges are commonly used for performance-sensitive implementations.
    /// </remarks>
    static abstract TSelf Atan(TSelf value);

    /// <summary>
    /// Computes the arctangent of y/x for each lane element.
    /// </summary>
    /// <param name="y">The numerator lane value.</param>
    /// <param name="x">The denominator lane value.</param>
    /// <returns>The arctangent of each lane pair.</returns>
    /// <remarks>
    /// Implementations often rely on quadrant-aware polynomial routines and assume inputs are finite to avoid NaNs.
    /// </remarks>
    static abstract TSelf Atan2(TSelf y, TSelf x);

    /// <summary>
    /// Raises each lane element to the specified power.
    /// </summary>
    /// <param name="x">The base lane value.</param>
    /// <param name="y">The exponent lane value. Cannot be negative.</param>
    /// <returns>The power result for each lane.</returns>
    static abstract TSelf Pow(TSelf x, TSelf y);

    /// <summary>
    /// Computes the exponential of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The exponential of each lane element.</returns>
    /// <remarks>
    /// Float and double implementations typically call into vectorized exp intrinsics; other types may fall back to scalar paths.
    /// </remarks>
    static abstract TSelf Exp(TSelf value);

    /// <summary>
    /// Computes 2 raised to each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 exponential of each lane element.</returns>
    /// <remarks>
    /// This can be implemented via <see cref="Exp(TSelf)"/> when no dedicated base-2 intrinsic exists.
    /// </remarks>
    static abstract TSelf Exp2(TSelf value);

    /// <summary>
    /// Computes the natural logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The natural logarithm of each lane element.</returns>
    /// <remarks>
    /// Vectorized logarithm instructions may only exist for floating-point types; other types should mimic the scalar behavior.
    /// </remarks>
    static abstract TSelf Log(TSelf value);

    /// <summary>
    /// Computes the base-2 logarithm of each lane element.
    /// </summary>
    /// <param name="value">The source lane value.</param>
    /// <returns>The base-2 logarithm of each lane element.</returns>
    /// <remarks>
    /// If a dedicated base-2 intrinsic is unavailable, the implementation may compute <c>Log(value)/Log(2)</c>.
    /// </remarks>
    static abstract TSelf Log2(TSelf value);

    /// <summary>
    /// Computes the ceiling of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The smallest integral value greater than or equal to each element.</returns>
    /// <remarks>
    /// Implementations should use <see cref="Vector"/> helpers for floating-point types when available.
    /// </remarks>
    static abstract TSelf Ceil(TSelf value);

    /// <summary>
    /// Rounds each lane element to the nearest integer value.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The rounded lane value.</returns>
    /// <remarks>
    /// Implementations should prefer vectorized round intrinsics for floating-point implementations.
    /// </remarks>
    static abstract TSelf Round(TSelf value);

    /// <summary>
    /// Truncates each lane element toward zero.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The truncated lane value.</returns>
    /// <remarks>
    /// Floating-point truncation typically maps to <see cref="Vector.Truncate(Vector{float})"/> or
    /// <see cref="Vector.Truncate(Vector{double})"/>.
    /// </remarks>
    static abstract TSelf Trunc(TSelf value);

    /// <summary>
    /// Returns the sign of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>-1, 0, or 1 per lane.</returns>
    static abstract TSelf Sign(TSelf value);

    /// <summary>
    /// Copies the sign of the second lane value to the magnitude of the first.
    /// </summary>
    /// <param name="magnitude">The magnitude lane value.</param>
    /// <param name="sign">The sign lane value.</param>
    /// <returns>The result of merging magnitude with sign.</returns>
    static abstract TSelf CopySign(TSelf magnitude, TSelf sign);

    /// <summary>
    /// Computes the reciprocal of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal lane value.</returns>
    /// <remarks>
    /// Fast paths may use <c>Sse.Reciprocal</c> or <c>Avx.Reciprocal</c> when <c>TNumber</c> is <c>float</c>.
    /// </remarks>
    static abstract TSelf Rcp(TSelf value);

    /// <summary>
    /// Computes the reciprocal square root of each lane element.
    /// </summary>
    /// <param name="value">The lane value.</param>
    /// <returns>The reciprocal square root lane value.</returns>
    /// <remarks>
    /// Float implementations may prefer hardware reciprocal-sqrt intrinsics and fallback to <c>Create(TNumber.One)/Sqrt(value)</c> otherwise.
    /// </remarks>
    static abstract TSelf Rsqrt(TSelf value);

    /// <summary>
    /// Selects values from two lane values based on a condition mask.
    /// </summary>
    /// <param name="conditionMask">The condition mask.</param>
    /// <param name="ifTrue">The value to select if true.</param>
    /// <param name="ifFalse">The value to select if false.</param>
    /// <returns>The selected lane value.</returns>
    static abstract TSelf Select(TSelf conditionMask, TSelf ifTrue, TSelf ifFalse);

    /// <summary>
    /// Compares two lane values for greater than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than comparison result.</returns>
    static abstract TSelf GreaterThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for greater than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the greater than or equal comparison result.</returns>
    static abstract TSelf GreaterThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than comparison result.</returns>
    static abstract TSelf LessThan(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for less than or equal element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the less than or equal comparison result.</returns>
    static abstract TSelf LessThanOrEqual(TSelf a, TSelf b);

    /// <summary>
    /// Compares two lane values for equality element-wise.
    /// </summary>
    /// <param name="a">The first lane value.</param>
    /// <param name="b">The second lane value.</param>
    /// <returns>The mask representing the equality comparison result.</returns>
    static abstract TSelf Equal(TSelf a, TSelf b);

    /// <summary>
    /// Checks if any lane in the mask is true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if any lane is true; otherwise, false.</returns>
    static abstract bool Any(TSelf mask);

    /// <summary>
    /// Checks if all lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if all lanes are true; otherwise, false.</returns>
    static abstract bool All(TSelf mask);

    /// <summary>
    /// Checks if no lanes in the mask are true.
    /// </summary>
    /// <param name="mask">The mask to check.</param>
    /// <returns>True if no lanes are true; otherwise, false.</returns>
    static abstract bool None(TSelf mask);
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
564
Misaki.HighPerformance.Mathematics.SPMD/ScalerLane.cs
Normal file
564
Misaki.HighPerformance.Mathematics.SPMD/ScalerLane.cs
Normal file
@@ -0,0 +1,564 @@
|
|||||||
|
using System.Numerics;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
|
||||||
|
/// <summary>
/// Single-element ("one lane wide") implementation of the SPMD lane contract.
/// Every operation is applied to the one wrapped <typeparamref name="T"/> value.
/// </summary>
/// <remarks>
/// Comparison results are returned as masks: all bits set (<c>~T.Zero</c>) for true and
/// <c>T.Zero</c> for false. Mask consumers (<see cref="Select"/>, <see cref="Any"/>,
/// <see cref="All"/>, <see cref="None"/>, <c>CompressStore</c>) treat any value that is
/// not equal to zero as true.
/// Functions that are only defined for floating-point types (trigonometry, exponentials,
/// rounding, square root) are specialised for <see cref="float"/> and <see cref="double"/>
/// (and <see cref="decimal"/> for the rounding family); for every other <typeparamref name="T"/>
/// they return their first argument unchanged.
/// </remarks>
/// <typeparam name="T">The element type carried by the lane.</typeparam>
[StructLayout(LayoutKind.Sequential)]
public readonly unsafe struct ScalarLane<T> : ISPMD<ScalarLane<T>, T>
    where T : unmanaged, INumber<T>, IMinMaxValue<T>, IBitwiseOperators<T, T, T>
{
    /// <summary>The single element held by this lane.</summary>
    public readonly T value;

    /// <summary>Number of elements per lane; always 1 for the scalar lane.</summary>
    public static int LaneWidth
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => 1;
    }

    /// <summary>A lane holding <c>T.Zero</c>.</summary>
    public static ScalarLane<T> Zero
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new(T.Zero);
    }

    /// <summary>A lane holding <c>T.One</c>.</summary>
    public static ScalarLane<T> One
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new(T.One);
    }

    /// <summary>A lane holding <c>T.MinValue</c>.</summary>
    public static ScalarLane<T> MinValue
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new(T.MinValue);
    }

    /// <summary>A lane holding <c>T.MaxValue</c>.</summary>
    public static ScalarLane<T> MaxValue
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => new(T.MaxValue);
    }

    /// <summary>Gets the lane element. <paramref name="index"/> is ignored because there is only one element.</summary>
    public readonly T this[int index]
    {
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        get => value;
    }

    /// <summary>Wraps <paramref name="value"/> in a scalar lane.</summary>
    public ScalarLane(T value)
    {
        this.value = value;
    }

    // ------------------------------------------------------------------
    // Reinterpretation helpers. Each one is only valid when T is exactly the
    // named primitive; callers guard them with a typeof(T) check, which the JIT
    // resolves at compile time for value-type instantiations.
    // ------------------------------------------------------------------

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static float SingleOf(ScalarLane<T> lane) => Unsafe.As<ScalarLane<T>, float>(ref lane);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static double DoubleOf(ScalarLane<T> lane) => Unsafe.As<ScalarLane<T>, double>(ref lane);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static decimal DecimalOf(ScalarLane<T> lane) => Unsafe.As<ScalarLane<T>, decimal>(ref lane);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ScalarLane<T> Wrap(float raw) => Unsafe.As<float, ScalarLane<T>>(ref raw);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ScalarLane<T> Wrap(double raw) => Unsafe.As<double, ScalarLane<T>>(ref raw);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static ScalarLane<T> Wrap(decimal raw) => Unsafe.As<decimal, ScalarLane<T>>(ref raw);

    // ------------------------------------------------------------------
    // Construction / load / store
    // ------------------------------------------------------------------

    /// <summary>Creates a lane from a single value.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Create(T value) => new(value);

    /// <summary>Creates a lane from the first element of <paramref name="values"/>; remaining elements are ignored.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Create(params ReadOnlySpan<T> values) => new(values[0]);

    /// <summary>Creates a lane from element 0 of <paramref name="value"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Create(Vector<T> value) => new(value[0]);

    /// <summary>
    /// Creates the sequence <c>start, start + step, ...</c>; with a single element this is just
    /// <paramref name="start"/>, so <paramref name="step"/> is unused.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Sequence(T start, T step) => new(start);

    /// <summary>Loads the lane from the referenced element.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Load(ref T value) => new(value);

    /// <summary>Loads the lane from the element at <paramref name="pValue"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Load(T* pValue) => new(*pValue);

    /// <summary>Writes the lane element to <paramref name="destination"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Store(ref T destination) => destination = value;

    /// <summary>Writes the lane element to the location at <paramref name="pDestination"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly void Store(T* pDestination) => *pDestination = value;

    /// <summary>
    /// Writes the lane element to <paramref name="destination"/> when <paramref name="mask"/> is non-zero.
    /// </summary>
    /// <returns>The number of elements written: 1 if the mask was set, otherwise 0.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int CompressStore(ScalarLane<T> mask, ref T destination)
    {
        // Write through the ref directly instead of converting it to a raw pointer:
        // the ref may point into a movable managed object that is not pinned.
        if (mask.value != T.Zero)
        {
            destination = value;
            return 1;
        }

        return 0;
    }

    /// <summary>
    /// Writes the lane element to <paramref name="pDestination"/> when <paramref name="mask"/> is non-zero.
    /// </summary>
    /// <returns>The number of elements written: 1 if the mask was set, otherwise 0.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int CompressStore(ScalarLane<T> mask, T* pDestination)
    {
        if (mask.value != T.Zero)
        {
            *pDestination = value;
            return 1;
        }

        return 0;
    }

    /// <summary>Broadcasts the lane element to every element of a <see cref="Vector{T}"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public readonly Vector<T> AsVector() => Vector.Create(value);

    // ------------------------------------------------------------------
    // Arithmetic operators (lane/lane and lane/scalar forms)
    // ------------------------------------------------------------------

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator +(ScalarLane<T> a, ScalarLane<T> b) => new(a.value + b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator +(ScalarLane<T> a, T b) => new(a.value + b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator -(ScalarLane<T> a, ScalarLane<T> b) => new(a.value - b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator -(ScalarLane<T> a, T b) => new(a.value - b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator *(ScalarLane<T> a, ScalarLane<T> b) => new(a.value * b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator *(ScalarLane<T> a, T b) => new(a.value * b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator /(ScalarLane<T> a, ScalarLane<T> b) => new(a.value / b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator /(ScalarLane<T> a, T b) => new(a.value / b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator %(ScalarLane<T> a, ScalarLane<T> b) => new(a.value % b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator %(ScalarLane<T> a, T b) => new(a.value % b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator -(ScalarLane<T> a) => new(-a.value);

    // ------------------------------------------------------------------
    // Bitwise operators (lane/lane and lane/scalar forms)
    // ------------------------------------------------------------------

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator &(ScalarLane<T> a, ScalarLane<T> b) => new(a.value & b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator &(ScalarLane<T> a, T b) => new(a.value & b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator |(ScalarLane<T> a, ScalarLane<T> b) => new(a.value | b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator |(ScalarLane<T> a, T b) => new(a.value | b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator ^(ScalarLane<T> a, ScalarLane<T> b) => new(a.value ^ b.value);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator ^(ScalarLane<T> a, T b) => new(a.value ^ b);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> operator ~(ScalarLane<T> a) => new(~a.value);

    // ------------------------------------------------------------------
    // Math functions
    // ------------------------------------------------------------------

    /// <summary>Returns the absolute value of the lane element.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Abs(ScalarLane<T> value) => new(T.Abs(value.value));

    /// <summary>
    /// Rounds toward negative infinity. Types other than float, double and decimal are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Floor(ScalarLane<T> value)
    {
        // INumber<T> does not expose Floor, so the floating-point types are handled explicitly.
        // For value-type T the JIT evaluates typeof(T) == typeof(X) at compile time and drops
        // the branches that cannot match.
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Floor(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Floor(DoubleOf(value)));
        }

        if (typeof(T) == typeof(decimal))
        {
            return Wrap(Math.Floor(DecimalOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Returns the fractional part, computed as <c>value - Floor(value)</c>.
    /// For types that <see cref="Floor"/> leaves unchanged the result is zero.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Frac(ScalarLane<T> value) => new(value.value - Floor(value).value);

    /// <summary>Square root. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Sqrt(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Sqrt(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Sqrt(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Linear interpolation: <c>a + (b - a) * t</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Lerp(ScalarLane<T> a, ScalarLane<T> b, ScalarLane<T> t) => new(a.value + (b.value - a.value) * t.value);

    /// <summary>Computes an estimate of <c>a * b + c</c> via <c>T.MultiplyAddEstimate</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> MultipleAdd(ScalarLane<T> a, ScalarLane<T> b, ScalarLane<T> c) => new(T.MultiplyAddEstimate(a.value, b.value, c.value));

    /// <summary>Returns the smaller of the two elements.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Min(ScalarLane<T> a, ScalarLane<T> b) => new(T.Min(a.value, b.value));

    /// <summary>Returns the larger of the two elements.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Max(ScalarLane<T> a, ScalarLane<T> b) => new(T.Max(a.value, b.value));

    /// <summary>Clamps <paramref name="value"/> to the inclusive range [<paramref name="min"/>, <paramref name="max"/>].</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Clamp(ScalarLane<T> value, ScalarLane<T> min, ScalarLane<T> max) => new(T.Clamp(value.value, min.value, max.value));

    /// <summary>Clamps <paramref name="value"/> to the range [0, 1].</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Saturate(ScalarLane<T> value) => Clamp(value, new(T.Zero), new(T.One));

    /// <summary>Sine. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Sin(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Sin(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Sin(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Cosine. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Cos(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Cos(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Cos(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Returns <see cref="Sin"/> and <see cref="Cos"/> of <paramref name="value"/> as a pair.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static (ScalarLane<T> sin, ScalarLane<T> cos) SinCos(ScalarLane<T> value) => (Sin(value), Cos(value));

    /// <summary>Tangent. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Tan(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Tan(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Tan(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Arc sine. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Asin(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Asin(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Asin(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Arc cosine. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Acos(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Acos(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Acos(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Arc tangent. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Atan(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Atan(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Atan(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Two-argument arc tangent of <paramref name="y"/> / <paramref name="x"/>.
    /// Types other than float and double return <paramref name="y"/> unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Atan2(ScalarLane<T> y, ScalarLane<T> x)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Atan2(SingleOf(y), SingleOf(x)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Atan2(DoubleOf(y), DoubleOf(x)));
        }

        return y;
    }

    /// <summary>
    /// Raises <paramref name="x"/> to the power <paramref name="y"/>.
    /// Types other than float and double return <paramref name="x"/> unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Pow(ScalarLane<T> x, ScalarLane<T> y)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Pow(SingleOf(x), SingleOf(y)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Pow(DoubleOf(x), DoubleOf(y)));
        }

        return x;
    }

    /// <summary>Natural exponential (e^value). Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Exp(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Exp(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            // Must be Exp, matching the float branch (this previously called Math.Log).
            return Wrap(Math.Exp(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Base-2 exponential, computed as <c>Pow(2, value)</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Exp2(ScalarLane<T> value)
    {
        return Pow(new ScalarLane<T>(T.CreateChecked(2)), value);
    }

    /// <summary>Natural logarithm. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Log(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Log(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Log(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>Base-2 logarithm. Types other than float and double are returned unchanged.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Log2(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Log2(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Log2(DoubleOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Rounds toward positive infinity. Types other than float, double and decimal are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Ceil(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Ceiling(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Ceiling(DoubleOf(value)));
        }

        if (typeof(T) == typeof(decimal))
        {
            return Wrap(Math.Ceiling(DecimalOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Rounds to the nearest integral value using the default <c>Math.Round</c>/<c>MathF.Round</c> behaviour.
    /// Types other than float, double and decimal are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Round(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Round(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Round(DoubleOf(value)));
        }

        if (typeof(T) == typeof(decimal))
        {
            return Wrap(Math.Round(DecimalOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Rounds toward zero. Types other than float, double and decimal are returned unchanged.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Trunc(ScalarLane<T> value)
    {
        if (typeof(T) == typeof(float))
        {
            return Wrap(MathF.Truncate(SingleOf(value)));
        }

        if (typeof(T) == typeof(double))
        {
            return Wrap(Math.Truncate(DoubleOf(value)));
        }

        if (typeof(T) == typeof(decimal))
        {
            return Wrap(Math.Truncate(DecimalOf(value)));
        }

        return value;
    }

    /// <summary>
    /// Returns <c>T.One</c> for positive input, <c>-T.One</c> for negative input and <c>T.Zero</c>
    /// otherwise (including inputs that compare neither greater nor less than zero, such as NaN).
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Sign(ScalarLane<T> value)
    {
        if (value.value > T.Zero)
        {
            return new(T.One);
        }

        if (value.value < T.Zero)
        {
            // Arithmetic negation, not ~T.Zero: the all-bits-set pattern is -1 only for
            // two's-complement integers and is a NaN for floating-point types.
            return new(-T.One);
        }

        return new(T.Zero);
    }

    /// <summary>Returns <paramref name="magnitude"/> with the sign of <paramref name="sign"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> CopySign(ScalarLane<T> magnitude, ScalarLane<T> sign) => new(T.CopySign(magnitude.value, sign.value));

    /// <summary>Reciprocal: <c>1 / value</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Rcp(ScalarLane<T> value) => new(T.One / value.value);

    /// <summary>
    /// Reciprocal square root: <c>1 / Sqrt(value)</c>.
    /// </summary>
    /// <remarks>
    /// The square root is taken first so that very small or very large inputs do not
    /// overflow or underflow in an intermediate <c>1 / value</c>.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Rsqrt(ScalarLane<T> value) => Rcp(Sqrt(value));

    // ------------------------------------------------------------------
    // Selection and comparisons (results are masks, see the type remarks)
    // ------------------------------------------------------------------

    /// <summary>Returns <paramref name="ifTrue"/> when <paramref name="conditionMask"/> is non-zero, otherwise <paramref name="ifFalse"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Select(ScalarLane<T> conditionMask, ScalarLane<T> ifTrue, ScalarLane<T> ifFalse) => new(conditionMask.value != T.Zero ? ifTrue.value : ifFalse.value);

    /// <summary>Mask for <c>a &gt; b</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> GreaterThan(ScalarLane<T> a, ScalarLane<T> b) => new(a.value > b.value ? ~T.Zero : T.Zero);

    /// <summary>Mask for <c>a &gt;= b</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> GreaterThanOrEqual(ScalarLane<T> a, ScalarLane<T> b) => new(a.value >= b.value ? ~T.Zero : T.Zero);

    /// <summary>Mask for <c>a &lt; b</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> LessThan(ScalarLane<T> a, ScalarLane<T> b) => new(a.value < b.value ? ~T.Zero : T.Zero);

    /// <summary>Mask for <c>a &lt;= b</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> LessThanOrEqual(ScalarLane<T> a, ScalarLane<T> b) => new(a.value <= b.value ? ~T.Zero : T.Zero);

    /// <summary>Mask for <c>a == b</c>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static ScalarLane<T> Equal(ScalarLane<T> a, ScalarLane<T> b) => new(a.value == b.value ? ~T.Zero : T.Zero);

    // ------------------------------------------------------------------
    // Mask reductions. With one element, Any and All are the same test.
    // ------------------------------------------------------------------

    /// <summary>True when the mask element is non-zero.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool Any(ScalarLane<T> mask) => mask.value != T.Zero;

    /// <summary>True when the mask element is non-zero.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool All(ScalarLane<T> mask) => mask.value != T.Zero;

    /// <summary>True when the mask element equals zero.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool None(ScalarLane<T> mask) => mask.value == T.Zero;

    /// <summary>Returns the string form of the lane element, or an empty string if that is null.</summary>
    public override string ToString()
    {
        return value.ToString() ?? string.Empty;
    }
}
|
||||||
206
Misaki.HighPerformance.Mathematics.SPMD/ShuffleTableGenerator.cs
Normal file
206
Misaki.HighPerformance.Mathematics.SPMD/ShuffleTableGenerator.cs
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
|
||||||
|
/// <summary>
/// Builds "compress" shuffle lookup tables. Each table row corresponds to one lane bit-mask
/// and lists, packed to the front and in ascending order, the indices of the lanes whose bit
/// is set in that mask; the rest of the row is padded with zeros.
/// </summary>
/// <remarks>
/// Every table is allocated with <see cref="NativeMemory.AlignedAlloc"/> and is not freed by
/// this class. The caller owns the returned pointer and is responsible for releasing it with
/// <see cref="NativeMemory.AlignedFree"/> if the table is not meant to live for the whole process.
/// </remarks>
public static unsafe class ShuffleTableGenerator
{
    /// <summary>
    /// Builds a table of 512 rows of 16 <see cref="uint"/> indices, 64-byte aligned
    /// (one row is the size of a 512-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 16</c>.</returns>
    public static uint* ComputeShuffleTable512_32Bit()
    {
        // NOTE(review): a 16-lane mask has 65536 possible values, but only masks 0..511 get a
        // row here. Confirm that consumers never index this table with a mask >= 512.
        return BuildTable<uint>(entryCount: 512, laneCount: 16, alignment: 64);
    }

    /// <summary>
    /// Builds a table of 256 rows of 8 <see cref="ulong"/> indices, 64-byte aligned
    /// (one row is the size of a 512-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 8</c>.</returns>
    public static ulong* ComputeShuffleTable512_64Bit()
    {
        return BuildTable<ulong>(entryCount: 256, laneCount: 8, alignment: 64);
    }

    /// <summary>
    /// Builds a table of 256 rows of 8 <see cref="uint"/> indices, 32-byte aligned
    /// (one row is the size of a 256-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 8</c>.</returns>
    public static uint* ComputeShuffleTable256_32Bit()
    {
        return BuildTable<uint>(entryCount: 256, laneCount: 8, alignment: 32);
    }

    /// <summary>
    /// Builds a table of 16 rows of 4 <see cref="ulong"/> indices, 32-byte aligned
    /// (one row is the size of a 256-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 4</c>.</returns>
    public static ulong* ComputeShuffleTable256_64Bit()
    {
        return BuildTable<ulong>(entryCount: 16, laneCount: 4, alignment: 32);
    }

    /// <summary>
    /// Builds a table of 16 rows of 4 <see cref="uint"/> indices, 16-byte aligned
    /// (one row is the size of a 128-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 4</c>.</returns>
    public static uint* ComputeShuffleTable128_32Bit()
    {
        return BuildTable<uint>(entryCount: 16, laneCount: 4, alignment: 16);
    }

    /// <summary>
    /// Builds a table of 8 rows of 2 <see cref="ulong"/> indices, 16-byte aligned
    /// (one row is the size of a 128-bit vector).
    /// </summary>
    /// <returns>Pointer to the first row; row <c>m</c> starts at <c>table + m * 2</c>.</returns>
    public static ulong* ComputeShuffleTable128_64Bit()
    {
        // NOTE(review): two lanes only need 4 rows; rows 4..7 repeat rows 0..3 because bit 2 of
        // the mask is never inspected. The row count is kept at 8 so the table size is unchanged.
        return BuildTable<ulong>(entryCount: 8, laneCount: 2, alignment: 16);
    }

    /// <summary>
    /// Allocates and fills one shuffle table.
    /// </summary>
    /// <typeparam name="TIndex">Integer type used to store each lane index.</typeparam>
    /// <param name="entryCount">Number of rows; row <c>m</c> is generated for mask value <c>m</c>.</param>
    /// <param name="laneCount">Number of indices per row, and the number of low mask bits inspected.</param>
    /// <param name="alignment">Byte alignment passed to <see cref="NativeMemory.AlignedAlloc"/>.</param>
    /// <returns>Pointer to <paramref name="entryCount"/> * <paramref name="laneCount"/> elements.</returns>
    private static TIndex* BuildTable<TIndex>(nuint entryCount, int laneCount, nuint alignment)
        where TIndex : unmanaged, System.Numerics.INumberBase<TIndex>
    {
        var rowLength = (nuint)laneCount;
        var table = (TIndex*)NativeMemory.AlignedAlloc(entryCount * rowLength * (nuint)sizeof(TIndex), alignment);

        for (nuint mask = 0; mask < entryCount; mask++)
        {
            var row = table + (mask * rowLength);
            var written = 0;

            // 1. Pack the indices of the set bits to the front of the row.
            for (var bit = 0; bit < laneCount; bit++)
            {
                if (((mask >> bit) & 1) != 0)
                {
                    row[written] = TIndex.CreateTruncating(bit);
                    written++;
                }
            }

            // 2. Zero the unused tail so every element of the row holds a valid lane index.
            while (written < laneCount)
            {
                row[written] = TIndex.Zero;
                written++;
            }
        }

        return table;
    }
}
|
||||||
794
Misaki.HighPerformance.Mathematics.SPMD/WideLane.cs
Normal file
794
Misaki.HighPerformance.Mathematics.SPMD/WideLane.cs
Normal file
@@ -0,0 +1,794 @@
|
|||||||
|
using System.Numerics;
|
||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Runtime.Intrinsics.X86;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
|
||||||
|
[StructLayout(LayoutKind.Sequential)]
|
||||||
|
public readonly unsafe struct WideLane<T> : ISPMD<WideLane<T>, T>
|
||||||
|
where T : unmanaged, INumber<T>, IMinMaxValue<T>, IBitwiseOperators<T, T, T>
|
||||||
|
{
|
||||||
|
private static readonly Vector<T> s_indices;
|
||||||
|
|
||||||
|
private static readonly uint* s_shuffleTable512_32bit;
|
||||||
|
private static readonly ulong* s_shuffleTable512_64bit;
|
||||||
|
private static readonly uint* s_shuffleTable256_32bit;
|
||||||
|
private static readonly ulong* s_shuffleTable256_64bit;
|
||||||
|
private static readonly uint* s_shuffleTable128_32bit;
|
||||||
|
private static readonly ulong* s_shuffleTable128_64bit;
|
||||||
|
|
||||||
|
public readonly Vector<T> value;
|
||||||
|
|
||||||
|
/// <summary>
/// Number of <typeparamref name="T"/> elements held by one lane, i.e. <c>Vector&lt;T&gt;.Count</c>.
/// </summary>
public static int LaneWidth
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Vector<T>.Count;
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A lane whose elements are all zero.
/// </summary>
public static WideLane<T> Zero
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector<T>.Zero);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A lane whose elements are all one.
/// </summary>
public static WideLane<T> One
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector<T>.One);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A lane whose elements are all <c>T.MinValue</c>.
/// </summary>
public static WideLane<T> MinValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector.Create(T.MinValue));
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// A lane whose elements are all <c>T.MaxValue</c>.
/// </summary>
public static WideLane<T> MaxValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector.Create(T.MaxValue));
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads a single element of the lane.
/// </summary>
/// <param name="index">Element index, forwarded to the indexer of the wrapped vector.</param>
public readonly T this[int index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return value[index];
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Initializes the per-type statics: the lane-index vector (0, 1, 2, ...) used by
/// <see cref="Sequence"/> and the six compaction shuffle tables used by <c>CompressStore</c>.
/// </summary>
/// <remarks>
/// NOTE(review): a static constructor of a generic type runs once per closed type, so every
/// distinct <typeparamref name="T"/> calls the table generators again even though the tables do
/// not depend on <typeparamref name="T"/>. Consider hoisting them into a non-generic holder.
/// </remarks>
static WideLane()
{
    var laneCount = LaneWidth;
    var pLaneIndices = stackalloc T[laneCount];

    for (var lane = 0; lane < laneCount; lane++)
    {
        pLaneIndices[lane] = T.CreateChecked(lane);
    }

    s_indices = Vector.Load(pLaneIndices);

    s_shuffleTable128_32bit = ShuffleTableGenerator.ComputeShuffleTable128_32Bit();
    s_shuffleTable128_64bit = ShuffleTableGenerator.ComputeShuffleTable128_64Bit();
    s_shuffleTable256_32bit = ShuffleTableGenerator.ComputeShuffleTable256_32Bit();
    s_shuffleTable256_64bit = ShuffleTableGenerator.ComputeShuffleTable256_64Bit();
    s_shuffleTable512_32bit = ShuffleTableGenerator.ComputeShuffleTable512_32Bit();
    s_shuffleTable512_64bit = ShuffleTableGenerator.ComputeShuffleTable512_64Bit();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Wraps an existing vector as a lane.
/// </summary>
/// <param name="value">The vector stored in <see cref="value"/>.</param>
public WideLane(Vector<T> value) => this.value = value;
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise floor for <see cref="float"/> and <see cref="double"/> vectors.
/// For every other element type the input is returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<T> VectorFloor(Vector<T> vector)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Floor(Unsafe.As<Vector<T>, Vector<float>>(ref vector));
        return Unsafe.As<Vector<float>, Vector<T>>(ref singles);
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Floor(Unsafe.As<Vector<T>, Vector<double>>(ref vector));
        return Unsafe.As<Vector<double>, Vector<T>>(ref doubles);
    }

    return vector;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise truncation toward zero for <see cref="float"/> and <see cref="double"/> vectors.
/// For every other element type the input is returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<T> VectorTruncate(Vector<T> vector)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Truncate(Unsafe.As<Vector<T>, Vector<float>>(ref vector));
        return Unsafe.As<Vector<float>, Vector<T>>(ref singles);
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Truncate(Unsafe.As<Vector<T>, Vector<double>>(ref vector));
        return Unsafe.As<Vector<double>, Vector<T>>(ref doubles);
    }

    return vector;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a lane with every element set to <paramref name="value"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(T value)
{
    return new WideLane<T>(Vector.Create(value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates a lane from the leading elements of <paramref name="values"/>, as done by
/// <c>Vector.Create(ReadOnlySpan&lt;T&gt;)</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(params ReadOnlySpan<T> values)
{
    return new WideLane<T>(Vector.Create(values));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Wraps an existing vector as a lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(Vector<T> value)
{
    return new WideLane<T>(value);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Creates the arithmetic sequence <c>start + step * i</c>, where <c>i</c> is the element index.
/// </summary>
/// <param name="start">Value of element 0.</param>
/// <param name="step">Difference between consecutive elements.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sequence(T start, T step)
{
    var offsets = Vector.Create(step) * s_indices;
    return new WideLane<T>(Vector.Create(start) + offsets);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Loads a full lane starting at the element referenced by <paramref name="value"/>.
/// No bounds check is performed (<c>Vector.LoadUnsafe</c>).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Load(ref T value)
{
    return new WideLane<T>(Vector.LoadUnsafe(ref value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Loads a full lane from the memory at <paramref name="pValue"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Load(T* pValue)
{
    return new WideLane<T>(Vector.Load(pValue));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reinterprets the raw bits of a lane of <typeparamref name="U"/> as a lane of
/// <typeparamref name="T"/>. This is a bit cast, not a numeric conversion.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> CastFrom<U>(WideLane<U> value)
    where U : unmanaged, INumber<U>, IMinMaxValue<U>, IBitwiseOperators<U, U, U>
    => new(Unsafe.As<WideLane<U>, Vector<T>>(ref value));
|
||||||
|
|
||||||
|
/// <summary>
/// Writes the whole lane starting at the element referenced by <paramref name="destination"/>.
/// No bounds check is performed (<c>StoreUnsafe</c>).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(ref T destination)
{
    value.StoreUnsafe(ref destination);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Writes the whole lane to the memory at <paramref name="pDestination"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(T* pDestination)
{
    value.Store(pDestination);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Left-packs the elements selected by <paramref name="mask"/> and writes them starting at
/// <paramref name="destination"/>. Forwards to the pointer overload.
/// </summary>
/// <param name="mask">Per-element selection mask.</param>
/// <param name="destination">First element of the destination buffer.</param>
/// <returns>The number of selected elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(WideLane<T> mask, ref T destination)
{
    // The reference may point into a managed array; pin it for the duration of the raw-pointer
    // writes so the GC cannot relocate the target (Unsafe.AsPointer does not pin).
    fixed (T* pDestination = &destination)
    {
        return CompressStore(mask, pDestination);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Left-packs the elements selected by <paramref name="mask"/> and writes them to
/// <paramref name="pDestination"/>.
/// </summary>
/// <param name="mask">
/// Per-element selection mask. An element is selected when the most significant bit of the
/// corresponding mask element is set (the convention of <c>ExtractMostSignificantBits</c>).
/// </param>
/// <param name="pDestination">
/// Destination buffer. The accelerated paths always store a full vector, so the buffer must
/// have room for <see cref="LaneWidth"/> elements no matter how many are selected; only the
/// first <c>count</c> elements are meaningful.
/// </param>
/// <returns>The number of selected elements.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(WideLane<T> mask, T* pDestination)
{
    var size = sizeof(T);

    if (LaneWidth == Vector512<T>.Count && Vector512.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector512<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector512<uint>>(ref mask);

            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 16 indices.
            var shuffle = Vector512.Load(s_shuffleTable512_32bit + (moveMask * 16));
            var compressed = Vector512.Shuffle(vec, shuffle);

            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }

        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector512<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector512<ulong>>(ref mask);

            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 8 indices.
            var shuffle = Vector512.Load(s_shuffleTable512_64bit + (moveMask * 8));
            var compressed = Vector512.Shuffle(vec, shuffle);

            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }
    else if (LaneWidth == Vector256<T>.Count && Vector256.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector256<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector256<uint>>(ref mask);

            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 8 indices.
            var shuffle = Vector256.Load(s_shuffleTable256_32bit + (moveMask * 8));
            var compressed = Vector256.Shuffle(vec, shuffle);

            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }

        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector256<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector256<ulong>>(ref mask);

            // With four 64-bit elements only the low 4 bits are populated (values 0-15).
            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 4 indices.
            var shuffle = Vector256.Load(s_shuffleTable256_64bit + (moveMask * 4));
            var compressed = Vector256.Shuffle(vec, shuffle);

            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }
    else if (LaneWidth == Vector128<T>.Count && Vector128.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector128<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector128<uint>>(ref mask);

            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 4 indices.
            var shuffle = Vector128.Load(s_shuffleTable128_32bit + (moveMask * 4));
            var compressed = Vector128.Shuffle(vec, shuffle);

            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }

        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector128<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector128<ulong>>(ref mask);

            var moveMask = m.ExtractMostSignificantBits();
            // Each table row (control vector) holds 2 indices.
            var shuffle = Vector128.Load(s_shuffleTable128_64bit + (moveMask * 2));
            var compressed = Vector128.Shuffle(vec, shuffle);

            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }

    // Portable scalar path (1- and 2-byte elements, or no hardware acceleration).
    // Test the most significant bit of each mask element so the selection rule matches the
    // accelerated paths. Comparing against ~T.Zero would never match for float/double, where
    // the all-ones bit pattern is a NaN and NaN never compares equal.
    ref var maskBytes = ref Unsafe.As<WideLane<T>, byte>(ref mask);
    var signByteOffset = System.BitConverter.IsLittleEndian ? size - 1 : 0;

    var count = 0;
    for (var i = 0; i < LaneWidth; i++)
    {
        var signByte = Unsafe.Add(ref maskBytes, (i * size) + signByteOffset);
        if ((signByte & 0x80) != 0)
        {
            pDestination[count++] = value[i];
        }
    }

    return count;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the wrapped vector.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Vector<T> AsVector()
{
    return value;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>Element-wise addition.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator +(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value + b.value);
}
|
||||||
|
/// <summary>Adds the scalar <paramref name="b"/> to every element of <paramref name="a"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator +(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value + broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise subtraction.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value - b.value);
}
|
||||||
|
/// <summary>Subtracts the scalar <paramref name="b"/> from every element of <paramref name="a"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value - broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise multiplication.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator *(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value * b.value);
}
|
||||||
|
/// <summary>Multiplies every element of <paramref name="a"/> by the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator *(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value * broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise division.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator /(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value / b.value);
}
|
||||||
|
/// <summary>Divides every element of <paramref name="a"/> by the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator /(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value / broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise remainder computed as <c>a - floor(a / b) * b</c>.
/// </summary>
/// <remarks>
/// The floor step only applies to <see cref="float"/> and <see cref="double"/>; for other
/// element types the quotient is used as produced by the vector division.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator %(WideLane<T> a, WideLane<T> b)
{
    var quotient = VectorFloor(a.value / b.value);
    return new WideLane<T>(a.value - (quotient * b.value));
}
|
||||||
|
/// <summary>
/// Element-wise remainder against the scalar <paramref name="b"/>, computed as
/// <c>a - floor(a / b) * b</c> (the floor step only applies to float and double).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator %(WideLane<T> a, T b)
{
    var divisor = Vector.Create(b);
    var quotient = VectorFloor(a.value / divisor);
    return new WideLane<T>(a.value - (quotient * divisor));
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise negation.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a)
{
    return new WideLane<T>(-a.value);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise AND of the two lanes.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator &(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value & b.value);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise AND of every element of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator &(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value & broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise OR of the two lanes.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator |(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value | b.value);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise OR of every element of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator |(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value | broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise XOR of the two lanes.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ^(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value ^ b.value);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise XOR of every element of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ^(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value ^ broadcast);
}
|
||||||
|
|
||||||
|
/// <summary>Bitwise complement of the lane.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ~(WideLane<T> a)
{
    return new WideLane<T>(~a.value);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>Element-wise absolute value.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Abs(WideLane<T> value)
{
    return new WideLane<T>(Vector.Abs(value.value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise floor. Only <see cref="float"/> and <see cref="double"/> lanes are rounded;
/// lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Floor(WideLane<T> value) => new(VectorFloor(value.value));
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise fractional part, computed as <c>value - floor(value)</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Frac(WideLane<T> value)
{
    var whole = VectorFloor(value.value);
    return new WideLane<T>(value.value - whole);
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise square root.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sqrt(WideLane<T> value)
{
    return new WideLane<T>(Vector.SquareRoot(value.value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise linear interpolation: <c>a + (b - a) * t</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Lerp(WideLane<T> a, WideLane<T> b, WideLane<T> t)
{
    var span = b.value - a.value;
    return new WideLane<T>(a.value + (span * t.value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise <c>a * b + c</c>. <see cref="float"/> and <see cref="double"/> lanes use
/// <c>Vector.FusedMultiplyAdd</c>; every other element type uses a separate multiply and add.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> MultipleAdd(WideLane<T> a, WideLane<T> b, WideLane<T> c)
{
    if (typeof(T) == typeof(float))
    {
        var fused = Vector.FusedMultiplyAdd(
            Unsafe.As<WideLane<T>, Vector<float>>(ref a),
            Unsafe.As<WideLane<T>, Vector<float>>(ref b),
            Unsafe.As<WideLane<T>, Vector<float>>(ref c));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref fused));
    }

    if (typeof(T) == typeof(double))
    {
        var fused = Vector.FusedMultiplyAdd(
            Unsafe.As<WideLane<T>, Vector<double>>(ref a),
            Unsafe.As<WideLane<T>, Vector<double>>(ref b),
            Unsafe.As<WideLane<T>, Vector<double>>(ref c));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref fused));
    }

    var product = a.value * b.value;
    return new WideLane<T>(product + c.value);
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise minimum.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Min(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(Vector.Min(a.value, b.value));
}
|
||||||
|
|
||||||
|
/// <summary>Element-wise maximum.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Max(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(Vector.Max(a.value, b.value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise clamp of <paramref name="value"/> to the range
/// [<paramref name="min"/>, <paramref name="max"/>].
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Clamp(WideLane<T> value, WideLane<T> min, WideLane<T> max)
{
    return new WideLane<T>(Vector.Clamp(value.value, min.value, max.value));
}
|
||||||
|
|
||||||
|
/// <summary>Clamps every element to the range [0, 1].</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Saturate(WideLane<T> value)
{
    var lower = Create(T.Zero);
    var upper = Create(T.One);
    return Clamp(value, lower, upper);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise sine for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sin(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Sin(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Sin(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise cosine for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Cos(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Cos(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Cos(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Computes sine and cosine together for <see cref="float"/> and <see cref="double"/> lanes.
/// For any other element type both results are the unchanged input.
/// </summary>
/// <returns>The pair (sin, cos).</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static (WideLane<T> sin, WideLane<T> cos) SinCos(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var (sine, cosine) = Vector.SinCos(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        var sinLane = new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref sine));
        var cosLane = new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref cosine));
        return (sinLane, cosLane);
    }

    if (typeof(T) == typeof(double))
    {
        var (sine, cosine) = Vector.SinCos(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        var sinLane = new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref sine));
        var cosLane = new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref cosine));
        return (sinLane, cosLane);
    }

    return (value, value);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Approximates the element-wise tangent with an odd degree-5 polynomial.
/// </summary>
/// <remarks>
/// The argument is first reduced with <c>x = value - pi * Round(value / pi)</c>, which places it
/// in roughly [-pi/2, pi/2]. The result is then <c>x * (1 + x^2 * (c1 + c2 * x^2))</c>.
/// NOTE(review): a polynomial of this degree cannot follow the pole of tan near +-pi/2, so
/// accuracy presumably degrades towards the ends of the reduced range - confirm the intended domain.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Tan(WideLane<T> value)
{
    // Polynomial coefficients, close to the Taylor terms 1/3 and 2/15.
    var cubicCoeff = Create(T.CreateChecked(0.3333314036));
    var quinticCoeff = Create(T.CreateChecked(0.1333923995));

    // Range reduction by the nearest multiple of pi.
    var pi = Create(T.CreateChecked(Math.PI));
    var reduced = value - pi * Round(value / pi);
    var squared = reduced * reduced;

    // Horner evaluation: inner = c1 + c2 * x^2, outer = 1 + x^2 * inner.
    var inner = MultipleAdd(squared, quinticCoeff, cubicCoeff);
    var outer = MultipleAdd(squared, inner, One);

    // x * outer, expressed as a multiply-add with a zero addend.
    return MultipleAdd(reduced, outer, Zero);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Approximates the element-wise arcsine through the identity <c>asin(x) = pi/2 - acos(x)</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Asin(WideLane<T> value)
    => Create(T.CreateChecked(Math.PI / 2)) - Acos(value);
|
||||||
|
|
||||||
|
/// <summary>
/// Approximates the element-wise arccosine.
/// </summary>
/// <remarks>
/// For <c>x = |value|</c> the estimate is <c>sqrt(1 - x) * (c0 + c1*x + c2*x^2 + c3*x^3)</c>.
/// Negative inputs are mapped with <c>acos(value) = pi - acos(-value)</c>.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Acos(WideLane<T> value)
{
    var k0 = Create(T.CreateChecked(1.5707288f)); // close to pi/2
    var k1 = Create(T.CreateChecked(-0.2121144f));
    var k2 = Create(T.CreateChecked(0.0742610f));
    var k3 = Create(T.CreateChecked(-0.0187293f));

    var magnitude = Abs(value);

    // Horner evaluation of the cubic, highest power first.
    var cubic = MultipleAdd(magnitude, k3, k2);
    cubic = MultipleAdd(magnitude, cubic, k1);
    cubic = MultipleAdd(magnitude, cubic, k0);

    var positiveBranch = cubic * Sqrt(One - magnitude);

    // Reflect the result for negative inputs.
    var pi = Create(T.CreateChecked(Math.PI));
    var negativeMask = LessThan(value, Zero);
    return Select(negativeMask, pi - positiveBranch, positiveBranch);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Approximates the element-wise arctangent.
/// </summary>
/// <remarks>
/// The core estimate <c>t * (c1 + c2 * t^2)</c> is only meaningful for <c>|t| &lt;= 1</c>.
/// Inputs with a larger magnitude are therefore reduced with
/// <c>atan(x) = pi/2 - atan(1/x)</c> (for x &gt; 0) before the polynomial is evaluated, and the
/// sign of the input is restored at the end. Results for <c>|value| &lt;= 1</c> are the same as
/// evaluating the polynomial directly on <paramref name="value"/>.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Atan(WideLane<T> value)
{
    var c1 = Create(T.CreateChecked(0.97239411f));
    var c2 = Create(T.CreateChecked(-0.19194795f));
    var halfPi = Create(T.CreateChecked(1.570796327f));

    var magnitude = Abs(value);
    var isLarge = GreaterThan(magnitude, One);

    // t = |value| when |value| <= 1, otherwise 1 / |value|.
    // The denominator is either 1 or a magnitude greater than 1, so it is never zero.
    var numerator = Select(isLarge, One, magnitude);
    var denominator = Select(isLarge, magnitude, One);
    var t = numerator / denominator;

    var poly = MultipleAdd(t * t, c2, c1);
    var angle = t * poly;

    // Undo the reciprocal reduction.
    angle = Select(isLarge, halfPi - angle, angle);

    // atan is odd: give the result the sign of the input.
    return CopySign(angle, value);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Approximates the element-wise two-argument arctangent of <paramref name="y"/> / <paramref name="x"/>.
/// </summary>
/// <remarks>
/// The ratio of the smaller to the larger magnitude (always in [0, 1]) is fed to the polynomial
/// <c>t * (c1 + c2 * t^2)</c>; the angle is then rebuilt from the octant and the signs of the
/// inputs. When both inputs are zero the result is 0 instead of the outcome of a 0/0 division.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Atan2(WideLane<T> y, WideLane<T> x)
{
    var absX = Abs(x);
    var absY = Abs(y);

    // 1. Determine the ratio fed to the polynomial.
    // If |x| >= |y| we are in the "shallow" region and use |y| / |x|.
    // If |y| > |x| we are in the "steep" region and use |x| / |y| (the result is transformed below).
    var yGtX = GreaterThan(absY, absX);

    // Select numerator and denominator so that the ratio is always in [0, 1].
    var num = Select(yGtX, absX, absY);
    var den = Select(yGtX, absY, absX);

    // den = max(|x|, |y|) is zero only when both inputs are zero. Divide by 1 in that case so
    // that t = 0 rather than 0/0 (NaN for floating point, a division by zero for integers).
    var denIsZero = new WideLane<T>(Vector.Equals(den.value, Vector<T>.Zero));
    den = Select(denIsZero, One, den);

    var t = num / den;
    var t2 = t * t;

    // 2. Polynomial approximation (odd function): t * (c1 + c2 * t^2).
    var c1 = Create(T.CreateChecked(0.97239411f));
    var c2 = Create(T.CreateChecked(-0.19194795f));
    var poly = MultipleAdd(c2, t2, c1);
    var result = t * poly;

    // 3. Reconstruct the angle.
    // If the ratio was inverted (steep region): atan(|y|/|x|) = pi/2 - atan(|x|/|y|).
    var halfPi = Create(T.CreateChecked(1.570796327f));
    result = Select(yGtX, halfPi - result, result);

    // 4. Adjust for quadrants.
    // If x < 0 we are in quadrant 2 or 3, so mirror the angle around pi/2.
    var pi = Create(T.CreateChecked(3.141592654f));
    var xLtZero = LessThan(x, Zero);
    result = Select(xLtZero, pi - result, result);

    // Everything above worked on |y| and |x|, so the angle is non-negative here.
    // If y < 0 the result is negated (standard atan2 convention).
    var yLtZero = LessThan(y, Zero);
    return Select(yLtZero, -result, result);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise power computed as <c>Exp(y * Log(x))</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Pow(WideLane<T> x, WideLane<T> y)
{
    var exponent = y * Log(x);
    return Exp(exponent);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise natural exponential for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Exp(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Exp(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Exp(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise base-2 exponential, computed as <c>Pow(2, value)</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Exp2(WideLane<T> value) => Pow(Create(T.CreateChecked(2)), value);
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise natural logarithm for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Log(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Log(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Log(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise base-2 logarithm for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Log2(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Log2(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Log2(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise ceiling for <see cref="float"/> and <see cref="double"/> lanes.
/// Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Ceil(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Ceiling(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Ceiling(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise rounding via <c>Vector.Round</c> for <see cref="float"/> and
/// <see cref="double"/> lanes. Lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Round(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var singles = Vector.Round(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref singles));
    }

    if (typeof(T) == typeof(double))
    {
        var doubles = Vector.Round(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref doubles));
    }

    return value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise truncation toward zero. Only <see cref="float"/> and <see cref="double"/>
/// lanes are rounded; lanes of any other element type are returned unchanged.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Trunc(WideLane<T> value) => new(VectorTruncate(value.value));
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise sign: 1 for positive elements, -1 for negative elements and 0 otherwise
/// (including elements for which neither comparison holds, such as NaN).
/// </summary>
/// <remarks>
/// The negative case uses <c>-One</c> rather than <c>~Zero</c>: the all-ones bit pattern equals
/// -1 only for signed integers, while for <see cref="float"/> and <see cref="double"/> it is a NaN.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sign(WideLane<T> value)
{
    var isPositive = GreaterThan(value, Zero);
    var isNegative = LessThan(value, Zero);

    var negativeOrZero = Select(isNegative, -One, Zero);
    return Select(isPositive, One, negativeOrZero);
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise <c>Vector.CopySign</c>: the magnitude of <paramref name="magnitude"/> combined
/// with the sign of <paramref name="sign"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> CopySign(WideLane<T> magnitude, WideLane<T> sign)
{
    return new WideLane<T>(Vector.CopySign(magnitude.value, sign.value));
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise reciprocal.
/// </summary>
/// <remarks>
/// For <see cref="float"/> lanes whose width matches a 128-bit (SSE) or 256-bit (AVX) vector the
/// hardware reciprocal instruction is used; every other case computes <c>1 / value</c>.
/// NOTE(review): the x86 reciprocal instructions are approximations, so precision presumably
/// differs between the two paths - confirm callers tolerate that.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Rcp(WideLane<T> value)
{
    var isSingle = typeof(T) == typeof(float);

    if (isSingle && Sse.IsSupported && LaneWidth == Vector128<float>.Count)
    {
        var estimate = Sse.Reciprocal(Unsafe.As<WideLane<T>, Vector128<float>>(ref value));
        return Unsafe.As<Vector128<float>, WideLane<T>>(ref estimate);
    }

    if (isSingle && Avx.IsSupported && LaneWidth == Vector256<float>.Count)
    {
        var estimate = Avx.Reciprocal(Unsafe.As<WideLane<T>, Vector256<float>>(ref value));
        return Unsafe.As<Vector256<float>, WideLane<T>>(ref estimate);
    }

    var ones = Create(T.One);
    return ones / value;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Element-wise reciprocal square root.
/// </summary>
/// <remarks>
/// For <see cref="float"/> lanes whose width matches a 128-bit (SSE) or 256-bit (AVX) vector the
/// hardware reciprocal-square-root instruction is used; every other case computes
/// <c>1 / Sqrt(value)</c>.
/// NOTE(review): the x86 instructions are approximations, so precision presumably differs
/// between the two paths - confirm callers tolerate that.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Rsqrt(WideLane<T> value)
{
    var isSingle = typeof(T) == typeof(float);

    if (isSingle && Sse.IsSupported && LaneWidth == Vector128<float>.Count)
    {
        var estimate = Sse.ReciprocalSqrt(Unsafe.As<WideLane<T>, Vector128<float>>(ref value));
        return Unsafe.As<Vector128<float>, WideLane<T>>(ref estimate);
    }

    if (isSingle && Avx.IsSupported && LaneWidth == Vector256<float>.Count)
    {
        var estimate = Avx.ReciprocalSqrt(Unsafe.As<WideLane<T>, Vector256<float>>(ref value));
        return Unsafe.As<Vector256<float>, WideLane<T>>(ref estimate);
    }

    var ones = Create(T.One);
    return ones / Sqrt(value);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>
/// Blends two lane vectors through <see cref="Vector.ConditionalSelect{T}(Vector{T}, Vector{T}, Vector{T})"/>:
/// bits set in <paramref name="conditionMask"/> take the value from <paramref name="ifTrue"/>,
/// cleared bits take it from <paramref name="ifFalse"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Select(WideLane<T> conditionMask, WideLane<T> ifTrue, WideLane<T> ifFalse)
{
    var blended = Vector.ConditionalSelect(conditionMask.value, ifTrue.value, ifFalse.value);
    return new WideLane<T>(blended);
}
|
||||||
|
|
||||||
|
/// <summary>Lane-wise <c>a &gt; b</c>; the result is the mask produced by <c>Vector.GreaterThan</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> GreaterThan(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.GreaterThan(a.value, b.value);
    return new WideLane<T>(mask);
}
|
||||||
|
|
||||||
|
/// <summary>Lane-wise <c>a &gt;= b</c>; the result is the mask produced by <c>Vector.GreaterThanOrEqual</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> GreaterThanOrEqual(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.GreaterThanOrEqual(a.value, b.value);
    return new WideLane<T>(mask);
}
|
||||||
|
|
||||||
|
/// <summary>Lane-wise <c>a &lt; b</c>; the result is the mask produced by <c>Vector.LessThan</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> LessThan(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.LessThan(a.value, b.value);
    return new WideLane<T>(mask);
}
|
||||||
|
|
||||||
|
/// <summary>Lane-wise <c>a &lt;= b</c>; the result is the mask produced by <c>Vector.LessThanOrEqual</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> LessThanOrEqual(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.LessThanOrEqual(a.value, b.value);
    return new WideLane<T>(mask);
}
|
||||||
|
|
||||||
|
/// <summary>Lane-wise <c>a == b</c>; the result is the mask produced by <c>Vector.Equals</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Equal(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.Equals(a.value, b.value);
    return new WideLane<T>(mask);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>Returns <c>true</c> unless every lane of <paramref name="mask"/> compares equal to zero.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Any(WideLane<T> mask)
{
    var everyLaneIsZero = Vector.EqualsAll(mask.value, Vector<T>.Zero);
    return !everyLaneIsZero;
}
|
||||||
|
|
||||||
|
/// <summary>Returns <c>true</c> when every bit of every lane in <paramref name="mask"/> is set.</summary>
/// <param name="mask">A comparison mask (all-bits-set lanes mean "true").</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool All(WideLane<T> mask)
{
    // Compare the raw bytes rather than the typed lanes: for float/double an all-bits-set lane is
    // NaN, and NaN never compares equal to itself, so a typed comparison would always report false.
    return Vector.EqualsAll(Vector.AsVectorByte(mask.value), Vector<byte>.AllBitsSet);
}
|
||||||
|
|
||||||
|
/// <summary>Returns <c>true</c> when every lane of <paramref name="mask"/> compares equal to zero.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool None(WideLane<T> mask)
{
    var everyLaneIsZero = Vector.EqualsAll(mask.value, Vector<T>.Zero);
    return everyLaneIsZero;
}
|
||||||
|
|
||||||
|
|
||||||
|
/// <summary>Formats the lanes using the wrapped vector's own string representation.</summary>
public override string ToString() => value.ToString();
|
||||||
|
}
|
||||||
9
Misaki.HighPerformance.Mathematics/AutoSIMDAttribute.cs
Normal file
9
Misaki.HighPerformance.Mathematics/AutoSIMDAttribute.cs
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Mathematics;
|
||||||
|
|
||||||
|
/// <summary>
/// Marker attribute with no members.
/// </summary>
/// <remarks>
/// Derives from <see cref="Attribute"/> so that it can actually be applied as <c>[AutoSIMD]</c>;
/// a plain class with an "Attribute" suffix is not usable in attribute position.
/// NOTE(review): the intended consumer and valid targets are not visible here — add an
/// <c>[AttributeUsage]</c> restriction once they are known.
/// </remarks>
internal sealed class AutoSIMDAttribute : Attribute
{
}
|
||||||
@@ -2503,7 +2503,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float tan(float x) => MathF.Tan(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise tangent of a float2 vector.</summary>
|
/// <summary>Returns the componentwise tangent of a float2 vector.</summary>
|
||||||
@@ -2540,7 +2540,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double tan(double x) => Math.Tan(x); // double-precision tangent
|
||||||
|
|
||||||
/// <summary>Returns the componentwise tangent of a double2 vector.</summary>
|
/// <summary>Returns the componentwise tangent of a double2 vector.</summary>
|
||||||
@@ -2577,7 +2577,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float tanh(float x) => MathF.Tanh(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic tangent of a float2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic tangent of a float2 vector.</summary>
|
||||||
@@ -2614,7 +2614,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double tanh(double x) => Math.Tanh(x); // double-precision hyperbolic tangent
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic tangent of a double2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic tangent of a double2 vector.</summary>
|
||||||
@@ -2651,7 +2651,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float atan(float x) => MathF.Atan(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arctangent of a float2 vector.</summary>
|
/// <summary>Returns the componentwise arctangent of a float2 vector.</summary>
|
||||||
@@ -2688,7 +2688,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double atan(double x) => Math.Atan(x); // double-precision arctangent
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arctangent of a double2 vector.</summary>
|
/// <summary>Returns the componentwise arctangent of a double2 vector.</summary>
|
||||||
@@ -2726,7 +2726,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float atan2(float y, float x) => MathF.Atan2(y, x); // argument order is (y, x)
|
||||||
|
|
||||||
/// <summary>Returns the componentwise 2-argument arctangent of a pair of floats2 vectors.</summary>
|
/// <summary>Returns the componentwise 2-argument arctangent of a pair of floats2 vectors.</summary>
|
||||||
@@ -2767,7 +2767,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double atan2(double y, double x) => Math.Atan2(y, x); // argument order is (y, x)
|
||||||
|
|
||||||
/// <summary>Returns the 2-argument arctangent of a pair of double2 vectors.</summary>
|
/// <summary>Returns the 2-argument arctangent of a pair of double2 vectors.</summary>
|
||||||
@@ -2807,7 +2807,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float cos(float x) => MathF.Cos(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise cosine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise cosine of a float2 vector.</summary>
|
||||||
@@ -2844,7 +2844,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double cos(double x) => Math.Cos(x); // double-precision cosine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise cosine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise cosine of a double2 vector.</summary>
|
||||||
@@ -2881,7 +2881,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float cosh(float x) => MathF.Cosh(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic cosine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic cosine of a float2 vector.</summary>
|
||||||
@@ -2918,7 +2918,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double cosh(double x) => Math.Cosh(x); // double-precision hyperbolic cosine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic cosine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic cosine of a double2 vector.</summary>
|
||||||
@@ -2955,7 +2955,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float acos(float x) => MathF.Acos(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arccosine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise arccosine of a float2 vector.</summary>
|
||||||
@@ -2992,7 +2992,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double acos(double x) => Math.Acos(x); // double-precision arccosine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arccosine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise arccosine of a double2 vector.</summary>
|
||||||
@@ -3029,7 +3029,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sin(float x) => MathF.Sin(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise sine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise sine of a float2 vector.</summary>
|
||||||
@@ -3066,7 +3066,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sin(double x) => Math.Sin(x); // double-precision sine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise sine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise sine of a double2 vector.</summary>
|
||||||
@@ -3103,7 +3103,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sinh(float x) => MathF.Sinh(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic sine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic sine of a float2 vector.</summary>
|
||||||
@@ -3140,7 +3140,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sinh(double x) => Math.Sinh(x); // double-precision hyperbolic sine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise hyperbolic sine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise hyperbolic sine of a double2 vector.</summary>
|
||||||
@@ -3177,7 +3177,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float asin(float x) => MathF.Asin(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arcsine of a float2 vector.</summary>
|
/// <summary>Returns the componentwise arcsine of a float2 vector.</summary>
|
||||||
@@ -3214,7 +3214,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double asin(double x) => Math.Asin(x); // double-precision arcsine
|
||||||
|
|
||||||
/// <summary>Returns the componentwise arcsine of a double2 vector.</summary>
|
/// <summary>Returns the componentwise arcsine of a double2 vector.</summary>
|
||||||
@@ -3251,7 +3251,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float floor(float x) => MathF.Floor(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a float2 vector value down to the nearest value less or equal to the original value.</summary>
|
/// <summary>Returns the result of rounding each component of a float2 vector value down to the nearest value less or equal to the original value.</summary>
|
||||||
@@ -3288,7 +3288,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double floor(double x) => Math.Floor(x); // double-precision round-down
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a double2 vector value down to the nearest value less or equal to the original value.</summary>
|
/// <summary>Returns the result of rounding each component of a double2 vector value down to the nearest value less or equal to the original value.</summary>
|
||||||
@@ -3325,7 +3325,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float ceil(float x) => MathF.Ceiling(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a float2 vector value up to the nearest value greater or equal to the original value.</summary>
|
/// <summary>Returns the result of rounding each component of a float2 vector value up to the nearest value greater or equal to the original value.</summary>
|
||||||
@@ -3362,7 +3362,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double ceil(double x) => Math.Ceiling(x); // double-precision round-up
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a double2 vector value up to the nearest integral value greater or equal to the original value.</summary>
|
/// <summary>Returns the result of rounding each component of a double2 vector value up to the nearest integral value greater or equal to the original value.</summary>
|
||||||
@@ -3399,7 +3399,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float round(float x) => MathF.Round(x); // default midpoint rule of MathF.Round (ties go to even)
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a float2 vector value to the nearest integral value.</summary>
|
/// <summary>Returns the result of rounding each component of a float2 vector value to the nearest integral value.</summary>
|
||||||
@@ -3436,7 +3436,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double round(double x) => Math.Round(x); // default midpoint rule of Math.Round (ties go to even)
|
||||||
|
|
||||||
/// <summary>Returns the result of rounding each component of a double2 vector value to the nearest integral value.</summary>
|
/// <summary>Returns the result of rounding each component of a double2 vector value to the nearest integral value.</summary>
|
||||||
@@ -3473,7 +3473,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float trunc(float x) => MathF.Truncate(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the result of a componentwise truncation of a float2 value to an integral float2 value.</summary>
|
/// <summary>Returns the result of a componentwise truncation of a float2 value to an integral float2 value.</summary>
|
||||||
@@ -3500,7 +3500,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float4 trunc(float4 x)
{
    // Truncate the backing 128-bit vector in one operation, then convert back to float4.
    var truncated = Vector128.Truncate(x.__v);
    return truncated.Asfloat4();
}
|
||||||
|
|
||||||
|
|
||||||
@@ -3510,7 +3510,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double trunc(double x) => Math.Truncate(x); // double-precision truncation
|
||||||
|
|
||||||
/// <summary>Returns the result of a componentwise truncation of a double2 value to an integral double2 value.</summary>
|
/// <summary>Returns the result of a componentwise truncation of a double2 value to an integral double2 value.</summary>
|
||||||
@@ -3537,7 +3537,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double4 trunc(double4 x)
{
    // Truncate the backing 256-bit vector in one operation, then convert back to double4.
    var truncated = Vector256.Truncate(x.__v);
    return truncated.Asdouble4();
}
|
||||||
|
|
||||||
|
|
||||||
@@ -3805,7 +3805,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float pow(float x, float y) => MathF.Pow(x, y); // y is already a float, so no cast is needed
|
||||||
|
|
||||||
/// <summary>Returns the componentwise result of raising x to the power y.</summary>
|
/// <summary>Returns the componentwise result of raising x to the power y.</summary>
|
||||||
@@ -3846,7 +3846,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double pow(double x, double y) => Math.Pow(x, y); // double-precision power
|
||||||
|
|
||||||
/// <summary>Returns the componentwise result of raising x to the power y.</summary>
|
/// <summary>Returns the componentwise result of raising x to the power y.</summary>
|
||||||
@@ -3886,7 +3886,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp(float x) => MathF.Exp(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-e exponential of x.</summary>
|
/// <summary>Returns the componentwise base-e exponential of x.</summary>
|
||||||
@@ -3923,7 +3923,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp(double x) => Math.Exp(x); // double-precision base-e exponential
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-e exponential of x.</summary>
|
/// <summary>Returns the componentwise base-e exponential of x.</summary>
|
||||||
@@ -3960,7 +3960,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp2(float x)
{
    // 2^x is evaluated as e^(x * ln 2); the literal is ln 2 in single precision.
    const float NaturalLogOfTwo = 0.69314718f;
    return MathF.Exp(x * NaturalLogOfTwo);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-2 exponential of x.</summary>
|
/// <summary>Returns the componentwise base-2 exponential of x.</summary>
|
||||||
@@ -3997,7 +3997,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp2(double x)
{
    // 2^x is evaluated as e^(x * ln 2); the literal is ln 2 in double precision.
    const double NaturalLogOfTwo = 0.693147180559945309;
    return Math.Exp(x * NaturalLogOfTwo);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-2 exponential of x.</summary>
|
/// <summary>Returns the componentwise base-2 exponential of x.</summary>
|
||||||
@@ -4034,7 +4034,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp10(float x)
{
    // 10^x is evaluated as e^(x * ln 10); the literal is ln 10 in single precision.
    const float NaturalLogOfTen = 2.30258509f;
    return MathF.Exp(x * NaturalLogOfTen);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-10 exponential of x.</summary>
|
/// <summary>Returns the componentwise base-10 exponential of x.</summary>
|
||||||
@@ -4071,7 +4071,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp10(double x)
{
    // 10^x is evaluated as e^(x * ln 10); the literal is ln 10 in double precision.
    const double NaturalLogOfTen = 2.302585092994045684;
    return Math.Exp(x * NaturalLogOfTen);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-10 exponential of x.</summary>
|
/// <summary>Returns the componentwise base-10 exponential of x.</summary>
|
||||||
@@ -4108,7 +4108,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log(float x) => MathF.Log(x); // single-precision natural logarithm
|
||||||
|
|
||||||
/// <summary>Returns the componentwise natural logarithm of a float2 vector.</summary>
|
/// <summary>Returns the componentwise natural logarithm of a float2 vector.</summary>
|
||||||
@@ -4145,7 +4145,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log(double x) => Math.Log(x); // double-precision natural logarithm
|
||||||
|
|
||||||
/// <summary>Returns the componentwise natural logarithm of a double2 vector.</summary>
|
/// <summary>Returns the componentwise natural logarithm of a double2 vector.</summary>
|
||||||
@@ -4182,7 +4182,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log2(float x)
{
    // Use the dedicated base-2 routine: the two-argument Log(x, 2) form is evaluated as a quotient
    // of two logarithms, which costs an extra log and a division and is not exact for powers of two.
    return MathF.Log2(x);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-2 logarithm of a float2 vector.</summary>
|
/// <summary>Returns the componentwise base-2 logarithm of a float2 vector.</summary>
|
||||||
@@ -4219,7 +4219,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log2(double x)
{
    // Use the dedicated base-2 routine: the two-argument Log(x, 2.0) form is evaluated as a quotient
    // of two logarithms, which costs an extra log and a division and is not exact for powers of two.
    return Math.Log2(x);
}
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-2 logarithm of a double2 vector.</summary>
|
/// <summary>Returns the componentwise base-2 logarithm of a double2 vector.</summary>
|
||||||
@@ -4255,7 +4255,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log10(float x) => MathF.Log10(x); // single-precision base-10 logarithm
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-10 logarithm of a float2 vector.</summary>
|
/// <summary>Returns the componentwise base-10 logarithm of a float2 vector.</summary>
|
||||||
@@ -4292,7 +4292,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log10(double x) => Math.Log10(x); // double-precision base-10 logarithm
|
||||||
|
|
||||||
/// <summary>Returns the componentwise base-10 logarithm of a double2 vector.</summary>
|
/// <summary>Returns the componentwise base-10 logarithm of a double2 vector.</summary>
|
||||||
@@ -4519,7 +4519,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sqrt(float x) => MathF.Sqrt(x); // single-precision path, no widening to double
|
||||||
|
|
||||||
/// <summary>Returns the componentwise square root of a float2 vector.</summary>
|
/// <summary>Returns the componentwise square root of a float2 vector.</summary>
|
||||||
@@ -4556,7 +4556,7 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sqrt(double x) => Math.Sqrt(x); // double-precision square root
|
||||||
|
|
||||||
/// <summary>Returns the componentwise square root of a double2 vector.</summary>
|
/// <summary>Returns the componentwise square root of a double2 vector.</summary>
|
||||||
@@ -4727,8 +4727,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float2 normalizesafe(float2 x, float2 defaultvalue = new float2())
{
    // The squared length both scales x and decides whether the scaled result is used.
    var lengthSquared = dot(x, x);
    var scaled = x * rsqrt(lengthSquared);

    // Fall back to defaultvalue unless the squared length exceeds FLT_MIN_NORMAL.
    return select(defaultvalue, scaled, lengthSquared > FLT_MIN_NORMAL);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -4741,8 +4741,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float3 normalizesafe(float3 x, float3 defaultvalue = new float3())
{
    // The squared length both scales x and decides whether the scaled result is used.
    var lengthSquared = dot(x, x);
    var scaled = x * rsqrt(lengthSquared);

    // Fall back to defaultvalue unless the squared length exceeds FLT_MIN_NORMAL.
    return select(defaultvalue, scaled, lengthSquared > FLT_MIN_NORMAL);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -4755,8 +4755,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float4 normalizesafe(float4 x, float4 defaultvalue = new float4())
{
    // The squared length both scales x and decides whether the scaled result is used.
    var lengthSquared = dot(x, x);
    var scaled = x * rsqrt(lengthSquared);

    // Fall back to defaultvalue unless the squared length exceeds FLT_MIN_NORMAL.
    return select(defaultvalue, scaled, lengthSquared > FLT_MIN_NORMAL);
}
|
||||||
|
|
||||||
|
|
||||||
@@ -4770,8 +4770,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double2 normalizesafe(double2 x, double2 defaultvalue = new double2())
{
    // The squared length both scales x and decides whether the scaled result is used.
    var lengthSquared = dot(x, x);
    var scaled = x * rsqrt(lengthSquared);

    // NOTE(review): this double overload compares against the FLT_-prefixed threshold — confirm
    // that a single-precision limit is intended here.
    return select(defaultvalue, scaled, lengthSquared > FLT_MIN_NORMAL);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -4784,8 +4784,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double3 normalizesafe(double3 x, double3 defaultvalue = new double3())
{
    // The squared length both scales x and decides whether the scaled result is used.
    var lengthSquared = dot(x, x);
    var scaled = x * rsqrt(lengthSquared);

    // NOTE(review): this double overload compares against the FLT_-prefixed threshold — confirm
    // that a single-precision limit is intended here.
    return select(defaultvalue, scaled, lengthSquared > FLT_MIN_NORMAL);
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -4798,8 +4798,8 @@ public static partial class math
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static double4 normalizesafe(double4 x, double4 defaultvalue = new double4())
|
public static double4 normalizesafe(double4 x, double4 defaultvalue = new double4())
|
||||||
{
|
{
|
||||||
var len = math.dot(x, x);
|
var len = dot(x, x);
|
||||||
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL);
|
return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -8665,7 +8665,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.z - d1.y;
|
var y1 = d2.z - d1.y;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8705,7 +8705,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.x + d1.z;
|
var y1 = d2.x + d1.z;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8745,7 +8745,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.y + d1.x;
|
var y1 = d2.y + d1.x;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8785,7 +8785,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.x - d1.z;
|
var y1 = d2.x - d1.z;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8825,7 +8825,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.y - d1.x;
|
var y1 = d2.y - d1.x;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8864,7 +8864,7 @@ public static partial class math
|
|||||||
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
|
||||||
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
|
||||||
var d3 = qv * qv;
|
var d3 = qv * qv;
|
||||||
var euler = Misaki.HighPerformance.Mathematics.float3.zero;
|
var euler = Mathematics.float3.zero;
|
||||||
|
|
||||||
var y1 = d2.z + d1.y;
|
var y1 = d2.z + d1.y;
|
||||||
if (y1 * y1 < cutoff)
|
if (y1 * y1 < cutoff)
|
||||||
@@ -8898,24 +8898,24 @@ public static partial class math
|
|||||||
/// <param name="order">The order in which the rotations are applied.</param>
|
/// <param name="order">The order in which the rotations are applied.</param>
|
||||||
/// <returns>The Euler angle representation of the quaternion in the specified order.</returns>
|
/// <returns>The Euler angle representation of the quaternion in the specified order.</returns>
|
||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static float3 Euler(quaternion q, math.RotationOrder order = math.RotationOrder.Default)
|
public static float3 Euler(quaternion q, math.RotationOrder order = RotationOrder.Default)
|
||||||
{
|
{
|
||||||
switch (order)
|
switch (order)
|
||||||
{
|
{
|
||||||
case math.RotationOrder.XYZ:
|
case RotationOrder.XYZ:
|
||||||
return EulerXYZ(q);
|
return EulerXYZ(q);
|
||||||
case math.RotationOrder.XZY:
|
case RotationOrder.XZY:
|
||||||
return EulerXZY(q);
|
return EulerXZY(q);
|
||||||
case math.RotationOrder.YXZ:
|
case RotationOrder.YXZ:
|
||||||
return EulerYXZ(q);
|
return EulerYXZ(q);
|
||||||
case math.RotationOrder.YZX:
|
case RotationOrder.YZX:
|
||||||
return EulerYZX(q);
|
return EulerYZX(q);
|
||||||
case math.RotationOrder.ZXY:
|
case RotationOrder.ZXY:
|
||||||
return EulerZXY(q);
|
return EulerZXY(q);
|
||||||
case math.RotationOrder.ZYX:
|
case RotationOrder.ZYX:
|
||||||
return EulerZYX(q);
|
return EulerZYX(q);
|
||||||
default:
|
default:
|
||||||
return Misaki.HighPerformance.Mathematics.float3.zero;
|
return Mathematics.float3.zero;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
#define NOISE_BENCHMARK
|
#define ADD_BENCHMARK
|
||||||
|
|
||||||
using BenchmarkDotNet.Attributes;
|
using BenchmarkDotNet.Attributes;
|
||||||
using Misaki.HighPerformance.Mathematics;
|
using Misaki.HighPerformance.Mathematics;
|
||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
using System.Numerics;
|
using System.Numerics;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
|
|
||||||
@@ -9,7 +10,7 @@ namespace Misaki.HighPerformance.Test.Benchmark;
|
|||||||
|
|
||||||
public class MathematicsBenchmark
|
public class MathematicsBenchmark
|
||||||
{
|
{
|
||||||
#if VECTOR_BENCHMARK
|
#if ADD_BENCHMARK
|
||||||
private Vector4 _va = new Vector4(1, 2, 1, 2);
|
private Vector4 _va = new Vector4(1, 2, 1, 2);
|
||||||
private Vector4 _vb = new Vector4(3, 4, 3, 4);
|
private Vector4 _vb = new Vector4(3, 4, 3, 4);
|
||||||
|
|
||||||
@@ -39,61 +40,52 @@ public class MathematicsBenchmark
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NOISE_BENCHMARK
|
#if FMA_BENCHMARK
|
||||||
|
private Vector4 _va = new Vector4(1, 2, 1, 2);
|
||||||
|
private Vector4 _vb = new Vector4(3, 4, 3, 4);
|
||||||
|
private Vector4 _vc = new Vector4(5, 6, 5, 6);
|
||||||
|
|
||||||
private const int _SIZE = 32;
|
private Vector128<float> _va128 = Vector128.Create(1f, 2f, 1f, 2f);
|
||||||
|
private Vector128<float> _vb128 = Vector128.Create(3f, 4f, 3f, 4f);
|
||||||
|
private Vector128<float> _vc128 = Vector128.Create(5f, 6f, 5f, 6f);
|
||||||
|
|
||||||
|
private float4 _fa = new float4(1, 2, 1, 2);
|
||||||
|
private float4 _fb = new float4(3, 4, 3, 4);
|
||||||
|
private float4 _fc = new float4(5, 6, 5, 6);
|
||||||
|
|
||||||
[Benchmark]
|
[Benchmark]
|
||||||
public unsafe void VectorNoise()
|
public Vector4 Vector4()
|
||||||
{
|
{
|
||||||
var buf = stackalloc float[_SIZE * _SIZE];
|
for (var i = 0; i < 10; i++)
|
||||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
|
|
||||||
{
|
{
|
||||||
buffers = buf,
|
_va = _vb * _vc + _va;
|
||||||
width = _SIZE,
|
|
||||||
height = _SIZE,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (var i = 0; i < _SIZE * _SIZE; i++)
|
|
||||||
{
|
|
||||||
job.Execute(i, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return _va;
|
||||||
}
|
}
|
||||||
|
|
||||||
[Benchmark]
|
[Benchmark]
|
||||||
public unsafe void MathNoise()
|
public Vector128<float> VectorFMA()
|
||||||
{
|
{
|
||||||
var buf = stackalloc float[_SIZE * _SIZE];
|
for (var i = 0; i < 10; i++)
|
||||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
|
|
||||||
{
|
{
|
||||||
buffers = buf,
|
_va128 = System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(_vb128, _vc128, _va128);
|
||||||
width = _SIZE,
|
|
||||||
height = _SIZE,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (var i = 0; i < _SIZE * _SIZE; i++)
|
|
||||||
{
|
|
||||||
job.Execute(i, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return _va128;
|
||||||
}
|
}
|
||||||
|
|
||||||
[Benchmark]
|
[Benchmark]
|
||||||
// This is 10x faster than VectorNoise and MathNoise, but writing a burst like compiler to compile MathNoise into this is incredibly hard.
|
public float4 floatFMA()
|
||||||
public unsafe void MathVNoise()
|
|
||||||
{
|
{
|
||||||
var buf = stackalloc float[_SIZE * _SIZE];
|
for (var i = 0; i < 10; i++)
|
||||||
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMathV
|
|
||||||
{
|
{
|
||||||
buffers = buf,
|
_fa = _fb * _fc + _fa;
|
||||||
width = _SIZE,
|
}
|
||||||
height = _SIZE,
|
|
||||||
};
|
|
||||||
|
|
||||||
for (var i = 0; i < _SIZE * _SIZE / 8; i++)
|
return _fa;
|
||||||
{
|
|
||||||
job.Execute(i, 0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MATRIX_BENCHMARK
|
#if MATRIX_BENCHMARK
|
||||||
|
|||||||
98
Misaki.HighPerformance.Test/Benchmark/SPMDBenchmark.cs
Normal file
98
Misaki.HighPerformance.Test/Benchmark/SPMDBenchmark.cs
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
using BenchmarkDotNet.Attributes;
|
||||||
|
using Misaki.HighPerformance.Jobs;
|
||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Test.Benchmark;
|
||||||
|
|
||||||
|
public unsafe class SPMDBenchmark
|
||||||
|
{
|
||||||
|
private const int _SIZE = 512;
|
||||||
|
|
||||||
|
private JobScheduler _scheduler = null!;
|
||||||
|
private float* _buf;
|
||||||
|
|
||||||
|
[GlobalSetup]
|
||||||
|
public void Setup()
|
||||||
|
{
|
||||||
|
_scheduler = new JobScheduler(Environment.ProcessorCount);
|
||||||
|
_buf = (float*)NativeMemory.Alloc(sizeof(float) * _SIZE * _SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
[GlobalCleanup]
|
||||||
|
public void Cleanup()
|
||||||
|
{
|
||||||
|
_scheduler.Dispose();
|
||||||
|
NativeMemory.Free(_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Benchmark]
|
||||||
|
public void VectorNoiseSingleThread()
|
||||||
|
{
|
||||||
|
var job = new Jobs.NoiseJobVector
|
||||||
|
{
|
||||||
|
buffers = _buf,
|
||||||
|
width = _SIZE,
|
||||||
|
height = _SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
job.Run(_SIZE * _SIZE, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
//[Benchmark]
|
||||||
|
public void VectorNoise()
|
||||||
|
{
|
||||||
|
var job = new Jobs.NoiseJobVector
|
||||||
|
{
|
||||||
|
buffers = _buf,
|
||||||
|
width = _SIZE,
|
||||||
|
height = _SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
var handle = _scheduler.ScheduleParallel(ref job, _SIZE * _SIZE, 64);
|
||||||
|
_scheduler.WaitComplete(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
//[Benchmark]
|
||||||
|
public void MathNoise()
|
||||||
|
{
|
||||||
|
var job = new Jobs.NoiseJobMath
|
||||||
|
{
|
||||||
|
buffers = _buf,
|
||||||
|
width = _SIZE,
|
||||||
|
height = _SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
var handle = _scheduler.ScheduleParallel(ref job, _SIZE * _SIZE, 64);
|
||||||
|
_scheduler.WaitComplete(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
//[Benchmark(Baseline = true)]
|
||||||
|
public void ManualSPMDNoise()
|
||||||
|
{
|
||||||
|
var job = new Jobs.NoiseJobMathV
|
||||||
|
{
|
||||||
|
buffers = _buf,
|
||||||
|
width = _SIZE,
|
||||||
|
height = _SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
var iterations = (_SIZE * _SIZE + 8 - 1) / 8;
|
||||||
|
var handle = _scheduler.ScheduleParallel(ref job, iterations, 64);
|
||||||
|
_scheduler.WaitComplete(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
[Benchmark(Baseline = true)]
|
||||||
|
public void SPMDNoise()
|
||||||
|
{
|
||||||
|
var job = new Jobs.NoiseJobMathSPMD
|
||||||
|
{
|
||||||
|
buffers = _buf,
|
||||||
|
width = _SIZE,
|
||||||
|
height = _SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
|
var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, -1, JobHandle.Invalid);
|
||||||
|
_scheduler.WaitComplete(handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
using Misaki.HighPerformance.Mathematics;
|
using Misaki.HighPerformance.Mathematics;
|
||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
using System.Numerics;
|
using System.Numerics;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.Intrinsics;
|
using System.Runtime.Intrinsics;
|
||||||
using System.Runtime.Intrinsics.X86;
|
|
||||||
|
|
||||||
namespace Misaki.HighPerformance.Test.Jobs;
|
namespace Misaki.HighPerformance.Test.Jobs;
|
||||||
|
|
||||||
@@ -16,7 +16,7 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
|
|||||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
public static float Frac(float x)
|
public static float Frac(float x)
|
||||||
{
|
{
|
||||||
return x - MathF.Truncate(x);
|
return x - MathF.Floor(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Vector2 GradientNoiseDirect(Vector2 uv)
|
private static Vector2 GradientNoiseDirect(Vector2 uv)
|
||||||
@@ -101,7 +101,7 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
|||||||
private static Vector256<float> Mod289(Vector256<float> x)
|
private static Vector256<float> Mod289(Vector256<float> x)
|
||||||
{
|
{
|
||||||
var div = x / Vector256.Create(289.0f);
|
var div = x / Vector256.Create(289.0f);
|
||||||
var flr = Vector256.Floor(div);
|
var flr = Vector256.Truncate(div);
|
||||||
return x - (flr * Vector256.Create(289.0f));
|
return x - (flr * Vector256.Create(289.0f));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,12 +119,13 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
|||||||
var hy = Mod289(iy);
|
var hy = Mod289(iy);
|
||||||
|
|
||||||
var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
||||||
p = Mod289(p * hx + hy);
|
p = Mod289(p * hx) + hy;
|
||||||
|
var pPrev = p;
|
||||||
p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f);
|
||||||
p = Mod289(p * hx);
|
p = Mod289(p * pPrev);
|
||||||
|
|
||||||
var r = (p / 41.0f);
|
var r = (p / 41.0f);
|
||||||
r = (r - Vector256.Floor(r)) * 2.0f - Vector256<float>.One;
|
r = (r - Vector256.Truncate(r)) * 2.0f - Vector256<float>.One;
|
||||||
|
|
||||||
var gx = r - Vector256.Floor(r + Vector256.Create(0.5f));
|
var gx = r - Vector256.Floor(r + Vector256.Create(0.5f));
|
||||||
var gy = Vector256.Abs(r) - Vector256.Create(0.5f);
|
var gy = Vector256.Abs(r) - Vector256.Create(0.5f);
|
||||||
@@ -153,10 +154,10 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
|||||||
var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY);
|
var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY);
|
||||||
var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One);
|
var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One);
|
||||||
|
|
||||||
var lerpX1 = d00 + (d10 - d00) * uX;
|
var lerpY1 = d00 + (d10 - d00) * uY;
|
||||||
var lerpX2 = d01 + (d11 - d01) * uX;
|
var lerpY2 = d01 + (d11 - d01) * uY;
|
||||||
|
|
||||||
return lerpX1 + (lerpX2 - lerpX1) * uY;
|
return lerpY1 + (lerpY2 - lerpY1) * uX;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Execute(int loopIndex, int threadIndex)
|
public void Execute(int loopIndex, int threadIndex)
|
||||||
@@ -164,15 +165,17 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
|||||||
// ---------------------------------------------------------
|
// ---------------------------------------------------------
|
||||||
// IMPORTANT: Loop Stride is now 8!
|
// IMPORTANT: Loop Stride is now 8!
|
||||||
// ---------------------------------------------------------
|
// ---------------------------------------------------------
|
||||||
int baseIndex = loopIndex * 8;
|
var baseIndex = loopIndex * 8;
|
||||||
|
|
||||||
// Safety check
|
// Safety check
|
||||||
if (baseIndex + 7 >= width * height)
|
if (baseIndex + 7 >= width * height)
|
||||||
|
{
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate Coords
|
// Calculate Coords
|
||||||
int y = baseIndex / width;
|
var y = baseIndex / width;
|
||||||
int x = baseIndex % width;
|
var x = baseIndex % width;
|
||||||
|
|
||||||
// Sequence: 0, 1, 2, 3, 4, 5, 6, 7
|
// Sequence: 0, 1, 2, 3, 4, 5, 6, 7
|
||||||
var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f);
|
var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f);
|
||||||
@@ -185,6 +188,81 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
|
|||||||
var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight);
|
var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight);
|
||||||
|
|
||||||
// Store 8 floats (32 bytes)
|
// Store 8 floats (32 bytes)
|
||||||
Avx.Store(buffers + baseIndex, result);
|
result.Store(buffers + baseIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
|
||||||
|
{
|
||||||
|
public float* buffers;
|
||||||
|
public int width;
|
||||||
|
public int height;
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
private static T GradDot<T>(T ix, T iy, T fx, T fy)
|
||||||
|
where T : ISPMD<T, float>
|
||||||
|
{
|
||||||
|
var c289 = T.Create(289f);
|
||||||
|
var c34 = T.Create(34f);
|
||||||
|
var c1 = T.Create(1f);
|
||||||
|
var c41 = T.Create(41f);
|
||||||
|
var c2 = T.Create(2f);
|
||||||
|
var half = T.Create(0.5f);
|
||||||
|
|
||||||
|
ix %= c289;
|
||||||
|
iy %= c289;
|
||||||
|
var x = (c34 * ix + c1) * ix % c289 + iy;
|
||||||
|
x = (c34 * x + c1) * x % c289;
|
||||||
|
x = T.Frac(x / c41) * c2 - c1;
|
||||||
|
|
||||||
|
var gx = x - T.Floor(x + half);
|
||||||
|
var gy = T.Abs(x) - half;
|
||||||
|
|
||||||
|
// normalize
|
||||||
|
var len = T.Sqrt(gx * gx + gy * gy);
|
||||||
|
gx /= len;
|
||||||
|
gy /= len;
|
||||||
|
|
||||||
|
return gx * fx + gy * fy;
|
||||||
|
}
|
||||||
|
|
||||||
|
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||||
|
public static T Noise<T>(T uvX, T uvY)
|
||||||
|
where T : ISPMD<T, float>
|
||||||
|
{
|
||||||
|
var c1 = T.Create(1f);
|
||||||
|
var c6 = T.Create(6f);
|
||||||
|
var c10 = T.Create(10f);
|
||||||
|
var c15 = T.Create(15f);
|
||||||
|
|
||||||
|
var ipX = T.Floor(uvX);
|
||||||
|
var ipY = T.Floor(uvY);
|
||||||
|
var fpX = uvX - ipX;
|
||||||
|
var fpY = uvY - ipY;
|
||||||
|
|
||||||
|
var d00 = GradDot(ipX, ipY, fpX, fpY);
|
||||||
|
var d01 = GradDot(ipX, ipY + c1, fpX, fpY - c1);
|
||||||
|
var d10 = GradDot(ipX + c1, ipY, fpX - c1, fpY);
|
||||||
|
var d11 = GradDot(ipX + c1, ipY + c1, fpX - c1, fpY - c1);
|
||||||
|
|
||||||
|
// fade
|
||||||
|
var uX = fpX * fpX * fpX * (fpX * (fpX * c6 - c15) + c10);
|
||||||
|
var uY = fpY * fpY * fpY * (fpY * (fpY * c6 - c15) + c10);
|
||||||
|
|
||||||
|
return T.Lerp(T.Lerp(d00, d10, uY), T.Lerp(d01, d11, uY), uX);
|
||||||
|
}
|
||||||
|
|
||||||
|
public readonly void Execute<TLane>(int baseIndex, int threadIndex)
|
||||||
|
where TLane : ISPMD<TLane, float>
|
||||||
|
{
|
||||||
|
var indices = TLane.Sequence(baseIndex, 1f);
|
||||||
|
var w = TLane.Create(width);
|
||||||
|
var h = TLane.Create(height);
|
||||||
|
|
||||||
|
var uvX = (indices % w) / w;
|
||||||
|
var uvY = TLane.Floor(indices / w) / h;
|
||||||
|
|
||||||
|
var result = Noise(uvX, uvY);
|
||||||
|
result.Store(buffers + baseIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -24,6 +24,7 @@
|
|||||||
<ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" />
|
<ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" />
|
||||||
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
|
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
|
||||||
<ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" />
|
<ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" />
|
||||||
|
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj" />
|
||||||
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" />
|
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" />
|
||||||
<ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" />
|
<ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" />
|
||||||
<ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />
|
<ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />
|
||||||
|
|||||||
@@ -1,9 +1,15 @@
|
|||||||
using Misaki.HighPerformance;
|
using Misaki.HighPerformance;
|
||||||
|
using Misaki.HighPerformance.Jobs;
|
||||||
using Misaki.HighPerformance.LowLevel;
|
using Misaki.HighPerformance.LowLevel;
|
||||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
using Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||||
|
using System.Numerics;
|
||||||
|
using System.Runtime.Intrinsics;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>();
|
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.SPMDBenchmark>();
|
||||||
|
//return;
|
||||||
//using Misaki.HighPerformance.Collections;
|
//using Misaki.HighPerformance.Collections;
|
||||||
//using Misaki.HighPerformance.LowLevel.Buffer;
|
//using Misaki.HighPerformance.LowLevel.Buffer;
|
||||||
//using Misaki.HighPerformance.LowLevel.Collections;
|
//using Misaki.HighPerformance.LowLevel.Collections;
|
||||||
|
|||||||
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
114
Misaki.HighPerformance.Test/UnitTest/Jobs/CompressStoreTest.cs
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
using System.Numerics;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||||
|
|
||||||
|
public static class CompressStoreTest
|
||||||
|
{
|
||||||
|
public static void Run()
|
||||||
|
{
|
||||||
|
Console.WriteLine("--- Testing CompressStore (Double) ---");
|
||||||
|
|
||||||
|
// Test 1: Simple Pattern (True, False, True, False...)
|
||||||
|
TestPattern_Double(
|
||||||
|
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
|
||||||
|
// Mask: Keep only even numbers (values > 0)
|
||||||
|
// We simulate a mask by comparing against 0 or -1
|
||||||
|
keepPattern: new bool[] { true, false, true, false, true, false, true, false }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Test 2: All True
|
||||||
|
TestPattern_Double(
|
||||||
|
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
|
||||||
|
keepPattern: new bool[] { true, true, true, true, true, true, true, true }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Test 3: All False
|
||||||
|
TestPattern_Double(
|
||||||
|
input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
|
||||||
|
keepPattern: new bool[] { false, false, false, false, false, false, false, false }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Test 4: Sparse (First and Last only)
|
||||||
|
TestPattern_Double(
|
||||||
|
input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
|
||||||
|
keepPattern: new bool[] { true, false, false, false, false, false, false, true }
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private unsafe static void TestPattern_Double(double[] input, bool[] keepPattern)
|
||||||
|
{
|
||||||
|
// 1. Setup Input Vector
|
||||||
|
// Handle case where Vector<T> is smaller than 8 (e.g. 2 or 4)
|
||||||
|
var vecSize = Vector<double>.Count;
|
||||||
|
var safeInput = new double[vecSize];
|
||||||
|
var safeMaskVal = new double[vecSize];
|
||||||
|
|
||||||
|
// Expected Output Calculation
|
||||||
|
var expected = new double[vecSize];
|
||||||
|
var expectedCount = 0;
|
||||||
|
|
||||||
|
for (var i = 0; i < vecSize; i++)
|
||||||
|
{
|
||||||
|
safeInput[i] = input[i];
|
||||||
|
// If we want to keep it, make mask "GreaterThan" true
|
||||||
|
// We'll compare X > 0.
|
||||||
|
// If keep=true, val=1. If keep=false, val=-1.
|
||||||
|
safeMaskVal[i] = keepPattern[i] ? 1 : -1;
|
||||||
|
|
||||||
|
if (keepPattern[i])
|
||||||
|
{
|
||||||
|
expected[expectedCount++] = input[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Create WideLanes
|
||||||
|
var vInput = WideLane<double>.Load(ref safeInput.AsSpan().GetPinnableReference());
|
||||||
|
|
||||||
|
// Create Mask: greater than 0
|
||||||
|
var vMaskVal = WideLane<double>.Load(ref safeMaskVal.AsSpan().GetPinnableReference());
|
||||||
|
var vZero = WideLane<double>.Create(0);
|
||||||
|
var vMask = WideLane<double>.GreaterThan(vMaskVal, vZero);
|
||||||
|
|
||||||
|
// 3. Run CompressStore
|
||||||
|
var outputBuffer = new double[vecSize];
|
||||||
|
var actualCount = 0;
|
||||||
|
|
||||||
|
fixed (double* ptr = outputBuffer)
|
||||||
|
{
|
||||||
|
actualCount = vInput.CompressStore(vMask, ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Verify
|
||||||
|
var pass = actualCount == expectedCount;
|
||||||
|
for (var i = 0; i < expectedCount; i++)
|
||||||
|
{
|
||||||
|
if (outputBuffer[i] != expected[i])
|
||||||
|
pass = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Report
|
||||||
|
var hardware = (vecSize == 4) ? "AVX2 (256-bit)" : (vecSize == 2) ? "SSE/NEON (128-bit)" : "Scalar";
|
||||||
|
Console.Write($"[{hardware}] Pattern: ");
|
||||||
|
for (var i = 0; i < vecSize; i++)
|
||||||
|
Console.Write(keepPattern[i] ? "1" : "0");
|
||||||
|
|
||||||
|
if (pass)
|
||||||
|
{
|
||||||
|
Console.WriteLine($" -> PASS (Count: {actualCount})");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($" -> FAIL!");
|
||||||
|
Console.WriteLine($" Expected Count: {expectedCount}, Actual: {actualCount}");
|
||||||
|
Console.Write(" Expected Data: ");
|
||||||
|
foreach (var d in expected)
|
||||||
|
Console.Write($"{d} ");
|
||||||
|
Console.WriteLine();
|
||||||
|
Console.Write(" Actual Data: ");
|
||||||
|
foreach (var d in outputBuffer)
|
||||||
|
Console.Write($"{d} ");
|
||||||
|
Console.WriteLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,19 +1,28 @@
|
|||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||||
using Misaki.HighPerformance.LowLevel.Collections;
|
using Misaki.HighPerformance.LowLevel.Collections;
|
||||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||||
|
using Misaki.HighPerformance.Mathematics.SPMD;
|
||||||
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||||
|
|
||||||
[TestClass]
|
[TestClass]
|
||||||
|
[DoNotParallelize]
|
||||||
public unsafe class TestJobSystem
|
public unsafe class TestJobSystem
|
||||||
{
|
{
|
||||||
private JobScheduler _jobScheduler = null!;
|
private JobScheduler _jobScheduler = null!;
|
||||||
|
|
||||||
|
public TestContext TestContext
|
||||||
|
{
|
||||||
|
get;
|
||||||
|
set;
|
||||||
|
}
|
||||||
|
|
||||||
[TestInitialize]
|
[TestInitialize]
|
||||||
public void Initialize()
|
public void Initialize()
|
||||||
{
|
{
|
||||||
_jobScheduler = new JobScheduler(Environment.ProcessorCount);
|
_jobScheduler = new JobScheduler(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
[TestCleanup]
|
[TestCleanup]
|
||||||
@@ -251,4 +260,102 @@ public unsafe class TestJobSystem
|
|||||||
|
|
||||||
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
|
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[TestMethod]
|
||||||
|
public void RaceConditionTest()
|
||||||
|
{
|
||||||
|
const int jobCount = 20000;
|
||||||
|
|
||||||
|
var pExecutedCount = (int*)NativeMemory.Alloc(sizeof(int));
|
||||||
|
*pExecutedCount = 0;
|
||||||
|
|
||||||
|
var startSignal = false;
|
||||||
|
|
||||||
|
// 1. Create a "Gatekeeper" vectorJob that spins/blocks a worker thread until signaled.
|
||||||
|
// This allows us to control exactly when the dependency completes.
|
||||||
|
var rootJob = new WaitJob { pSignal = &startSignal };
|
||||||
|
var rootHandle = _jobScheduler.Schedule(ref rootJob);
|
||||||
|
|
||||||
|
// 2. Start a background task to flood the scheduler with dependencies on the Gatekeeper.
|
||||||
|
using var barrier = new Barrier(2);
|
||||||
|
var scheduleTask = Task.Run(() =>
|
||||||
|
{
|
||||||
|
var depJob = new IncrementJob { pCounter = pExecutedCount };
|
||||||
|
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Synchronize start with main thread
|
||||||
|
|
||||||
|
for (var i = 0; i < jobCount; i++)
|
||||||
|
{
|
||||||
|
// CONTENTION POINT:
|
||||||
|
// Trying to add a dependency to 'rootHandle'.
|
||||||
|
// Eventually, this will happen exactly while 'rootHandle' is transitioning to Completed.
|
||||||
|
_jobScheduler.Schedule(ref depJob, rootHandle);
|
||||||
|
}
|
||||||
|
}, TestContext.CancellationTokenSource.Token);
|
||||||
|
|
||||||
|
barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Wait for scheduler task to be ready
|
||||||
|
|
||||||
|
// Allow the scheduling loop to get a head start and queue some readers
|
||||||
|
Thread.Sleep(5);
|
||||||
|
|
||||||
|
// 3. Open the gate.
|
||||||
|
// This triggers the Gatekeeper to complete. It will change its State and iterate its dependency list.
|
||||||
|
// This happens CONCURRENTLY with the loop above adding more items to that same list.
|
||||||
|
startSignal = true;
|
||||||
|
|
||||||
|
scheduleTask.Wait(TestContext.CancellationTokenSource.Token);
|
||||||
|
|
||||||
|
// 4. Validate results
|
||||||
|
// If the lock-free logic works, every single dependent vectorJob must eventually execute.
|
||||||
|
// If there is a race (e.g., missed notification), pExecutedCount will stick below jobCount.
|
||||||
|
var spin = new SpinWait();
|
||||||
|
var timeout = DateTime.Now.AddSeconds(10);
|
||||||
|
|
||||||
|
while (Volatile.Read(ref *pExecutedCount) < jobCount)
|
||||||
|
{
|
||||||
|
if (DateTime.Now > timeout)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin.SpinOnce();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure the root job is officially cleaned up
|
||||||
|
_jobScheduler.WaitComplete(rootHandle);
|
||||||
|
|
||||||
|
Assert.AreEqual(jobCount, *pExecutedCount, "Race condition detected: Some dependent jobs failed to execute (Wait timeout).");
|
||||||
|
|
||||||
|
NativeMemory.Free(pExecutedCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Runs <c>NoiseJobVector</c> and <c>NoiseJobMath</c> over two separate 8x8 float buffers
/// and asserts that both buffers compare as equal sequences.
/// </summary>
[TestMethod]
public void SPMDCorrectness()
{
    const int size = 8;
    const int cellCount = size * size;

    // First pass: the vector job writes into its own stack buffer.
    float* vectorBuf = stackalloc float[cellCount];
    Span<float> vs = new(vectorBuf, cellCount);
    var vectorJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
    {
        buffers = vectorBuf,
        width = size,
        height = size,
    };

    vectorJob.Run(cellCount, -1);

    // Second pass: the SPMD/math job writes into a second stack buffer of the same size.
    float* spmdBuf = stackalloc float[cellCount];
    Span<float> ss = new(spmdBuf, cellCount);
    var spmdJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
    {
        buffers = spmdBuf,
        width = size,
        height = size,
    };

    spmdJob.Run(cellCount, -1);

    // SequenceCompareTo yields 0 only when both sequences compare equal element by element.
    Assert.AreEqual(0, vs.SequenceCompareTo(ss));
}
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
using Misaki.HighPerformance.Jobs;
|
using Misaki.HighPerformance.Jobs;
|
||||||
|
|
||||||
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
|
||||||
|
|
||||||
@@ -71,3 +71,27 @@ internal unsafe struct ParallelMultiplyJob : IJobParallelFor
|
|||||||
inout[loopIndex] *= multiplier;
|
inout[loopIndex] *= multiplier;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Job that keeps spinning until the flag behind <see cref="pSignal"/> is observed as <c>true</c>.
/// </summary>
public unsafe struct WaitJob : IJob
{
    /// <summary>
    /// Pointer to the flag that releases the job. It is read with <see cref="Volatile.Read(ref bool)"/>.
    /// </summary>
    public bool* pSignal;

    /// <summary>
    /// Spins with a <see cref="SpinWait"/> until <c>*pSignal</c> reads <c>true</c>.
    /// </summary>
    /// <param name="loopIndex">
    /// Not used by this job.
    /// NOTE(review): <c>IJobExtensions.Run</c> forwards its <c>threadIndex</c> argument here,
    /// so this is presumably a thread index rather than a loop index; confirm against <c>IJob</c>.
    /// </param>
    public void Execute(int loopIndex)
    {
        for (var spinner = new SpinWait(); !Volatile.Read(ref *pSignal); spinner.SpinOnce())
        {
            // Intentionally empty: all the work happens in the loop header.
        }
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Job that atomically adds one to the integer behind <see cref="pCounter"/> each time it executes.
/// </summary>
public unsafe struct IncrementJob : IJob
{
    /// <summary>
    /// Pointer to the counter that is incremented through <see cref="Interlocked.Increment(ref int)"/>.
    /// </summary>
    public int* pCounter;

    /// <summary>
    /// Atomically increments <c>*pCounter</c>.
    /// </summary>
    /// <param name="loopIndex">Not used by this job.</param>
    public void Execute(int loopIndex) => Interlocked.Increment(ref *pCounter);
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
# Visual Studio Version 18
|
# Visual Studio Version 18
|
||||||
VisualStudioVersion = 18.3.11218.70 d18.3
|
VisualStudioVersion = 18.3.11218.70
|
||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance", "Misaki.HighPerformance\Misaki.HighPerformance.csproj", "{275B2E80-9B2A-4567-A157-F147A6B28A0F}"
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance", "Misaki.HighPerformance\Misaki.HighPerformance.csproj", "{275B2E80-9B2A-4567-A157-F147A6B28A0F}"
|
||||||
EndProject
|
EndProject
|
||||||
@@ -25,6 +25,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance.Anal
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Analyzer", "Analyzer", "{457CB43B-38FA-4221-BCC2-BE866D0A2A06}"
|
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Analyzer", "Analyzer", "{457CB43B-38FA-4221-BCC2-BE866D0A2A06}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance.Mathematics.SPMD", "Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj", "{09DF7B3B-F36D-4925-B086-E8827E228FFC}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@@ -71,6 +73,10 @@ Global
|
|||||||
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.Build.0 = Release|Any CPU
|
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
|||||||
398
Misaki.HighPerformance/Result.cs
Normal file
398
Misaki.HighPerformance/Result.cs
Normal file
@@ -0,0 +1,398 @@
|
|||||||
|
using System.Runtime.CompilerServices;
|
||||||
|
|
||||||
|
namespace Misaki.HighPerformance;
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of an operation that produces no value: a success flag plus an optional message.
/// </summary>
/// <remarks>
/// <c>default(Result)</c> has <see cref="IsSuccess"/> equal to <c>false</c> and a <c>null</c> message.
/// </remarks>
public readonly struct Result
{
    /// <summary>Gets the message supplied at construction, or <c>null</c> when none was given.</summary>
    public string? Message { get; }

    /// <summary>Gets a value indicating whether the result was constructed as a success.</summary>
    public bool IsSuccess { get; }

    /// <summary>Gets the negation of <see cref="IsSuccess"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Initializes a result with the given success flag and optional message.</summary>
    /// <param name="success"><c>true</c> for a success, <c>false</c> for a failure.</param>
    /// <param name="message">Optional message stored alongside the flag.</param>
    public Result(bool success, string? message = null)
    {
        IsSuccess = success;
        Message = message;
    }

    /// <summary>Creates a successful result without a message.</summary>
    public static Result Success() => new(true);

    /// <summary>Creates a failed result carrying <paramref name="message"/>.</summary>
    public static Result Failure(string? message = null) => new(false, message);

    /// <summary>Creates a failed result whose message is the text of <paramref name="status"/>.</summary>
    public static Result Failure(Error status) => new(false, status.ToString());

    /// <summary>Creates a successful <see cref="Result{T}"/> wrapping <paramref name="value"/>.</summary>
    public static Result<T> Success<T>(T value) => Result<T>.Success(value);

    /// <summary>Creates a failed <see cref="Result{T}"/> carrying <paramref name="message"/>.</summary>
    public static Result<T> Failure<T>(string? message = null) => Result<T>.Failure(message);

    /// <summary>Creates a failed <see cref="Result{T}"/> whose message is the text of <paramref name="status"/>.</summary>
    public static Result<T> Failure<T>(Error status) => Result<T>.Failure(status.ToString());

    /// <summary>Splits the result into its success flag and message.</summary>
    public void Deconstruct(out bool success, out string? message)
    {
        (success, message) = (IsSuccess, Message);
    }

    /// <summary>Returns <c>"OK"</c> for a success, otherwise <c>"Error: "</c> followed by the message.</summary>
    public override string ToString()
    {
        if (IsSuccess)
        {
            return "OK";
        }

        return $"Error: {Message}";
    }

    /// <summary>Converts a result to its <see cref="IsSuccess"/> flag.</summary>
    public static implicit operator bool(Result result) => result.IsSuccess;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of an operation that produces a <typeparamref name="T"/> on success
/// and an optional message on failure.
/// </summary>
/// <typeparam name="T">Type of the value carried by a successful result.</typeparam>
public readonly struct Result<T>
{
    private readonly T _value;
    private readonly string? _message;
    private readonly bool _isSuccess;

    /// <summary>
    /// Gets the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown only when compiled with <c>DEBUG</c> and the result is a failure.
    /// </exception>
    public T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. {_message}");
            }
#endif
            return _value;
        }
    }

    /// <summary>Gets the message supplied at construction, or <c>null</c> when none was given.</summary>
    public string? Message => _message;

    /// <summary>Gets a value indicating whether the result was constructed as a success.</summary>
    public bool IsSuccess => _isSuccess;

    /// <summary>Gets the negation of <see cref="IsSuccess"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Initializes a result from its three components.</summary>
    /// <param name="success"><c>true</c> for a success, <c>false</c> for a failure.</param>
    /// <param name="value">Value stored in the result.</param>
    /// <param name="message">Optional message stored alongside the value.</param>
    public Result(bool success, T value, string? message = null)
    {
        _isSuccess = success;
        _value = value;
        _message = message;
    }

    /// <summary>Creates a successful result wrapping <paramref name="value"/>.</summary>
    public static Result<T> Success(T value)
    {
        return new Result<T>(true, value);
    }

    /// <summary>Creates a failed result carrying <paramref name="message"/> and a default value.</summary>
    public static Result<T> Failure(string? message = null)
    {
        return new Result<T>(false, default!, message);
    }

    /// <summary>
    /// Splits the result into its success flag, stored value and message.
    /// </summary>
    /// <remarks>
    /// Safe to call on a failed result: <paramref name="value"/> then receives the stored
    /// (default) value instead of going through the <see cref="Value"/> getter.
    /// </remarks>
    public void Deconstruct(out bool success, out T value, out string? message)
    {
        success = IsSuccess;
        // Read the backing field directly. The Value getter throws in DEBUG builds when the
        // result is a failure, which would make `var (ok, value, msg) = result;` unusable for
        // exactly the case where the caller wants to inspect the failure.
        value = _value;
        message = Message;
    }

    /// <summary>
    /// Returns <c>"OK: "</c> followed by the value for a success,
    /// otherwise <c>"Error: "</c> followed by the message.
    /// </summary>
    public override string ToString() => IsSuccess ? $"OK: {Value}" : $"Error: {Message}";

    /// <summary>
    /// Wraps a non-null value in a success; a <c>null</c> value becomes a failure without a message.
    /// </summary>
    public static implicit operator Result<T>(T? data) => data is not null ? Success(data) : Failure(null);

    /// <summary>
    /// Converts a value-less <see cref="Result"/>: a success becomes a success holding the
    /// default value of <typeparamref name="T"/>, a failure keeps its message.
    /// </summary>
    public static implicit operator Result<T>(Result result) => result.IsSuccess ? Success(default!) : Failure(result.Message);

    /// <summary>Converts a result to its <see cref="IsSuccess"/> flag.</summary>
    public static implicit operator bool(Result<T> result) => result.IsSuccess;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Byte-sized status codes. <see cref="None"/> (value 0) is the non-error value.
/// </summary>
public enum Error : byte
{
    None = 0,
    NotFound = 1,
    InvalidArgument = 2,
    InvalidState = 3,
    InternalError = 4,
    PermissionDenied = 5,
    NotSupported = 6,
    OutOfMemory = 7,
    Timeout = 8,
    Cancelled = 9,
    UnknownError = 10,

    // Alias: shares the numeric value of None.
    Success = None,
}
|
||||||
|
|
||||||
|
/// <summary>
/// Outcome of an operation that produces a <typeparamref name="T"/> together with an
/// enum status. The result counts as a success when the status equals <c>default(E)</c>.
/// </summary>
/// <typeparam name="T">Type of the value carried by the result.</typeparam>
/// <typeparam name="E">Enum type of the status; its default value means success.</typeparam>
public readonly struct Result<T, E>
    where E : struct, Enum
{
    private readonly T _value;
    private readonly E _error;

    /// <summary>
    /// Gets the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown only when compiled with <c>DEBUG</c> and the result is a failure.
    /// </exception>
    public T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. Error: {_error}");
            }
#endif
            return _value;
        }
    }

    /// <summary>Gets the stored status.</summary>
    public E Error => _error;

    /// <summary>Gets a value indicating whether the stored status equals <c>default(E)</c>.</summary>
    public bool IsSuccess => EqualityComparer<E>.Default.Equals(_error, default);

    /// <summary>Gets the negation of <see cref="IsSuccess"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Initializes a result from a value and a status.</summary>
    /// <param name="value">Value stored in the result.</param>
    /// <param name="status">Status stored in the result; <c>default(E)</c> marks a success.</param>
    public Result(T value, E status)
    {
        _value = value;
        _error = status;
    }

    /// <summary>Creates a result holding <paramref name="value"/> with the default (success) status.</summary>
    public static Result<T, E> Success(T value)
    {
        return new Result<T, E>(value, default);
    }

    /// <summary>Creates a result holding the default value and the given <paramref name="status"/>.</summary>
    public static Result<T, E> Failure(E status)
    {
        return new Result<T, E>(default!, status);
    }

    /// <summary>
    /// Splits the result into its stored value and status.
    /// </summary>
    /// <remarks>
    /// Safe to call on a failed result: <paramref name="value"/> then receives the stored
    /// (default) value instead of going through the <see cref="Value"/> getter.
    /// </remarks>
    public void Deconstruct(out T value, out E status)
    {
        // Read the backing field directly. The Value getter throws in DEBUG builds when the
        // result is a failure, which would make `var (value, status) = result;` unusable for
        // exactly the case where the caller wants to inspect the status.
        value = _value;
        status = Error;
    }

    /// <summary>Formats the stored value and status as <c>"Value: …, Status: …"</c>.</summary>
    public override string ToString() => $"Value: {_value}, Status: {_error}";

    /// <summary>Wraps a value with the default (success) status.</summary>
    public static implicit operator Result<T, E>(T data) => new(data, default);

    /// <summary>Wraps a status with the default value.</summary>
    public static implicit operator Result<T, E>(E status) => new(default!, status);

    /// <summary>Converts a result to its <see cref="IsSuccess"/> flag.</summary>
    public static implicit operator bool(Result<T, E> result) => result.IsSuccess;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Stack-only result that carries a reference to a <typeparamref name="T"/> together with an
/// enum status. The result counts as a success when the status equals <c>default(E)</c>.
/// </summary>
/// <typeparam name="T">Type of the referenced value.</typeparam>
/// <typeparam name="E">Enum type of the status; its default value means success.</typeparam>
public readonly ref struct RefResult<T, E>
    where E : struct, Enum
{
    private readonly ref T _value;
    private readonly E _error;

    /// <summary>
    /// Gets a reference to the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Thrown only when compiled with <c>DEBUG</c> and the result is a failure.
    /// </exception>
    public ref T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. Error: {_error}");
            }
#endif
            return ref _value;
        }
    }

    /// <summary>Gets the stored status.</summary>
    public E Error => _error;

    /// <summary>Gets a value indicating whether the stored status equals <c>default(E)</c>.</summary>
    public bool IsSuccess => EqualityComparer<E>.Default.Equals(_error, default);

    /// <summary>Gets the negation of <see cref="IsSuccess"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Initializes a result from a reference and a status.</summary>
    /// <param name="value">Reference stored in the result.</param>
    /// <param name="error">Status stored in the result; <c>default(E)</c> marks a success.</param>
    public RefResult(ref T value, E error)
    {
        _value = ref value;
        _error = error;
    }

    /// <summary>Creates a result referencing <paramref name="value"/> with the default (success) status.</summary>
    public static RefResult<T, E> Success(ref T value)
    {
        return new RefResult<T, E>(ref value, default);
    }

    /// <summary>Creates a result holding a null reference and the given <paramref name="error"/>.</summary>
    public static RefResult<T, E> Failure(E error)
    {
        return new RefResult<T, E>(ref Unsafe.NullRef<T>(), error);
    }

    /// <summary>
    /// Formats the referenced value and status as <c>"Value: …, Status: …"</c>.
    /// When the stored reference is null the value is printed as <c>&lt;null&gt;</c>.
    /// </summary>
    public override string ToString()
    {
        // Failure() and the implicit conversion from E store Unsafe.NullRef<T>();
        // dereferencing that reference to format the value would throw NullReferenceException.
        if (Unsafe.IsNullRef(ref _value))
        {
            return $"Value: <null>, Status: {_error}";
        }

        return $"Value: {_value}, Status: {_error}";
    }

    /// <summary>Wraps a status together with a null reference.</summary>
    public static implicit operator RefResult<T, E>(E error) => new(ref Unsafe.NullRef<T>(), error);

    /// <summary>Converts a result to its <see cref="IsSuccess"/> flag.</summary>
    public static implicit operator bool(RefResult<T, E> result) => result.IsSuccess;
}
|
||||||
|
|
||||||
|
/// <summary>
/// Extension helpers for <see cref="Error"/>, <see cref="Result"/>, <see cref="Result{T}"/>
/// and <see cref="Result{T, E}"/>: throwing on failure, extracting values, and chaining callbacks.
/// </summary>
public static class ResultExtensions
{
    /// <summary>Throws when <paramref name="result"/> is anything other than <see cref="Error.None"/>.</summary>
    /// <param name="result">Status to check.</param>
    /// <param name="op">Text of the checked expression; filled in by the compiler when omitted.</param>
    /// <exception cref="InvalidOperationException">The status is not <see cref="Error.None"/>.</exception>
    public static void ThrowIfFailed(this Error result, [CallerArgumentExpression(nameof(result))] string? op = null)
    {
        if (result == Error.None)
        {
            return;
        }

        throw new InvalidOperationException($"{op} failed: {result}");
    }

    /// <summary>Throws when <paramref name="result"/> is not a success.</summary>
    /// <param name="result">Result to check.</param>
    /// <param name="op">Text of the checked expression; filled in by the compiler when omitted.</param>
    /// <exception cref="InvalidOperationException">The result is a failure.</exception>
    public static void ThrowIfFailed(this Result result, [CallerArgumentExpression(nameof(result))] string? op = null)
    {
        if (result.IsSuccess)
        {
            return;
        }

        throw new InvalidOperationException($"{op} failed: {result.Message}");
    }

    /// <summary>Returns the value of a successful result, or throws for a failure.</summary>
    /// <param name="result">Result to unwrap.</param>
    /// <param name="op">Text of the unwrapped expression; filled in by the compiler when omitted.</param>
    /// <exception cref="InvalidOperationException">The result is a failure.</exception>
    public static T GetValueOrThrow<T>(this Result<T> result, [CallerArgumentExpression(nameof(result))] string? op = null)
    {
        return result.IsSuccess
            ? result.Value
            : throw new InvalidOperationException($"{op} failed: {result.Message}");
    }

    /// <summary>Returns the value of a successful result, or throws for a failure.</summary>
    /// <param name="result">Result to unwrap.</param>
    /// <param name="op">Text of the unwrapped expression; filled in by the compiler when omitted.</param>
    /// <exception cref="InvalidOperationException">The result is a failure.</exception>
    public static T GetValueOrThrow<T, S>(this Result<T, S> result, [CallerArgumentExpression(nameof(result))] string? op = null)
        where S : struct, Enum
    {
        return result.IsSuccess
            ? result.Value
            : throw new InvalidOperationException($"{op} failed: status {result.Error}");
    }

    /// <summary>Returns the value of a successful result, otherwise <paramref name="defaultValue"/>.</summary>
    public static T? GetValueOrDefault<T>(this Result<T> result, T? defaultValue = default)
    {
        if (result.IsSuccess)
        {
            return result.Value;
        }

        return defaultValue;
    }

    /// <summary>Returns the value of a successful result, otherwise <paramref name="defaultValue"/>.</summary>
    public static T? GetValueOrDefault<T, S>(this Result<T, S> result, T? defaultValue = default)
        where S : struct, Enum
    {
        if (result.IsSuccess)
        {
            return result.Value;
        }

        return defaultValue;
    }

    /// <summary>Copies the value of a successful result into <paramref name="value"/>.</summary>
    /// <returns><c>true</c> for a success; <c>false</c> for a failure, with <paramref name="value"/> set to its default.</returns>
    public static bool TryGetValue<T>(this Result<T> result, out T value)
    {
        var succeeded = result.IsSuccess;
        value = succeeded ? result.Value : default!;
        return succeeded;
    }

    /// <summary>Copies the value of a successful result into <paramref name="value"/>.</summary>
    /// <returns><c>true</c> for a success; <c>false</c> for a failure, with <paramref name="value"/> set to its default.</returns>
    public static bool TryGetValue<T, S>(this Result<T, S> result, out T value)
        where S : struct, Enum
    {
        var succeeded = result.IsSuccess;
        value = succeeded ? result.Value : default!;
        return succeeded;
    }

    /// <summary>Invokes <paramref name="action"/> when the result is a success, then returns the result unchanged.</summary>
    public static Result OnSuccess(this Result result, Action action)
    {
        if (result.IsFailure)
        {
            return result;
        }

        action();
        return result;
    }

    /// <summary>Invokes <paramref name="action"/> with the value when the result is a success, then returns the result unchanged.</summary>
    public static Result<T> OnSuccess<T>(this Result<T> result, Action<T> action)
    {
        if (result.IsFailure)
        {
            return result;
        }

        action(result.Value);
        return result;
    }

    /// <summary>Invokes <paramref name="action"/> with the value when the result is a success, then returns the result unchanged.</summary>
    public static Result<T, E> OnSuccess<T, E>(this Result<T, E> result, Action<T> action)
        where E : struct, Enum
    {
        if (result.IsFailure)
        {
            return result;
        }

        action(result.Value);
        return result;
    }

    /// <summary>Invokes <paramref name="action"/> with the message when the result is a failure, then returns the result unchanged.</summary>
    public static Result OnFailed(this Result result, Action<string?> action)
    {
        if (result.IsSuccess)
        {
            return result;
        }

        action(result.Message);
        return result;
    }

    /// <summary>Invokes <paramref name="action"/> with the message when the result is a failure, then returns the result unchanged.</summary>
    public static Result<T> OnFailed<T>(this Result<T> result, Action<string?> action)
    {
        if (result.IsSuccess)
        {
            return result;
        }

        action(result.Message);
        return result;
    }

    /// <summary>Invokes <paramref name="action"/> with the status when the result is a failure, then returns the result unchanged.</summary>
    public static Result<T, E> OnFailed<T, E>(this Result<T, E> result, Action<E> action)
        where E : struct, Enum
    {
        if (result.IsSuccess)
        {
            return result;
        }

        action(result.Error);
        return result;
    }

    /// <summary>
    /// Chains a follow-up operation: a failure is propagated with its message,
    /// a success feeds its value to <paramref name="func"/>.
    /// </summary>
    public static Result<U> Then<T, U>(this Result<T> result, Func<T, Result<U>> func)
    {
        return result.IsFailure
            ? Result<U>.Failure(result.Message)
            : func(result.Value);
    }

    /// <summary>
    /// Chains a follow-up operation: a failure is propagated with its status,
    /// a success feeds its value to <paramref name="func"/>.
    /// </summary>
    public static Result<U, E> Then<T, U, E>(this Result<T, E> result, Func<T, Result<U, E>> func)
        where E : struct, Enum
    {
        return result.IsFailure
            ? Result<U, E>.Failure(result.Error)
            : func(result.Value);
    }
}
|
||||||
Reference in New Issue
Block a user