SPMD SIMD math library & lock-free job system integration

- Add new SPMD SIMD math project with scalar/vector lanes
- Integrate SPMD jobs and scheduling into job system
- Implement lock-free job dependency management
- Update math functions for .NET 10 and SIMD performance
- Add SPMD benchmarks, compress-store tests, and race tests
- Introduce generic Result<T> error handling utilities
- Solution/project file updates and code cleanup
This commit is contained in:
2026-02-11 22:44:30 +09:00
parent c36405645b
commit a9c143c2a2
22 changed files with 3433 additions and 221 deletions

View File

@@ -1,4 +1,4 @@
namespace Misaki.HighPerformance.Jobs; namespace Misaki.HighPerformance.Jobs;
/// <summary> /// <summary>
/// Represents a job that performs a single unit of work. /// Represents a job that performs a single unit of work.
@@ -24,3 +24,24 @@ public interface IJobParallelFor
/// <param name="threadIndex">The index of the thread executing the job, useful for thread-specific operations.</param> /// <param name="threadIndex">The index of the thread executing the job, useful for thread-specific operations.</param>
void Execute(int loopIndex, int threadIndex); void Execute(int loopIndex, int threadIndex);
} }
/// <summary>
/// Extension helpers for invoking an <see cref="IJob"/> directly.
/// </summary>
public static class IJobExtensions
{
    /// <summary>
    /// Executes <paramref name="job"/> once by calling its <c>Execute</c> method directly.
    /// </summary>
    /// <typeparam name="T">The unmanaged job type.</typeparam>
    /// <param name="job">The job to execute. It is taken by reference, so the call operates on the caller's instance rather than on a copy.</param>
    /// <param name="threadIndex">The value forwarded unchanged to <c>Execute</c>.</param>
    public static void Run<T>(this ref T job, int threadIndex)
        where T : unmanaged, IJob
        => job.Execute(threadIndex);
}
/// <summary>
/// Extension helpers for invoking an <see cref="IJobParallelFor"/> directly.
/// </summary>
public static class IJobParallelForExtensions
{
    /// <summary>
    /// Calls <c>Execute</c> on <paramref name="job"/> for every loop index from 0 up to
    /// (but not including) <paramref name="totalIterations"/>, in ascending order.
    /// </summary>
    /// <typeparam name="T">The unmanaged parallel-for job type.</typeparam>
    /// <param name="job">The job to execute. It is taken by reference, so the call operates on the caller's instance rather than on a copy.</param>
    /// <param name="totalIterations">The number of loop indices to visit. Zero or a negative value results in no calls.</param>
    /// <param name="threadIndex">The value forwarded unchanged to every <c>Execute</c> call.</param>
    public static void Run<T>(this ref T job, int totalIterations, int threadIndex)
        where T : unmanaged, IJobParallelFor
    {
        var loopIndex = 0;
        while (loopIndex < totalIterations)
        {
            job.Execute(loopIndex, threadIndex);
            loopIndex++;
        }
    }
}

View File

@@ -1,18 +1,21 @@
namespace Misaki.HighPerformance.Jobs; namespace Misaki.HighPerformance.Jobs;
public readonly struct JobHandle : IEquatable<JobHandle> public readonly struct JobHandle : IEquatable<JobHandle>
{ {
internal readonly int _id; private readonly int _id;
internal readonly int _generation; private readonly int _generation;
public static JobHandle Invalid => new(-1, -1); public int ID => _id - 1;
public int Generation => _generation - 1;
public static JobHandle Invalid => default;
public bool IsValid => this != Invalid; public bool IsValid => this != Invalid;
internal JobHandle(int id, int generation) internal JobHandle(int id, int generation)
{ {
_id = id; _id = id + 1;
_generation = generation; _generation = generation + 1;
} }
public bool Equals(JobHandle other) public bool Equals(JobHandle other)

View File

@@ -87,7 +87,7 @@ public interface IJobScheduler
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex) JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -100,7 +100,7 @@ public interface IJobScheduler
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency) JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -113,7 +113,7 @@ public interface IJobScheduler
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize) JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -122,7 +122,7 @@ public interface IJobScheduler
/// <param name="dependencies">A collection of <see cref="JobHandle"/> instances representing the dependencies to combine.</param> /// <param name="dependencies">A collection of <see cref="JobHandle"/> instances representing the dependencies to combine.</param>
/// <returns>A <see cref="JobHandle"/> that represents the combined dependencies. The returned handle can be used to ensure /// <returns>A <see cref="JobHandle"/> that represents the combined dependencies. The returned handle can be used to ensure
/// that all specified dependencies are completed before proceeding.</returns> /// that all specified dependencies are completed before proceeding.</returns>
public JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies); JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies);
/// <summary> /// <summary>
/// Retrieves the current status of a job identified by the specified handle. /// Retrieves the current status of a job identified by the specified handle.
@@ -130,13 +130,13 @@ public interface IJobScheduler
/// <param name="handle">The handle representing the job whose status is to be retrieved. The handle must be valid.</param> /// <param name="handle">The handle representing the job whose status is to be retrieved. The handle must be valid.</param>
/// <returns>The current status of the job as a <see cref="JobState"/> value. /// <returns>The current status of the job as a <see cref="JobState"/> value.
/// Returns <see cref="JobState.Invalid"/> if the handle is invalid or the job does not exist.</returns> /// Returns <see cref="JobState.Invalid"/> if the handle is invalid or the job does not exist.</returns>
public JobState GetJobStatus(JobHandle handle); JobState GetJobStatus(JobHandle handle);
/// <summary> /// <summary>
/// Blocks the calling thread until the specified job is completed. /// Blocks the calling thread until the specified job is completed.
/// </summary> /// </summary>
/// <param name="handle">The handle of the job to wait for.</param> /// <param name="handle">The handle of the job to wait for.</param>
public void WaitComplete(JobHandle handle); void WaitComplete(JobHandle handle);
/// <summary> /// <summary>
/// Blocks the calling thread until all specified job handles have completed. /// Blocks the calling thread until all specified job handles have completed.
@@ -147,7 +147,7 @@ public interface IJobScheduler
/// concurrently from multiple threads.</remarks> /// concurrently from multiple threads.</remarks>
/// <param name="handles">A collection of job handles to wait for. Each handle represents an asynchronous job whose completion is awaited. /// <param name="handles">A collection of job handles to wait for. Each handle represents an asynchronous job whose completion is awaited.
/// The collection must not be empty.</param> /// The collection must not be empty.</param>
public void WaitAll(params ReadOnlySpan<JobHandle> handles); void WaitAll(params ReadOnlySpan<JobHandle> handles);
/// <summary> /// <summary>
/// Waits until any of the specified job handles has completed and returns the first completed handle. /// Waits until any of the specified job handles has completed and returns the first completed handle.
@@ -158,12 +158,14 @@ public interface IJobScheduler
/// <param name="handles">A read-only span containing the job handles to monitor for completion. Each handle represents a job whose /// <param name="handles">A read-only span containing the job handles to monitor for completion. Each handle represents a job whose
/// completion status will be checked.</param> /// completion status will be checked.</param>
/// <returns>The first job handle from the provided collection that has completed.</returns> /// <returns>The first job handle from the provided collection that has completed.</returns>
public JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles); JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles);
} }
public unsafe partial class JobScheduler public unsafe partial class JobScheduler
{ {
public static readonly TempJobAllocator* pTempAllocator; public static int MainThreadIndex => -1;
public static TempJobAllocator* pTempAllocator;
/// <summary> /// <summary>
/// Gets the allocation handle for the temporary job allocator. /// Gets the allocation handle for the temporary job allocator.
@@ -173,7 +175,7 @@ public unsafe partial class JobScheduler
/// </remarks> /// </remarks>
public static AllocationHandle TempAllocatorHandle => pTempAllocator->Handle; public static AllocationHandle TempAllocatorHandle => pTempAllocator->Handle;
static JobScheduler() public static void InitTempAllocator()
{ {
pTempAllocator = (TempJobAllocator*)MemoryUtility.Malloc((nuint)sizeof(TempJobAllocator)); pTempAllocator = (TempJobAllocator*)MemoryUtility.Malloc((nuint)sizeof(TempJobAllocator));
pTempAllocator->Init(); pTempAllocator->Init();
@@ -194,14 +196,18 @@ public unsafe partial class JobScheduler
/// </summary> /// </summary>
public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
{ {
private const int _SLEEP_THRESHOLD = 100; // Don't sleep indefinitely because that causes our 1ms job to become 15ms.
private const int _SLEEP_THRESHOLD = -1;
// Lock-Free constants: State mask (low 16 bits) and RC unit (1 << 16)
private const int _STATE_MASK = 0xFFFF;
private const int _RC_ONE = 0x10000;
private FreeList _jobDataAllocator; private FreeList _jobDataAllocator;
private readonly ConcurrentSlotMap<JobInfo> _jobInfoPool; private readonly ConcurrentSlotMap<JobInfo> _jobInfoPool;
private readonly ConcurrentQueue<JobHandle> _jobQueue; private readonly ConcurrentQueue<JobHandle> _jobQueue;
private readonly WorkerThread[] _workerThreads; private readonly WorkerThread[] _workerThreads;
private readonly Lock _lock;
private readonly SemaphoreSlim _workSignal; private readonly SemaphoreSlim _workSignal;
private readonly CancellationTokenSource _cts; private readonly CancellationTokenSource _cts;
@@ -221,7 +227,6 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
_jobInfoPool = new(); _jobInfoPool = new();
_jobQueue = new(); _jobQueue = new();
_lock = new();
_workSignal = new(0); _workSignal = new(0);
_cts = new(); _cts = new();
@@ -246,10 +251,11 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
private void EnqueueJobIfReady(JobHandle handle) private void EnqueueJobIfReady(JobHandle handle)
{ {
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist); ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
if (exist && Volatile.Read(ref jobInfo.dependencyCount) == 0) if (exist && Volatile.Read(ref jobInfo.dependencyCount) == 0)
{ {
// Note: JobState.Created is 0, JobState.Scheduled is 1. We assume RC logic doesn't touch initial state (RC=0).
if (Interlocked.CompareExchange(ref jobInfo.state, JobState.Scheduled, JobState.Created) != JobState.Created) if (Interlocked.CompareExchange(ref jobInfo.state, JobState.Scheduled, JobState.Created) != JobState.Created)
{ {
return; return;
@@ -293,28 +299,74 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
continue; continue;
} }
lock (_lock) ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(dependency.ID, dependency.Generation, out var exist);
if (!exist)
{ {
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(dependency._id, dependency._generation, out var exist); // Dependency does not exist (likely completed already)
if (!exist || Volatile.Read(ref Unsafe.As<JobState, int>(ref depJobInfo.state)) == (int)JobState.Completed) continue;
{
continue;
}
if (depJobInfo.dependentCount >= JobInfo.MAX_DEPENDENTS)
{
// Too many dependents
// TODO: Handle this case properly
_jobDataAllocator.Free(jobInfo.pJobData);
return JobHandle.Invalid;
}
depJobInfo.dependentsID[depJobInfo.dependentCount] = id;
depJobInfo.dependentsGeneration[depJobInfo.dependentCount] = generation;
depJobInfo.dependentCount++;
} }
Interlocked.Increment(ref infoInPool.dependencyCount); // Lock-free registration: Try to acquire "Reader Lock" by incrementing RC in high bits.
// If state is already Completed, we skip (dependency met).
var registered = false;
var completed = false;
var spin = new SpinWait();
while (true)
{
var stateVal = Volatile.Read(ref Unsafe.As<JobState, int>(ref depJobInfo.state));
var state = (JobState)(stateVal & _STATE_MASK);
if (state == JobState.Completed)
{
completed = true;
break;
}
// Attempt to increment RC (Reader Count)
if (Interlocked.CompareExchange(ref Unsafe.As<JobState, int>(ref depJobInfo.state), stateVal + _RC_ONE, stateVal) == stateVal)
{
// RC acquired. We are safe from "Remove" and state change.
var count = Interlocked.Increment(ref depJobInfo.dependentCount);
if (count <= JobInfo.MAX_DEPENDENTS)
{
// Safely write to the fixed buffer
depJobInfo.dependentsID[count - 1] = id;
depJobInfo.dependentsGeneration[count - 1] = generation;
registered = true;
}
// Release RC
Interlocked.Add(ref Unsafe.As<JobState, int>(ref depJobInfo.state), -_RC_ONE);
if (!registered)
{
// Failed to register because MAX_DEPENDENTS reached.
// Backtrack the counter increment.
Interlocked.Decrement(ref depJobInfo.dependentCount);
// Cleanup and fail
_jobDataAllocator.Free(jobInfo.pJobData);
return JobHandle.Invalid;
}
break;
}
spin.SpinOnce(-1);
}
if (!registered && !completed)
{
// Should not happen if logic is correct, unless loop logic changed
Interlocked.Increment(ref infoInPool.dependencyCount);
}
else if (registered)
{
// Successfully added dependency
Interlocked.Increment(ref infoInPool.dependencyCount);
}
// else: completed is true, registered is false -> Dependency is already done, so we don't increment our dependencyCount.
} }
EnqueueJobIfReady(handle); EnqueueJobIfReady(handle);
@@ -325,7 +377,20 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool HasWork() internal bool HasWork()
{ {
return !_jobQueue.IsEmpty || _workerThreads.Any(w => !w.LocalQueue.IsEmpty); if (!_jobQueue.IsEmpty)
{
return true;
}
for (var i = 0; i < _workerThreads.Length; i++)
{
if (!_workerThreads[i].LocalQueue.IsEmpty)
{
return true;
}
}
return false;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -360,7 +425,7 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return ref Unsafe.NullRef<JobInfo>(); return ref Unsafe.NullRef<JobInfo>();
} }
return ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out exist); return ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out exist);
} }
internal void MarkJobComplete(JobHandle handle) internal void MarkJobComplete(JobHandle handle)
@@ -370,37 +435,77 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return; return;
} }
ref var info = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist); ref var info = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
if (!exist) if (!exist)
{ {
return; return;
} }
if (Interlocked.CompareExchange(ref info.state, JobState.Completed, JobState.Running) != JobState.Running) // Lock-free Completion:
// 1. Transition State to Completed (preserving or setting upper bits?).
// Actually, we want to block new Readers. Setting state to Completed blocks new Readers.
// 2. Wait for existing Readers (RC == 0).
var spin = new SpinWait();
while (true)
{ {
return; var stateVal = Volatile.Read(ref Unsafe.As<JobState, int>(ref info.state));
var state = (JobState)(stateVal & _STATE_MASK);
if (state == JobState.Completed)
{
return; // Already completed (shouldn't happen for single-execution jobs)
}
if (state != JobState.Running)
{
// If in valid state (e.g. Scheduled?), we still assume we can complete it.
// Usually it should be Running.
}
// Construct new value: State=Completed, preserve RC (temporarily) or strictly replace only low bits?
// We set low bits to Completed. High bits (RC) remain.
var newState = (stateVal & ~_STATE_MASK) | (int)JobState.Completed;
if (Interlocked.CompareExchange(ref Unsafe.As<JobState, int>(ref info.state), newState, stateVal) == stateVal)
{
// Successfully set State to Completed. New readers will see Completed and back off.
// Now we must wait for existing readers to finish (RC to become 0).
while (true)
{
var current = Volatile.Read(ref Unsafe.As<JobState, int>(ref info.state));
if (((uint)current >> 16) == 0)
{
break; // RC is 0. Safe to proceed.
}
spin.SpinOnce(-1);
}
break;
}
spin.SpinOnce(-1);
} }
var dependentsToNotify = stackalloc JobHandle[JobInfo.MAX_DEPENDENTS]; // We now have exclusive access to dependentsID (no new readers, old readers finished).
var dependentCount = 0; // Safely capture dependents.
var dependentCount = info.dependentCount;
dependentCount = Math.Min(dependentCount, JobInfo.MAX_DEPENDENTS); // Safety cap
lock (_lock) // Use stackalloc to avoid allocation, but we'll copy to notify after freeing parent.
var dependentsToNotify = stackalloc JobHandle[dependentCount];
for (var i = 0; i < dependentCount; i++)
{ {
dependentCount = info.dependentCount; dependentsToNotify[i] = new JobHandle(info.dependentsID[i], info.dependentsGeneration[i]);
for (var i = 0; i < dependentCount; i++)
{
dependentsToNotify[i] = new JobHandle(info.dependentsID[i], info.dependentsGeneration[i]);
}
} }
_jobDataAllocator.Free(info.pJobData); _jobDataAllocator.Free(info.pJobData);
_jobInfoPool.Remove(handle._id, handle._generation); _jobInfoPool.Remove(handle.ID, handle.Generation);
for (var i = 0; i < dependentCount; i++) for (var i = 0; i < dependentCount; i++)
{ {
var depHandle = dependentsToNotify[i]; var depHandle = dependentsToNotify[i];
ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(depHandle._id, depHandle._generation, out var depExist); ref var depJobInfo = ref _jobInfoPool.GetElementReferenceAt(depHandle.ID, depHandle.Generation, out var depExist);
if (depExist && Interlocked.Decrement(ref depJobInfo.dependencyCount) == 0) if (depExist && Interlocked.Decrement(ref depJobInfo.dependencyCount) == 0)
{ {
EnqueueJobIfReady(depHandle); EnqueueJobIfReady(depHandle);
@@ -437,8 +542,8 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
} }
public JobHandle Schedule<T>(ref T job, int threadIndex) public JobHandle Schedule<T>(ref T job, int threadIndex)
where T : unmanaged, IJob where T : unmanaged, IJob
=> Schedule(ref job, threadIndex, JobHandle.Invalid); => Schedule(ref job, threadIndex, JobHandle.Invalid);
public JobHandle Schedule<T>(ref T job, JobHandle dependency) public JobHandle Schedule<T>(ref T job, JobHandle dependency)
where T : unmanaged, IJob where T : unmanaged, IJob
@@ -519,13 +624,14 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return JobState.Invalid; return JobState.Invalid;
} }
ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle._id, handle._generation, out var exist); ref var jobInfo = ref _jobInfoPool.GetElementReferenceAt(handle.ID, handle.Generation, out var exist);
if (!exist) if (!exist)
{ {
return JobState.Completed; // We assume completed if not found. Invalid state is reserved for error. return JobState.Completed; // We assume completed if not found. Invalid state is reserved for error.
} }
return (JobState)Volatile.Read(ref Unsafe.As<JobState, int>(ref jobInfo.state)); // Mask out the Reader Count (upper 16 bits) to return the actual State
return (JobState)(Volatile.Read(ref Unsafe.As<JobState, int>(ref jobInfo.state)) & _STATE_MASK);
} }
public void WaitComplete(JobHandle handle) public void WaitComplete(JobHandle handle)
@@ -536,9 +642,10 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
} }
var spin = new SpinWait(); var spin = new SpinWait();
while (_jobInfoPool.TryGetElement(handle._id, handle._generation, out var jobInfo)) while (_jobInfoPool.TryGetElement(handle.ID, handle.Generation, out var jobInfo))
{ {
if (jobInfo.state == JobState.Completed) // Mask out RC
if ((jobInfo.state & (JobState)_STATE_MASK) == JobState.Completed)
{ {
return; return;
} }
@@ -549,7 +656,6 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
public void WaitAll(params ReadOnlySpan<JobHandle> handles) public void WaitAll(params ReadOnlySpan<JobHandle> handles)
{ {
var sleepThreshold = _SLEEP_THRESHOLD * handles.Length;
var spin = new SpinWait(); var spin = new SpinWait();
while (true) while (true)
@@ -557,7 +663,7 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
var completedCount = 0; var completedCount = 0;
foreach (var handle in handles) foreach (var handle in handles)
{ {
if (!_jobInfoPool.Contains(handle._id, handle._generation)) if (!_jobInfoPool.Contains(handle.ID, handle.Generation))
{ {
completedCount++; completedCount++;
} }
@@ -568,26 +674,25 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return; return;
} }
spin.SpinOnce(sleepThreshold); spin.SpinOnce(_SLEEP_THRESHOLD);
} }
} }
public JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles) public JobHandle WaitAny(params ReadOnlySpan<JobHandle> handles)
{ {
var sleepThreshold = _SLEEP_THRESHOLD * handles.Length;
var spin = new SpinWait(); var spin = new SpinWait();
while (true) while (true)
{ {
foreach (var handle in handles) foreach (var handle in handles)
{ {
if (!_jobInfoPool.Contains(handle._id, handle._generation)) if (!_jobInfoPool.Contains(handle.ID, handle.Generation))
{ {
return handle; return handle;
} }
} }
spin.SpinOnce(sleepThreshold); spin.SpinOnce(_SLEEP_THRESHOLD);
} }
} }

View File

@@ -1,9 +1,11 @@
using System.Collections.Concurrent; using System.Collections.Concurrent;
namespace Misaki.HighPerformance.Jobs; namespace Misaki.HighPerformance.Jobs;
internal class WorkerThread : IDisposable internal class WorkerThread : IDisposable
{ {
private const int _MAX_STEAL_ATTEMPTS = 8;
private readonly int _index; private readonly int _index;
private readonly Thread _thread; private readonly Thread _thread;
private readonly ConcurrentQueue<JobHandle> _localQueue; private readonly ConcurrentQueue<JobHandle> _localQueue;
@@ -29,54 +31,55 @@ internal class WorkerThread : IDisposable
public void Start() => _thread.Start(); public void Start() => _thread.Start();
private JobHandle FindJob() private bool TryFindJob(out JobHandle handle)
{ {
var handle = JobHandle.Invalid; // 1. Check own local queue first
if (_localQueue.TryDequeue(out handle) if (_localQueue.TryDequeue(out handle))
|| _scheduler.TryStealJob(-1, out handle))
{ {
return handle; return true;
} }
while (true) // 2. Check global queue
if (_scheduler.TryStealJob(-1, out handle))
{
return true;
}
// 3. Bounded random work stealing from other workers
for (var i = 0; i < _MAX_STEAL_ATTEMPTS; i++)
{ {
var randomIndex = _random.Next(0, _scheduler.WorkerCount); var randomIndex = _random.Next(0, _scheduler.WorkerCount);
if (_scheduler.TryStealJob(randomIndex, out handle)) if (randomIndex != _index && _scheduler.TryStealJob(randomIndex, out handle))
{ {
return handle; return true;
} }
} }
handle = JobHandle.Invalid;
return false;
} }
private unsafe void WorkLoop() private unsafe void WorkLoop()
{ {
while (!_scheduler.IsCancellationRequested) while (!_scheduler.IsCancellationRequested)
{ {
var spinner = new SpinWait(); // Wait for work signal directly — the semaphore already acts as
for (var i = 0; i < 25; i++) // both a notification and a count of available work items.
{
spinner.SpinOnce(-1);
if (_scheduler.HasWork())
{
// Instead of goto, we still need to go through the WaitForWork to claim a release.
// This causes lock and lots of branches inside the SemaphoreSlim, which lost 0.03ms.
// goto DoWork;
break;
}
}
try try
{ {
_scheduler.WaitForWork(); _scheduler.WaitForWork();
} }
catch (OperationCanceledException) catch (OperationCanceledException)
{
break;
}
// After being signaled, try to find and execute a job.
if (!TryFindJob(out var handle))
{ {
continue; continue;
} }
//DoWork:
var handle = FindJob();
ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist); ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist);
if (exist) if (exist)

View File

@@ -0,0 +1,80 @@
using Misaki.HighPerformance.Jobs;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD;
/// <summary>
/// A job whose body is written once against a generic lane type and is then invoked with
/// different lane types by the caller.
/// </summary>
/// <remarks>
/// Within this file the job is invoked with <c>WideLane&lt;TNumber&gt;</c> for chunks that span a full
/// lane width and with <c>ScalarLane&lt;TNumber&gt;</c> once per element for the trailing partial chunk.
/// </remarks>
/// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
public interface IJobSPMD<TNumber>
where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
/// <summary>
/// Executes the job body for the elements starting at <paramref name="baseIndex"/>.
/// </summary>
/// <typeparam name="TLane">The lane type selected by the caller for this invocation.</typeparam>
/// <param name="baseIndex">The index of the first element this invocation is responsible for.</param>
/// <param name="threadIndex">The thread index passed through from the caller.</param>
void Execute<TLane>(int baseIndex, int threadIndex)
where TLane : ISPMD<TLane, TNumber>;
}
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/>: each loop index
/// stands for one chunk of <c>WideLane&lt;TNumber&gt;.LaneWidth</c> consecutive elements.
/// </summary>
/// <typeparam name="T">The wrapped SPMD job type.</typeparam>
/// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>The wrapped job instance.</summary>
    public T innerJob;

    /// <summary>The total number of elements; used to detect the trailing partial chunk.</summary>
    public int totalCount;

    /// <summary>
    /// Processes the chunk identified by <paramref name="loopIndex"/>.
    /// </summary>
    /// <param name="loopIndex">The chunk index; the chunk starts at element <c>loopIndex * LaneWidth</c>.</param>
    /// <param name="threadIndex">The value forwarded unchanged to the wrapped job.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Execute(int loopIndex, int threadIndex)
    {
        var laneWidth = WideLane<TNumber>.LaneWidth;
        var chunkStart = loopIndex * laneWidth;
        var elementsLeft = totalCount - chunkStart;

        // Full chunk: a single wide invocation covers all lanes.
        if (elementsLeft >= laneWidth)
        {
            innerJob.Execute<WideLane<TNumber>>(chunkStart, threadIndex);
            return;
        }

        // Partial (or empty) tail chunk: fall back to one scalar invocation per remaining element.
        var offset = 0;
        while (offset < elementsLeft)
        {
            innerJob.Execute<ScalarLane<TNumber>>(chunkStart + offset, threadIndex);
            offset++;
        }
    }
}
/// <summary>
/// Extension methods that run or schedule an <see cref="IJobSPMD{TNumber}"/> over a range of elements,
/// splitting the range into chunks of <c>WideLane&lt;TNumber&gt;.LaneWidth</c> elements.
/// </summary>
public static class IJobParallelForSPMDExtensions
{
    /// <summary>
    /// Returns the number of lane-width chunks needed to cover <paramref name="totalCount"/> elements,
    /// i.e. <paramref name="totalCount"/> divided by the lane width, rounded up.
    /// </summary>
    /// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
    /// <param name="totalCount">The total number of elements.</param>
    /// <returns>The chunk count; zero or negative when <paramref name="totalCount"/> is zero or negative.</returns>
    private static int GetChunkCount<TNumber>(int totalCount)
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        // The sum is computed in 64-bit arithmetic: in (unchecked) int arithmetic
        // "totalCount + LaneWidth - 1" wraps to a negative value when totalCount is close to
        // int.MaxValue, which would silently yield no iterations. The quotient always fits in an int.
        return (int)(((long)totalCount + WideLane<TNumber>.LaneWidth - 1) / WideLane<TNumber>.LaneWidth);
    }

    /// <summary>
    /// Runs <paramref name="job"/> over <paramref name="totalCount"/> elements on the calling thread.
    /// Full chunks are executed with <c>WideLane&lt;TNumber&gt;</c>; the elements of a trailing partial
    /// chunk are executed one by one with <c>ScalarLane&lt;TNumber&gt;</c>.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
    /// <param name="job">The job to execute. It is taken by reference, so the call operates on the caller's instance rather than on a copy.</param>
    /// <param name="totalCount">The total number of elements to process. Zero or a negative value results in no calls.</param>
    /// <param name="threadIndex">The value forwarded unchanged to every <c>Execute</c> call.</param>
    public static void Run<T, TNumber>(this ref T job, int totalCount, int threadIndex)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        var iterations = GetChunkCount<TNumber>(totalCount);
        for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
        {
            var baseIndex = loopIndex * WideLane<TNumber>.LaneWidth;
            var remaining = totalCount - baseIndex;

            if (remaining >= WideLane<TNumber>.LaneWidth)
            {
                job.Execute<WideLane<TNumber>>(baseIndex, threadIndex);
            }
            else
            {
                // Trailing partial chunk: process the leftover elements individually.
                for (var i = 0; i < remaining; i++)
                {
                    job.Execute<ScalarLane<TNumber>>(baseIndex + i, threadIndex);
                }
            }
        }
    }

    /// <summary>
    /// Schedules <paramref name="job"/> on <paramref name="jobScheduler"/> as a parallel-for job with one
    /// iteration per lane-width chunk of elements.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
    /// <param name="jobScheduler">The scheduler that receives the wrapped job.</param>
    /// <param name="job">The job to schedule. Its current value is copied into the wrapper that is scheduled.</param>
    /// <param name="totalCount">The total number of elements to process.</param>
    /// <param name="batchSize">Forwarded unchanged to <c>ScheduleParallel</c>; it counts chunks, not elements.</param>
    /// <param name="threadIndex">Forwarded unchanged to <c>ScheduleParallel</c>.</param>
    /// <param name="dependency">Forwarded unchanged to <c>ScheduleParallel</c>.</param>
    /// <returns>The <see cref="JobHandle"/> returned by <c>ScheduleParallel</c>.</returns>
    public static JobHandle ScheduleParallelSPMD<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, int threadIndex, JobHandle dependency)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        var wrapper = new SPMDJobWrapper<T, TNumber>
        {
            innerJob = job,
            totalCount = totalCount,
        };

        var iterations = GetChunkCount<TNumber>(totalCount);
        return jobScheduler.ScheduleParallel(ref wrapper, iterations, batchSize, threadIndex, dependency);
    }

    /// <summary>
    /// Misspelled alias of <see cref="ScheduleParallelSPMD{T, TNumber}"/>, kept so existing callers keep
    /// compiling. Prefer the correctly spelled method in new code.
    /// </summary>
    /// <typeparam name="T">The SPMD job type.</typeparam>
    /// <typeparam name="TNumber">The numeric element type the lanes operate on.</typeparam>
    /// <param name="jobScheduler">The scheduler that receives the wrapped job.</param>
    /// <param name="job">The job to schedule.</param>
    /// <param name="totalCount">The total number of elements to process.</param>
    /// <param name="batchSize">Forwarded unchanged.</param>
    /// <param name="threadIndex">Forwarded unchanged.</param>
    /// <param name="dependency">Forwarded unchanged.</param>
    /// <returns>The <see cref="JobHandle"/> returned by <see cref="ScheduleParallelSPMD{T, TNumber}"/>.</returns>
    public static JobHandle ScheduleParallelSPDM<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, int threadIndex, JobHandle dependency)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
        => jobScheduler.ScheduleParallelSPMD<T, TNumber>(ref job, totalCount, batchSize, threadIndex, dependency);
}

View File

@@ -0,0 +1,589 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using static System.Runtime.InteropServices.JavaScript.JSType;
namespace Misaki.HighPerformance.Mathematics.SPMD;
/// <summary>
/// Non-generic base interface for SPMD lane types. It exposes the lane count without requiring
/// the element type to be known.
/// </summary>
/// <remarks>
/// <see cref="ISPMD{TSelf, TNumber}"/> derives from this interface, so every lane type provides
/// <see cref="LaneWidth"/>.
/// </remarks>
public interface ISPMD
{
/// <summary>
/// Gets the number of lanes (vector width) for the SPMD implementation.
/// </summary>
static abstract int LaneWidth
{
get;
}
}
/// <summary>
/// Represents a single-lane or multi-lane (vectorized) SPMD value and the operations supported on it.
/// </summary>
/// <typeparam name="TSelf">The concrete SPMD lane type implementing this interface.</typeparam>
/// <typeparam name="TNumber">The underlying numeric element type.</typeparam>
public interface ISPMD<TSelf, TNumber> : ISPMD
where TSelf : ISPMD<TSelf, TNumber>
where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
/// <summary>
/// Gets a lane value where all lanes are set to numeric zero.
/// </summary>
static abstract TSelf Zero
{
get;
}
/// <summary>
/// Gets a lane value where all lanes are set to numeric one.
/// </summary>
static abstract TSelf One
{
get;
}
/// <summary>
/// Gets a lane value where all lanes are set to the minimum representable value of the underlying numeric type.
/// </summary>
static abstract TSelf MinValue
{
get;
}
/// <summary>
/// Gets a lane value where all lanes are set to the maximum representable value of the underlying numeric type.
/// </summary>
static abstract TSelf MaxValue
{
get;
}
/// <summary>
/// Gets the element value for the specified lane index.
/// </summary>
/// <param name="index">The zero-based lane index.</param>
TNumber this[int index]
{
get;
}
/// <summary>
/// Creates a lane value where all lanes are set to the specified value.
/// </summary>
/// <param name="value">The value to set for all lanes.</param>
/// <returns>The created lane value.</returns>
static abstract TSelf Create(TNumber value);
/// <summary>
/// Creates a new instance of the type from the specified sequence of numeric values.
/// </summary>
/// <param name="values">A parameter array of read-only spans containing the numeric values to use for initialization.</param>
/// <returns>A new instance of the type initialized with the provided numeric values.</returns>
static abstract TSelf Create(params ReadOnlySpan<TNumber> values);
/// <summary>
/// Creates a lane value from the specified vector.
/// </summary>
/// <param name="value">The vector to create the lane value from.</param>
/// <returns>The lane value built from the vector.</returns>
static abstract TSelf Create(Vector<TNumber> value);
/// <summary>
/// Creates a lane value with a sequence starting from the specified value with the given step.
/// </summary>
/// <param name="start">The starting value.</param>
/// <param name="step">The step value for the sequence.</param>
/// <returns>The lane value containing the arithmetic sequence.</returns>
/// <remarks>
/// Implementations may rely on vector creation helpers and assume that the resulting sequence length matches <see cref="LaneWidth"/>.
/// </remarks>
static abstract TSelf Sequence(TNumber start, TNumber step);
/// <summary>
/// Loads a lane value from the specified reference.
/// </summary>
/// <param name="value">The reference to load from.</param>
/// <returns>The loaded lane value.</returns>
static abstract TSelf Load(ref TNumber value);
/// <summary>
/// Loads a lane value from the specified pointer.
/// </summary>
/// <param name="pValue">The pointer to load from.</param>
/// <returns>The loaded lane value.</returns>
/// <remarks>
/// Unsafe pointer overloads are provided for scenarios where sequential lane data is already contiguous in memory.
/// </remarks>
static abstract unsafe TSelf Load(TNumber* pValue);
/// <summary>
/// Stores the lane value to the specified reference.
/// </summary>
/// <param name="destination">The reference to store to.</param>
void Store(ref TNumber destination);
/// <summary>
/// Stores the lane value to the specified pointer.
/// </summary>
/// <param name="pDestination">The pointer to store to.</param>
unsafe void Store(TNumber* pDestination);
/// <summary>
/// Compresses the lanes of this value that are selected by the given mask and stores the compressed result
/// starting at the provided destination.
/// </summary>
/// <param name="mask">A mask value that determines which lanes are included in the compression operation.</param>
/// <param name="destination">A reference to the location where the compressed elements are written.</param>
/// <returns>The number of elements written to the destination. Returns 0 if no lanes are selected.</returns>
/// <remarks>
/// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
/// </remarks>
int CompressStore(TSelf mask, ref TNumber destination);
/// <summary>
/// Compresses the lanes of this value that are selected by the given mask and stores the compressed result
/// starting at the provided destination pointer.
/// </summary>
/// <param name="mask">A mask value that determines which lanes are included in the compression operation.</param>
/// <param name="pDestination">A pointer to the location where the compressed elements are written.</param>
/// <returns>The number of elements written to the destination. Returns 0 if no lanes are selected.</returns>
/// <remarks>
/// Implementations may use hardware-specific shuffle tables to reorder the selected lanes before storing, falling back to a scalar loop otherwise.
/// </remarks>
unsafe int CompressStore(TSelf mask, TNumber* pDestination);
/// <summary>
/// Converts the lane value to a vector.
/// </summary>
/// <returns>The backing vector representation.</returns>
Vector<TNumber> AsVector();
/// <summary>
/// Adds two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane-wise sum.</returns>
static abstract TSelf operator +(TSelf a, TSelf b);
/// <summary>
/// Adds a lane value and a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The lane value with the scalar added to each element.</returns>
static abstract TSelf operator +(TSelf a, TNumber b);
/// <summary>
/// Subtracts two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane-wise difference.</returns>
static abstract TSelf operator -(TSelf a, TSelf b);
/// <summary>
/// Subtracts a scalar from a lane value element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The lane value with the scalar subtracted from each element.</returns>
static abstract TSelf operator -(TSelf a, TNumber b);
/// <summary>
/// Multiplies two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane-wise product.</returns>
static abstract TSelf operator *(TSelf a, TSelf b);
/// <summary>
/// Multiplies a lane value by a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The lane value scaled by the scalar.</returns>
static abstract TSelf operator *(TSelf a, TNumber b);
/// <summary>
/// Divides two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane-wise quotient.</returns>
static abstract TSelf operator /(TSelf a, TSelf b);
/// <summary>
/// Divides a lane value by a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The lane value divided by the scalar.</returns>
static abstract TSelf operator /(TSelf a, TNumber b);
/// <summary>
/// Computes the modulus of two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane-wise modulus.</returns>
static abstract TSelf operator %(TSelf a, TSelf b);
/// <summary>
/// Computes the modulus of a lane value and a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The lane value modulus scalar.</returns>
static abstract TSelf operator %(TSelf a, TNumber b);
/// <summary>
/// Negates the lane value element-wise.
/// </summary>
/// <param name="a">The lane value to negate.</param>
/// <returns>The negated lane value.</returns>
static abstract TSelf operator -(TSelf a);
/// <summary>
/// Computes the bitwise AND of two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The result of the bitwise AND.</returns>
static abstract TSelf operator &(TSelf a, TSelf b);
/// <summary>
/// Computes the bitwise AND of a lane value and a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The result of the bitwise AND.</returns>
static abstract TSelf operator &(TSelf a, TNumber b);
/// <summary>
/// Computes the bitwise OR of two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The result of the bitwise OR.</returns>
static abstract TSelf operator |(TSelf a, TSelf b);
/// <summary>
/// Computes the bitwise OR of a lane value and a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The result of the bitwise OR.</returns>
static abstract TSelf operator |(TSelf a, TNumber b);
/// <summary>
/// Computes the bitwise XOR of two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The result of the bitwise XOR.</returns>
static abstract TSelf operator ^(TSelf a, TSelf b);
/// <summary>
/// Computes the bitwise XOR of a lane value and a scalar element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <param name="b">The scalar value.</param>
/// <returns>The result of the bitwise XOR.</returns>
static abstract TSelf operator ^(TSelf a, TNumber b);
/// <summary>
/// Computes the bitwise NOT of a lane value element-wise.
/// </summary>
/// <param name="a">The lane value.</param>
/// <returns>The bitwise complement of the lane value.</returns>
static abstract TSelf operator ~(TSelf a);
/// <summary>
/// Computes the absolute value of the lane value element-wise.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The absolute lane value.</returns>
static abstract TSelf Abs(TSelf value);
/// <summary>
/// Computes the floor of the lane value element-wise.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The lane value with each element rounded toward negative infinity.</returns>
static abstract TSelf Floor(TSelf value);
/// <summary>
/// Computes the fractional part of the lane value element-wise.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The fractional lane value.</returns>
static abstract TSelf Frac(TSelf value);
/// <summary>
/// Computes the square root of the lane value element-wise.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The square root lane value.</returns>
static abstract TSelf Sqrt(TSelf value);
/// <summary>
/// Performs linear interpolation between two lane values.
/// </summary>
/// <param name="a">The start lane value.</param>
/// <param name="b">The end lane value.</param>
/// <param name="t">The interpolation factor.</param>
/// <returns>The interpolated lane value.</returns>
static abstract TSelf Lerp(TSelf a, TSelf b, TSelf t);
/// <summary>
/// Computes a * b + c element-wise.
/// </summary>
/// <param name="a">The first multiplier.</param>
/// <param name="b">The second multiplier.</param>
/// <param name="c">The addend.</param>
/// <returns>The result of the fused multiply-add operation.</returns>
/// <remarks>
/// Float and double implementations should use fused multiply-add instructions when available for both accuracy and performance.
/// </remarks>
static abstract TSelf MultipleAdd(TSelf a, TSelf b, TSelf c);
/// <summary>
/// Returns the minimum of the two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane value containing the minimum of each element.</returns>
static abstract TSelf Min(TSelf a, TSelf b);
/// <summary>
/// Returns the maximum of the two lane values element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The lane value containing the maximum of each element.</returns>
static abstract TSelf Max(TSelf a, TSelf b);
/// <summary>
/// Clamps each element of the lane value between the specified minimum and maximum values.
/// </summary>
/// <param name="value">The lane value to clamp.</param>
/// <param name="min">The inclusive minimum.</param>
/// <param name="max">The inclusive maximum.</param>
/// <returns>The clamped lane value.</returns>
static abstract TSelf Clamp(TSelf value, TSelf min, TSelf max);
/// <summary>
/// Saturates each element in the lane value to the 0..1 range.
/// </summary>
/// <param name="value">The lane value to saturate.</param>
/// <returns>The saturated lane value.</returns>
static abstract TSelf Saturate(TSelf value);
/// <summary>
/// Computes the sine of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The sine of each lane element.</returns>
/// <remarks>
/// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
/// </remarks>
static abstract TSelf Sin(TSelf value);
/// <summary>
/// Computes the cosine of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The cosine of each lane element.</returns>
/// <remarks>
/// Implementations may rely on vectorized math intrinsics for float/double and approximate values for other types.
/// </remarks>
static abstract TSelf Cos(TSelf value);
/// <summary>
/// Computes both sine and cosine of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>A tuple containing sine and cosine lane values.</returns>
/// <remarks>
/// Implementations returning both sin and cos simultaneously can reuse intermediate values for better performance.
/// </remarks>
static abstract (TSelf sin, TSelf cos) SinCos(TSelf value);
/// <summary>
/// Computes the tangent of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The tangent of each lane element.</returns>
/// <remarks>
/// Many implementations use polynomial approximations and assume the input is reduced to [-pi/4, pi/4] for accuracy.
/// </remarks>
static abstract TSelf Tan(TSelf value);
/// <summary>
/// Computes the arcsine of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The arcsine of each lane element.</returns>
/// <remarks>
/// Implementations typically assume input is within [-1, 1] and may use polynomial approximations for performance.
/// </remarks>
static abstract TSelf Asin(TSelf value);
/// <summary>
/// Computes the arccosine of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The arccosine of each lane element.</returns>
/// <remarks>
/// Input is expected to be in [-1, 1]; implementations often rely on approximation polynomials combined with range reduction.
/// </remarks>
static abstract TSelf Acos(TSelf value);
/// <summary>
/// Computes the arctangent of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The arctangent of each lane element.</returns>
/// <remarks>
/// Polynomial approximations with restricted input ranges are commonly used for performance-sensitive implementations.
/// </remarks>
static abstract TSelf Atan(TSelf value);
/// <summary>
/// Computes the arctangent of y/x for each lane element.
/// </summary>
/// <param name="y">The numerator lane value.</param>
/// <param name="x">The denominator lane value.</param>
/// <returns>The arctangent of each lane pair.</returns>
/// <remarks>
/// Implementations often rely on quadrant-aware polynomial routines and assume inputs are finite to avoid NaNs.
/// </remarks>
static abstract TSelf Atan2(TSelf y, TSelf x);
/// <summary>
/// Raises each lane element to the specified power.
/// </summary>
/// <param name="x">The base lane value.</param>
/// <param name="y">The exponent lane value. Cannot be negative.</param>
/// <returns>The power result for each lane.</returns>
/// <remarks>
/// NOTE(review): <c>ScalarLane&lt;T&gt;.Pow</c> forwards float and double inputs to <c>MathF.Pow</c>/<c>Math.Pow</c> without checking the
/// sign of the exponent; confirm which implementations actually require a non-negative <paramref name="y"/>.
/// </remarks>
static abstract TSelf Pow(TSelf x, TSelf y);
/// <summary>
/// Computes the exponential of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The exponential of each lane element.</returns>
/// <remarks>
/// Float and double implementations typically call into vectorized exp intrinsics; other types may fall back to scalar paths.
/// </remarks>
static abstract TSelf Exp(TSelf value);
/// <summary>
/// Computes 2 raised to each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The base-2 exponential of each lane element.</returns>
/// <remarks>
/// This can be implemented via <see cref="Exp(TSelf)"/> when no dedicated base-2 intrinsic exists.
/// </remarks>
static abstract TSelf Exp2(TSelf value);
/// <summary>
/// Computes the natural logarithm of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The natural logarithm of each lane element.</returns>
/// <remarks>
/// Vectorized logarithm instructions may only exist for floating-point types; other types should mimic the scalar behavior.
/// </remarks>
static abstract TSelf Log(TSelf value);
/// <summary>
/// Computes the base-2 logarithm of each lane element.
/// </summary>
/// <param name="value">The source lane value.</param>
/// <returns>The base-2 logarithm of each lane element.</returns>
/// <remarks>
/// If a dedicated base-2 intrinsic is unavailable, the implementation may compute <c>Log(value)/Log(2)</c>.
/// </remarks>
static abstract TSelf Log2(TSelf value);
/// <summary>
/// Computes the ceiling of each lane element.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The smallest integral value greater than or equal to each element.</returns>
/// <remarks>
/// Implementations should use <see cref="Vector"/> helpers for floating-point types when available.
/// </remarks>
static abstract TSelf Ceil(TSelf value);
/// <summary>
/// Rounds each lane element to the nearest integer value.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The rounded lane value.</returns>
/// <remarks>
/// Implementations should prefer vectorized round intrinsics for floating-point implementations.
/// </remarks>
static abstract TSelf Round(TSelf value);
/// <summary>
/// Truncates each lane element toward zero.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The truncated lane value.</returns>
/// <remarks>
/// Floating-point truncation typically maps to <c>Vector.Truncate</c>.
/// </remarks>
static abstract TSelf Trunc(TSelf value);
/// <summary>
/// Returns the sign of each lane element.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>-1, 0, or 1 per lane.</returns>
static abstract TSelf Sign(TSelf value);
/// <summary>
/// Copies the sign of the second lane value to the magnitude of the first.
/// </summary>
/// <param name="magnitude">The magnitude lane value.</param>
/// <param name="sign">The sign lane value.</param>
/// <returns>The result of merging magnitude with sign.</returns>
static abstract TSelf CopySign(TSelf magnitude, TSelf sign);
/// <summary>
/// Computes the reciprocal of each lane element.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The reciprocal lane value.</returns>
/// <remarks>
/// Fast paths may use <c>Sse.Reciprocal</c> or <c>Avx.Reciprocal</c> when <c>TNumber</c> is <c>float</c>.
/// </remarks>
static abstract TSelf Rcp(TSelf value);
/// <summary>
/// Computes the reciprocal square root of each lane element.
/// </summary>
/// <param name="value">The lane value.</param>
/// <returns>The reciprocal square root lane value.</returns>
/// <remarks>
/// Float implementations may prefer hardware reciprocal-sqrt intrinsics and fallback to <c>Create(TNumber.One)/Sqrt(value)</c> otherwise.
/// </remarks>
static abstract TSelf Rsqrt(TSelf value);
/// <summary>
/// Selects values from two lane values based on a condition mask.
/// </summary>
/// <param name="conditionMask">The condition mask.</param>
/// <param name="ifTrue">The value to select if true.</param>
/// <param name="ifFalse">The value to select if false.</param>
/// <returns>The selected lane value.</returns>
static abstract TSelf Select(TSelf conditionMask, TSelf ifTrue, TSelf ifFalse);
/// <summary>
/// Compares two lane values for greater than element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The mask representing the greater than comparison result.</returns>
static abstract TSelf GreaterThan(TSelf a, TSelf b);
/// <summary>
/// Compares two lane values for greater than or equal element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The mask representing the greater than or equal comparison result.</returns>
static abstract TSelf GreaterThanOrEqual(TSelf a, TSelf b);
/// <summary>
/// Compares two lane values for less than element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The mask representing the less than comparison result.</returns>
static abstract TSelf LessThan(TSelf a, TSelf b);
/// <summary>
/// Compares two lane values for less than or equal element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The mask representing the less than or equal comparison result.</returns>
static abstract TSelf LessThanOrEqual(TSelf a, TSelf b);
/// <summary>
/// Compares two lane values for equality element-wise.
/// </summary>
/// <param name="a">The first lane value.</param>
/// <param name="b">The second lane value.</param>
/// <returns>The mask representing the equality comparison result.</returns>
static abstract TSelf Equal(TSelf a, TSelf b);
/// <summary>
/// Checks if any lane in the mask is true.
/// </summary>
/// <param name="mask">The mask to check.</param>
/// <returns>True if any lane is true; otherwise, false.</returns>
static abstract bool Any(TSelf mask);
/// <summary>
/// Checks if all lanes in the mask are true.
/// </summary>
/// <param name="mask">The mask to check.</param>
/// <returns>True if all lanes are true; otherwise, false.</returns>
static abstract bool All(TSelf mask);
/// <summary>
/// Checks if no lanes in the mask are true.
/// </summary>
/// <param name="mask">The mask to check.</param>
/// <returns>True if no lanes are true; otherwise, false.</returns>
static abstract bool None(TSelf mask);
}

View File

@@ -0,0 +1,14 @@
<!-- SDK-style class library project; the only dependency declared here is the Jobs project referenced below. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Allows code in this project to use the unsafe keyword (raw pointers). -->
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,564 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Mathematics.SPMD;
[StructLayout(LayoutKind.Sequential)]
public readonly unsafe struct ScalarLane<T> : ISPMD<ScalarLane<T>, T>
where T : unmanaged, INumber<T>, IMinMaxValue<T>, IBitwiseOperators<T, T, T>
{
/// <summary>
/// The single element wrapped by this one-lane value.
/// </summary>
public readonly T value;
/// <summary>
/// Gets the number of elements in a scalar lane, which is always 1.
/// </summary>
public static int LaneWidth
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return 1;
    }
}
/// <summary>
/// Gets a scalar lane wrapping <c>T.Zero</c>.
/// </summary>
public static ScalarLane<T> Zero
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new ScalarLane<T>(T.Zero);
    }
}
/// <summary>
/// Gets a scalar lane wrapping <c>T.One</c>.
/// </summary>
public static ScalarLane<T> One
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new ScalarLane<T>(T.One);
    }
}
/// <summary>
/// Gets a scalar lane wrapping <c>T.MinValue</c>.
/// </summary>
public static ScalarLane<T> MinValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new ScalarLane<T>(T.MinValue);
    }
}
/// <summary>
/// Gets a scalar lane wrapping <c>T.MaxValue</c>.
/// </summary>
public static ScalarLane<T> MaxValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new ScalarLane<T>(T.MaxValue);
    }
}
/// <summary>
/// Gets the wrapped element. The index is ignored and not range-checked.
/// </summary>
/// <param name="index">Unused; every index yields the same element.</param>
public readonly T this[int index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return value;
    }
}
/// <summary>
/// Initializes a scalar lane that wraps the given element.
/// </summary>
/// <param name="value">The element to wrap.</param>
public ScalarLane(T value) => this.value = value;
/// <summary>
/// Creates a scalar lane wrapping the given element.
/// </summary>
/// <param name="value">The element to wrap.</param>
/// <returns>The created scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Create(T value)
{
    return new ScalarLane<T>(value);
}
/// <summary>
/// Creates a scalar lane from the first element of the supplied values; any further elements are ignored.
/// </summary>
/// <param name="values">The source values. Indexing an empty span throws.</param>
/// <returns>The created scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Create(params ReadOnlySpan<T> values)
{
    var first = values[0];
    return new ScalarLane<T>(first);
}
/// <summary>
/// Creates a scalar lane from element 0 of the given vector.
/// </summary>
/// <param name="value">The source vector.</param>
/// <returns>The created scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Create(Vector<T> value)
{
    var first = value[0];
    return new ScalarLane<T>(first);
}
/// <summary>
/// Creates the one-element sequence, which consists of <paramref name="start"/> only.
/// </summary>
/// <param name="start">The first (and only) element of the sequence.</param>
/// <param name="step">Unused, because a scalar lane holds a single element.</param>
/// <returns>A scalar lane wrapping <paramref name="start"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Sequence(T start, T step)
{
    return new ScalarLane<T>(start);
}
/// <summary>
/// Loads a scalar lane by reading one element through the given reference.
/// </summary>
/// <param name="value">The reference to read from.</param>
/// <returns>The loaded scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Load(ref T value)
{
    return new ScalarLane<T>(value);
}
/// <summary>
/// Loads a scalar lane by dereferencing the given pointer once.
/// </summary>
/// <param name="pValue">The pointer to read from.</param>
/// <returns>The loaded scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Load(T* pValue)
{
    var loaded = *pValue;
    return new ScalarLane<T>(loaded);
}
/// <summary>
/// Writes the wrapped element through the given reference.
/// </summary>
/// <param name="destination">The reference to write to.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(ref T destination)
{
    destination = value;
}
/// <summary>
/// Writes the wrapped element to the location addressed by the given pointer.
/// </summary>
/// <param name="pDestination">The pointer to write to.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(T* pDestination)
{
    *pDestination = value;
}
/// <summary>
/// Stores the wrapped element to <paramref name="destination"/> when the mask element is non-zero.
/// </summary>
/// <param name="mask">The mask; a non-zero wrapped element selects the lane.</param>
/// <param name="destination">The reference to write to when the lane is selected.</param>
/// <returns>1 if the element was written; otherwise 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(ScalarLane<T> mask, ref T destination)
{
    // Write through the managed reference directly. Converting an unpinned reference to a raw
    // pointer (Unsafe.AsPointer) is only valid when the caller guarantees the target cannot move.
    if (mask.value == T.Zero)
    {
        return 0;
    }

    destination = value;
    return 1;
}
/// <summary>
/// Stores the wrapped element to <paramref name="pDestination"/> when the mask element is non-zero.
/// </summary>
/// <param name="mask">The mask; a non-zero wrapped element selects the lane.</param>
/// <param name="pDestination">The pointer to write to when the lane is selected.</param>
/// <returns>1 if the element was written; otherwise 0.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(ScalarLane<T> mask, T* pDestination)
{
    var selected = mask.value != T.Zero;
    if (!selected)
    {
        return 0;
    }

    *pDestination = value;
    return 1;
}
/// <summary>
/// Converts this scalar lane to a vector built from the wrapped element via <c>Vector.Create</c>.
/// </summary>
/// <returns>The resulting vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Vector<T> AsVector()
{
    return Vector.Create(value);
}
/// <summary>
/// Adds the wrapped elements of two scalar lanes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator +(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value + b.value);
}
/// <summary>
/// Adds a raw scalar to the wrapped element of a scalar lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator +(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value + b);
}
/// <summary>
/// Subtracts the wrapped element of <paramref name="b"/> from that of <paramref name="a"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator -(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value - b.value);
}
/// <summary>
/// Subtracts a raw scalar from the wrapped element of a scalar lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator -(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value - b);
}
/// <summary>
/// Multiplies the wrapped elements of two scalar lanes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator *(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value * b.value);
}
/// <summary>
/// Multiplies the wrapped element of a scalar lane by a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator *(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value * b);
}
/// <summary>
/// Divides the wrapped element of <paramref name="a"/> by that of <paramref name="b"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator /(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value / b.value);
}
/// <summary>
/// Divides the wrapped element of a scalar lane by a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator /(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value / b);
}
/// <summary>
/// Computes the remainder of dividing the wrapped element of <paramref name="a"/> by that of <paramref name="b"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator %(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value % b.value);
}
/// <summary>
/// Computes the remainder of dividing the wrapped element of a scalar lane by a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator %(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value % b);
}
/// <summary>
/// Negates the wrapped element of a scalar lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator -(ScalarLane<T> a)
{
    return new ScalarLane<T>(-a.value);
}
/// <summary>
/// Computes the bitwise AND of the wrapped elements of two scalar lanes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator &(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value & b.value);
}
/// <summary>
/// Computes the bitwise AND of the wrapped element of a scalar lane and a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator &(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value & b);
}
/// <summary>
/// Computes the bitwise OR of the wrapped elements of two scalar lanes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator |(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value | b.value);
}
/// <summary>
/// Computes the bitwise OR of the wrapped element of a scalar lane and a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator |(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value | b);
}
/// <summary>
/// Computes the bitwise XOR of the wrapped elements of two scalar lanes.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator ^(ScalarLane<T> a, ScalarLane<T> b)
{
    return new ScalarLane<T>(a.value ^ b.value);
}
/// <summary>
/// Computes the bitwise XOR of the wrapped element of a scalar lane and a raw scalar.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator ^(ScalarLane<T> a, T b)
{
    return new ScalarLane<T>(a.value ^ b);
}
/// <summary>
/// Computes the bitwise complement of the wrapped element of a scalar lane.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> operator ~(ScalarLane<T> a)
{
    return new ScalarLane<T>(~a.value);
}
/// <summary>
/// Computes the absolute value of the wrapped element via <c>T.Abs</c>.
/// </summary>
/// <param name="value">The scalar lane.</param>
/// <returns>The absolute scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Abs(ScalarLane<T> value)
{
    var magnitude = T.Abs(value.value);
    return new ScalarLane<T>(magnitude);
}
/// <summary>
/// Computes the floor of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The scalar lane.</param>
/// <returns>The floored scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Floor(ScalarLane<T> value)
{
    // INumber<T> has no Floor member, so float and double are special-cased here.
    // The typeof comparisons are expected to be folded away by the JIT, which specializes
    // generic code per value-type instantiation, leaving only the matching branch.
    if (typeof(T) == typeof(float))
    {
        var floored = MathF.Floor(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref floored);
    }

    if (typeof(T) == typeof(double))
    {
        var floored = Math.Floor(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref floored);
    }

    return value;
}
/// <summary>
/// Computes the fractional part of the wrapped element as <c>value - Floor(value)</c>.
/// </summary>
/// <param name="value">The scalar lane.</param>
/// <returns>
/// For float and double, the fractional part (non-negative for finite inputs). For any other
/// <typeparamref name="T"/>, <c>Floor</c> returns its input, so the result is <c>value - value</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Frac(ScalarLane<T> value)
{
    // T.CreateTruncating(T) is an identity conversion (it only governs out-of-range handling when
    // converting between types), so subtracting it always yielded zero. Subtract the floor instead.
    var whole = Floor(value);
    return new ScalarLane<T>(value.value - whole.value);
}
/// <summary>
/// Computes the square root of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The scalar lane.</param>
/// <returns>The square root scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Sqrt(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var root = MathF.Sqrt(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref root);
    }

    if (typeof(T) == typeof(double))
    {
        var root = Math.Sqrt(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref root);
    }

    return value;
}
/// <summary>
/// Linearly interpolates between two scalar lanes as <c>a + (b - a) * t</c>.
/// </summary>
/// <param name="a">The start scalar lane.</param>
/// <param name="b">The end scalar lane.</param>
/// <param name="t">The interpolation factor.</param>
/// <returns>The interpolated scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Lerp(ScalarLane<T> a, ScalarLane<T> b, ScalarLane<T> t)
{
    var span = b.value - a.value;
    return new ScalarLane<T>(a.value + span * t.value);
}
/// <summary>
/// Computes <c>a * b + c</c> on the wrapped elements via <c>T.MultiplyAddEstimate</c>.
/// </summary>
/// <param name="a">The first multiplier.</param>
/// <param name="b">The second multiplier.</param>
/// <param name="c">The addend.</param>
/// <returns>The multiply-add result.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> MultipleAdd(ScalarLane<T> a, ScalarLane<T> b, ScalarLane<T> c)
{
    var combined = T.MultiplyAddEstimate(a.value, b.value, c.value);
    return new ScalarLane<T>(combined);
}
/// <summary>
/// Returns the smaller of the two wrapped elements via <c>T.Min</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Min(ScalarLane<T> a, ScalarLane<T> b)
{
    var smaller = T.Min(a.value, b.value);
    return new ScalarLane<T>(smaller);
}
/// <summary>
/// Returns the larger of the two wrapped elements via <c>T.Max</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Max(ScalarLane<T> a, ScalarLane<T> b)
{
    var larger = T.Max(a.value, b.value);
    return new ScalarLane<T>(larger);
}
/// <summary>
/// Clamps the wrapped element between the given bounds via <c>T.Clamp</c>.
/// </summary>
/// <param name="value">The scalar lane to clamp.</param>
/// <param name="min">The inclusive minimum.</param>
/// <param name="max">The inclusive maximum.</param>
/// <returns>The clamped scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Clamp(ScalarLane<T> value, ScalarLane<T> min, ScalarLane<T> max)
{
    var bounded = T.Clamp(value.value, min.value, max.value);
    return new ScalarLane<T>(bounded);
}
/// <summary>
/// Clamps the wrapped element to the range from <c>T.Zero</c> to <c>T.One</c>.
/// </summary>
/// <param name="value">The scalar lane to saturate.</param>
/// <returns>The saturated scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Saturate(ScalarLane<T> value)
{
    var lower = new ScalarLane<T>(T.Zero);
    var upper = new ScalarLane<T>(T.One);
    return Clamp(value, lower, upper);
}
/// <summary>
/// Computes the sine of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The sine scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Sin(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var sine = MathF.Sin(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref sine);
    }

    if (typeof(T) == typeof(double))
    {
        var sine = Math.Sin(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref sine);
    }

    return value;
}
/// <summary>
/// Computes the cosine of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The cosine scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Cos(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var cosine = MathF.Cos(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref cosine);
    }

    if (typeof(T) == typeof(double))
    {
        var cosine = Math.Cos(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref cosine);
    }

    return value;
}
/// <summary>
/// Computes the sine and cosine of the wrapped element by calling <c>Sin</c> and then <c>Cos</c>.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>A tuple holding the sine and cosine scalar lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static (ScalarLane<T> sin, ScalarLane<T> cos) SinCos(ScalarLane<T> value)
{
    var sine = Sin(value);
    var cosine = Cos(value);
    return (sine, cosine);
}
/// <summary>
/// Computes the tangent of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The tangent scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Tan(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var tangent = MathF.Tan(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref tangent);
    }

    if (typeof(T) == typeof(double))
    {
        var tangent = Math.Tan(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref tangent);
    }

    return value;
}
/// <summary>
/// Computes the arcsine of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The arcsine scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Asin(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var angle = MathF.Asin(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref angle);
    }

    if (typeof(T) == typeof(double))
    {
        var angle = Math.Asin(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref angle);
    }

    return value;
}
/// <summary>
/// Computes the arccosine of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The arccosine scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Acos(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var angle = MathF.Acos(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref angle);
    }

    if (typeof(T) == typeof(double))
    {
        var angle = Math.Acos(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref angle);
    }

    return value;
}
/// <summary>
/// Computes the arctangent of the wrapped element for float and double; any other <typeparamref name="T"/> is returned unchanged.
/// </summary>
/// <param name="value">The source scalar lane.</param>
/// <returns>The arctangent scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Atan(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var angle = MathF.Atan(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref angle);
    }

    if (typeof(T) == typeof(double))
    {
        var angle = Math.Atan(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref angle);
    }

    return value;
}
/// <summary>
/// Computes the two-argument arctangent for float and double; for any other <typeparamref name="T"/>, <paramref name="y"/> is returned unchanged.
/// </summary>
/// <param name="y">The numerator scalar lane.</param>
/// <param name="x">The denominator scalar lane.</param>
/// <returns>The arctangent scalar lane.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Atan2(ScalarLane<T> y, ScalarLane<T> x)
{
    if (typeof(T) == typeof(float))
    {
        var angle = MathF.Atan2(
            Unsafe.As<ScalarLane<T>, float>(ref y),
            Unsafe.As<ScalarLane<T>, float>(ref x));
        return Unsafe.As<float, ScalarLane<T>>(ref angle);
    }

    if (typeof(T) == typeof(double))
    {
        var angle = Math.Atan2(
            Unsafe.As<ScalarLane<T>, double>(ref y),
            Unsafe.As<ScalarLane<T>, double>(ref x));
        return Unsafe.As<double, ScalarLane<T>>(ref angle);
    }

    return y;
}
/// <summary>
/// Raises <paramref name="x"/> to the power <paramref name="y"/>.
/// </summary>
/// <param name="x">The base.</param>
/// <param name="y">The exponent.</param>
/// <returns>The power for float and double lanes; <paramref name="x"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Pow(ScalarLane<T> x, ScalarLane<T> y)
{
    if (typeof(T) == typeof(float))
    {
        var power = MathF.Pow(
            Unsafe.As<ScalarLane<T>, float>(ref x),
            Unsafe.As<ScalarLane<T>, float>(ref y));
        return Unsafe.As<float, ScalarLane<T>>(ref power);
    }

    if (typeof(T) == typeof(double))
    {
        var power = Math.Pow(
            Unsafe.As<ScalarLane<T>, double>(ref x),
            Unsafe.As<ScalarLane<T>, double>(ref y));
        return Unsafe.As<double, ScalarLane<T>>(ref power);
    }

    return x;
}
/// <summary>
/// Computes e raised to the power of the lane value.
/// </summary>
/// <param name="value">The exponent.</param>
/// <returns>
/// <see cref="MathF.Exp"/> of the value for float lanes and <see cref="Math.Exp"/> for double lanes;
/// <paramref name="value"/> unchanged for any other element type.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Exp(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var f = Unsafe.As<ScalarLane<T>, float>(ref value);
        var result = MathF.Exp(f);
        return Unsafe.As<float, ScalarLane<T>>(ref result);
    }
    else if (typeof(T) == typeof(double))
    {
        var d = Unsafe.As<ScalarLane<T>, double>(ref value);
        // Fixed: this branch previously called Math.Log, so the double overload returned ln(x) instead of e^x.
        var result = Math.Exp(d);
        return Unsafe.As<double, ScalarLane<T>>(ref result);
    }
    return value;
}
/// <summary>
/// Computes 2 raised to the power of the lane value by delegating to <c>Pow</c>.
/// </summary>
/// <param name="value">The exponent.</param>
/// <returns>The result of <c>Pow(2, value)</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Exp2(ScalarLane<T> value)
{
    var two = new ScalarLane<T>(T.CreateChecked(2));
    return Pow(two, value);
}
/// <summary>
/// Computes the natural logarithm of the lane value.
/// </summary>
/// <param name="value">Input forwarded to <see cref="MathF.Log(float)"/> or <see cref="Math.Log(double)"/>.</param>
/// <returns>The logarithm for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Log(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var logarithm = MathF.Log(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref logarithm);
    }

    if (typeof(T) == typeof(double))
    {
        var logarithm = Math.Log(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref logarithm);
    }

    return value;
}
/// <summary>
/// Computes the base-2 logarithm of the lane value.
/// </summary>
/// <param name="value">Input forwarded to <see cref="MathF.Log2"/> or <see cref="Math.Log2"/>.</param>
/// <returns>The logarithm for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Log2(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var logarithm = MathF.Log2(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref logarithm);
    }

    if (typeof(T) == typeof(double))
    {
        var logarithm = Math.Log2(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref logarithm);
    }

    return value;
}
/// <summary>
/// Rounds the lane value up to the nearest integral value.
/// </summary>
/// <param name="value">The value to round.</param>
/// <returns>
/// The ceiling for float, double and decimal lanes; <paramref name="value"/> unchanged for any other element type.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Ceil(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var ceiling = MathF.Ceiling(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref ceiling);
    }

    if (typeof(T) == typeof(double))
    {
        var ceiling = Math.Ceiling(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref ceiling);
    }

    if (typeof(T) == typeof(decimal))
    {
        var ceiling = Math.Ceiling(Unsafe.As<ScalarLane<T>, decimal>(ref value));
        return Unsafe.As<decimal, ScalarLane<T>>(ref ceiling);
    }

    return value;
}
/// <summary>
/// Rounds the lane value to the nearest integral value using the single-argument
/// <c>MathF.Round</c> / <c>Math.Round</c> overloads.
/// </summary>
/// <param name="value">The value to round.</param>
/// <returns>
/// The rounded value for float, double and decimal lanes; <paramref name="value"/> unchanged for any other element type.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Round(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var rounded = MathF.Round(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref rounded);
    }

    if (typeof(T) == typeof(double))
    {
        var rounded = Math.Round(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref rounded);
    }

    if (typeof(T) == typeof(decimal))
    {
        var rounded = Math.Round(Unsafe.As<ScalarLane<T>, decimal>(ref value));
        return Unsafe.As<decimal, ScalarLane<T>>(ref rounded);
    }

    return value;
}
/// <summary>
/// Discards the fractional part of the lane value.
/// </summary>
/// <param name="value">The value to truncate.</param>
/// <returns>
/// The truncated value for float, double and decimal lanes; <paramref name="value"/> unchanged for any other element type.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Trunc(ScalarLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var truncated = MathF.Truncate(Unsafe.As<ScalarLane<T>, float>(ref value));
        return Unsafe.As<float, ScalarLane<T>>(ref truncated);
    }

    if (typeof(T) == typeof(double))
    {
        var truncated = Math.Truncate(Unsafe.As<ScalarLane<T>, double>(ref value));
        return Unsafe.As<double, ScalarLane<T>>(ref truncated);
    }

    if (typeof(T) == typeof(decimal))
    {
        var truncated = Math.Truncate(Unsafe.As<ScalarLane<T>, decimal>(ref value));
        return Unsafe.As<decimal, ScalarLane<T>>(ref truncated);
    }

    return value;
}
/// <summary>
/// Returns the sign of the lane value as a number of the element type.
/// </summary>
/// <param name="value">The value to inspect.</param>
/// <returns>
/// One when the value is greater than zero, minus one when it is less than zero, and zero otherwise
/// (including values for which both comparisons are false, such as NaN).
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Sign(ScalarLane<T> value)
{
    if (value.value > T.Zero)
    {
        return new(T.One);
    }

    if (value.value < T.Zero)
    {
        // Fixed: the negative case used ~T.Zero. That bit pattern is -1 for two's-complement
        // integers but is NaN for float/double, so negative floating-point inputs produced NaN.
        return new(-T.One);
    }

    return new(T.Zero);
}
/// <summary>
/// Combines the magnitude of one lane with the sign of another via <c>T.CopySign</c>.
/// </summary>
/// <param name="magnitude">Lane supplying the magnitude.</param>
/// <param name="sign">Lane supplying the sign.</param>
/// <returns>A lane wrapping <c>T.CopySign(magnitude, sign)</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> CopySign(ScalarLane<T> magnitude, ScalarLane<T> sign)
{
    var signed = T.CopySign(magnitude.value, sign.value);
    return new ScalarLane<T>(signed);
}
/// <summary>
/// Computes the reciprocal of the lane value as <c>T.One / value</c>.
/// </summary>
/// <param name="value">The divisor.</param>
/// <returns>A lane wrapping the quotient.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Rcp(ScalarLane<T> value)
{
    var reciprocal = T.One / value.value;
    return new ScalarLane<T>(reciprocal);
}
/// <summary>
/// Computes the reciprocal square root as <c>Sqrt(Rcp(value))</c>: the reciprocal is taken first,
/// then its square root.
/// </summary>
/// <param name="value">The input lane.</param>
/// <returns>The square root of the reciprocal of <paramref name="value"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Rsqrt(ScalarLane<T> value)
{
    var reciprocal = Rcp(value);
    return Sqrt(reciprocal);
}
/// <summary>
/// Chooses between two lanes based on a mask lane.
/// </summary>
/// <param name="conditionMask">Mask lane; it counts as set unless it compares equal to zero.</param>
/// <param name="ifTrue">Value returned when the mask is set.</param>
/// <param name="ifFalse">Value returned when the mask equals zero.</param>
/// <returns>A lane wrapping the chosen value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Select(ScalarLane<T> conditionMask, ScalarLane<T> ifTrue, ScalarLane<T> ifFalse)
{
    if (conditionMask.value != T.Zero)
    {
        return new ScalarLane<T>(ifTrue.value);
    }

    return new ScalarLane<T>(ifFalse.value);
}
/// <summary>
/// Tests <c>a &gt; b</c> and returns the result as a mask lane.
/// </summary>
/// <returns>A lane holding <c>~T.Zero</c> (all bits set) when the comparison holds, otherwise zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> GreaterThan(ScalarLane<T> a, ScalarLane<T> b)
{
    var mask = a.value > b.value ? ~T.Zero : T.Zero;
    return new ScalarLane<T>(mask);
}
/// <summary>
/// Tests <c>a &gt;= b</c> and returns the result as a mask lane.
/// </summary>
/// <returns>A lane holding <c>~T.Zero</c> (all bits set) when the comparison holds, otherwise zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> GreaterThanOrEqual(ScalarLane<T> a, ScalarLane<T> b)
{
    var mask = a.value >= b.value ? ~T.Zero : T.Zero;
    return new ScalarLane<T>(mask);
}
/// <summary>
/// Tests <c>a &lt; b</c> and returns the result as a mask lane.
/// </summary>
/// <returns>A lane holding <c>~T.Zero</c> (all bits set) when the comparison holds, otherwise zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> LessThan(ScalarLane<T> a, ScalarLane<T> b)
{
    var mask = a.value < b.value ? ~T.Zero : T.Zero;
    return new ScalarLane<T>(mask);
}
/// <summary>
/// Tests <c>a &lt;= b</c> and returns the result as a mask lane.
/// </summary>
/// <returns>A lane holding <c>~T.Zero</c> (all bits set) when the comparison holds, otherwise zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> LessThanOrEqual(ScalarLane<T> a, ScalarLane<T> b)
{
    var mask = a.value <= b.value ? ~T.Zero : T.Zero;
    return new ScalarLane<T>(mask);
}
/// <summary>
/// Tests <c>a == b</c> and returns the result as a mask lane.
/// </summary>
/// <returns>A lane holding <c>~T.Zero</c> (all bits set) when the comparison holds, otherwise zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ScalarLane<T> Equal(ScalarLane<T> a, ScalarLane<T> b)
{
    var mask = a.value == b.value ? ~T.Zero : T.Zero;
    return new ScalarLane<T>(mask);
}
/// <summary>
/// Reports whether the mask lane is set. With a single lane this is the same test as <c>All</c>.
/// </summary>
/// <returns><c>true</c> when the mask value differs from zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Any(ScalarLane<T> mask)
{
    return mask.value != T.Zero;
}
/// <summary>
/// Reports whether the mask lane is set. With a single lane this is the same test as <c>Any</c>.
/// </summary>
/// <returns><c>true</c> when the mask value differs from zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool All(ScalarLane<T> mask)
{
    return mask.value != T.Zero;
}
/// <summary>
/// Reports whether the mask lane is clear.
/// </summary>
/// <returns><c>true</c> when the mask value equals zero.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool None(ScalarLane<T> mask)
{
    return mask.value == T.Zero;
}
/// <summary>
/// Formats the wrapped value with its own <c>ToString</c>, substituting an empty string for <c>null</c>.
/// </summary>
public override string ToString() => value.ToString() ?? string.Empty;
}

View File

@@ -0,0 +1,206 @@
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Mathematics.SPMD;
/// <summary>
/// Builds native lookup tables of shuffle-control vectors used for compress-store.
/// </summary>
/// <remarks>
/// Every table has one row per possible lane mask (2^laneCount rows) and <c>laneCount</c> entries per row.
/// Row <c>m</c> lists, in ascending order, the indices of the bits set in <c>m</c>; the remaining entries
/// are padded with 0. Tables are allocated with <see cref="NativeMemory.AlignedAlloc"/> and are never
/// released by this class: the caller owns the returned pointer and must release it with
/// <see cref="NativeMemory.AlignedFree"/> if it should not live for the whole process.
/// </remarks>
public static unsafe class ShuffleTableGenerator
{
    /// <summary>
    /// Builds the table for 16 lanes of 32-bit elements (64-byte aligned).
    /// </summary>
    /// <returns>Pointer to 65536 rows of 16 <see cref="uint"/> indices.</returns>
    /// <remarks>
    /// Fixed: the table previously held only 512 rows although a 16-lane mask takes 65536 distinct values,
    /// so any mask above 511 indexed past the end of the allocation.
    /// </remarks>
    public static uint* ComputeShuffleTable512_32Bit() => BuildTable32(16, 64);

    /// <summary>
    /// Builds the table for 8 lanes of 64-bit elements (64-byte aligned).
    /// </summary>
    /// <returns>Pointer to 256 rows of 8 <see cref="ulong"/> indices.</returns>
    public static ulong* ComputeShuffleTable512_64Bit() => BuildTable64(8, 64);

    /// <summary>
    /// Builds the table for 8 lanes of 32-bit elements (32-byte aligned).
    /// </summary>
    /// <returns>Pointer to 256 rows of 8 <see cref="uint"/> indices.</returns>
    public static uint* ComputeShuffleTable256_32Bit() => BuildTable32(8, 32);

    /// <summary>
    /// Builds the table for 4 lanes of 64-bit elements (32-byte aligned).
    /// </summary>
    /// <returns>Pointer to 16 rows of 4 <see cref="ulong"/> indices.</returns>
    public static ulong* ComputeShuffleTable256_64Bit() => BuildTable64(4, 32);

    /// <summary>
    /// Builds the table for 4 lanes of 32-bit elements (16-byte aligned).
    /// </summary>
    /// <returns>Pointer to 16 rows of 4 <see cref="uint"/> indices.</returns>
    public static uint* ComputeShuffleTable128_32Bit() => BuildTable32(4, 16);

    /// <summary>
    /// Builds the table for 2 lanes of 64-bit elements (16-byte aligned).
    /// </summary>
    /// <returns>Pointer to 4 rows of 2 <see cref="ulong"/> indices.</returns>
    public static ulong* ComputeShuffleTable128_64Bit() => BuildTable64(2, 16);

    // Allocates and fills a table of 32-bit indices with one row per mask value in [0, 2^laneCount).
    private static uint* BuildTable32(int laneCount, nuint alignment)
    {
        var entryCount = 1u << laneCount;
        var rowLength = (nuint)laneCount;
        var table = (uint*)NativeMemory.AlignedAlloc((nuint)entryCount * rowLength * (nuint)sizeof(uint), alignment);

        for (var mask = 0u; mask < entryCount; mask++)
        {
            var pRow = table + ((nuint)mask * rowLength);
            var outputIndex = 0;

            // Pack the indices of the set bits to the front of the row.
            for (var bit = 0; bit < laneCount; bit++)
            {
                if ((mask & (1u << bit)) != 0)
                {
                    pRow[outputIndex++] = (uint)bit;
                }
            }

            // Pad the rest of the row with index 0.
            while (outputIndex < laneCount)
            {
                pRow[outputIndex++] = 0;
            }
        }

        return table;
    }

    // Allocates and fills a table of 64-bit indices with one row per mask value in [0, 2^laneCount).
    private static ulong* BuildTable64(int laneCount, nuint alignment)
    {
        var entryCount = 1u << laneCount;
        var rowLength = (nuint)laneCount;
        var table = (ulong*)NativeMemory.AlignedAlloc((nuint)entryCount * rowLength * (nuint)sizeof(ulong), alignment);

        for (var mask = 0u; mask < entryCount; mask++)
        {
            var pRow = table + ((nuint)mask * rowLength);
            var outputIndex = 0;

            // Pack the indices of the set bits to the front of the row.
            for (var bit = 0; bit < laneCount; bit++)
            {
                if ((mask & (1u << bit)) != 0)
                {
                    pRow[outputIndex++] = (ulong)bit;
                }
            }

            // Pad the rest of the row with index 0.
            while (outputIndex < laneCount)
            {
                pRow[outputIndex++] = 0;
            }
        }

        return table;
    }
}

View File

@@ -0,0 +1,794 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Mathematics.SPMD;
[StructLayout(LayoutKind.Sequential)]
public readonly unsafe struct WideLane<T> : ISPMD<WideLane<T>, T>
where T : unmanaged, INumber<T>, IMinMaxValue<T>, IBitwiseOperators<T, T, T>
{
// Lane indices 0, 1, ..., LaneWidth - 1, filled in by the static constructor and used by Sequence.
private static readonly Vector<T> s_indices;
// Shuffle-control tables produced by ShuffleTableGenerator in the static constructor.
// CompressStore indexes them by the lane mask extracted from a mask vector; the name states the
// vector width in bits followed by the element width in bits.
// NOTE(review): all six tables are allocated for every closed WideLane<T> type and never freed here —
// confirm that is intended.
private static readonly uint* s_shuffleTable512_32bit;
private static readonly ulong* s_shuffleTable512_64bit;
private static readonly uint* s_shuffleTable256_32bit;
private static readonly ulong* s_shuffleTable256_64bit;
private static readonly uint* s_shuffleTable128_32bit;
private static readonly ulong* s_shuffleTable128_64bit;
// The wrapped vector holding all lanes of this value.
public readonly Vector<T> value;
/// <summary>
/// Gets the number of elements held by one value of this type, i.e. <c>Vector&lt;T&gt;.Count</c>.
/// </summary>
public static int LaneWidth
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return Vector<T>.Count;
    }
}
/// <summary>
/// Gets a value whose lanes are all zero.
/// </summary>
public static WideLane<T> Zero
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector<T>.Zero);
    }
}
/// <summary>
/// Gets a value whose lanes are all one.
/// </summary>
public static WideLane<T> One
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector<T>.One);
    }
}
/// <summary>
/// Gets a value whose lanes all hold <c>T.MinValue</c>.
/// </summary>
public static WideLane<T> MinValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector.Create(T.MinValue));
    }
}
/// <summary>
/// Gets a value whose lanes all hold <c>T.MaxValue</c>.
/// </summary>
public static WideLane<T> MaxValue
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return new WideLane<T>(Vector.Create(T.MaxValue));
    }
}
/// <summary>
/// Gets the element stored in the given lane.
/// </summary>
/// <param name="index">Lane index, passed straight to the underlying vector's indexer.</param>
public readonly T this[int index]
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        return value[index];
    }
}
/// <summary>
/// Initializes the per-type lane-index vector and the compress-store shuffle tables.
/// </summary>
static WideLane()
{
    // Build [0, 1, ..., LaneWidth - 1] on the stack and load it as the index vector.
    var laneCount = LaneWidth;
    var pLaneIndices = stackalloc T[laneCount];
    for (var lane = 0; lane < laneCount; lane++)
    {
        pLaneIndices[lane] = T.CreateChecked(lane);
    }

    s_indices = Vector.Load(pLaneIndices);

    // Every table is built unconditionally, whatever the element size or vector width.
    s_shuffleTable512_32bit = ShuffleTableGenerator.ComputeShuffleTable512_32Bit();
    s_shuffleTable512_64bit = ShuffleTableGenerator.ComputeShuffleTable512_64Bit();
    s_shuffleTable256_32bit = ShuffleTableGenerator.ComputeShuffleTable256_32Bit();
    s_shuffleTable256_64bit = ShuffleTableGenerator.ComputeShuffleTable256_64Bit();
    s_shuffleTable128_32bit = ShuffleTableGenerator.ComputeShuffleTable128_32Bit();
    s_shuffleTable128_64bit = ShuffleTableGenerator.ComputeShuffleTable128_64Bit();
}
/// <summary>
/// Wraps an existing vector.
/// </summary>
/// <param name="value">The vector whose lanes this instance exposes.</param>
public WideLane(Vector<T> value) => this.value = value;
/// <summary>
/// Applies <c>Vector.Floor</c> lane-wise when the element type is float or double.
/// </summary>
/// <param name="vector">The vector to floor.</param>
/// <returns>The floored vector; for any other element type the input is returned unchanged.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<T> VectorFloor(Vector<T> vector)
{
    if (typeof(T) == typeof(float))
    {
        var lowered = Vector.Floor(Unsafe.As<Vector<T>, Vector<float>>(ref vector));
        return Unsafe.As<Vector<float>, Vector<T>>(ref lowered);
    }

    if (typeof(T) == typeof(double))
    {
        var lowered = Vector.Floor(Unsafe.As<Vector<T>, Vector<double>>(ref vector));
        return Unsafe.As<Vector<double>, Vector<T>>(ref lowered);
    }

    return vector;
}
/// <summary>
/// Applies <c>Vector.Truncate</c> lane-wise when the element type is float or double.
/// </summary>
/// <param name="vector">The vector to truncate.</param>
/// <returns>The truncated vector; for any other element type the input is returned unchanged.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector<T> VectorTruncate(Vector<T> vector)
{
    if (typeof(T) == typeof(float))
    {
        var whole = Vector.Truncate(Unsafe.As<Vector<T>, Vector<float>>(ref vector));
        return Unsafe.As<Vector<float>, Vector<T>>(ref whole);
    }

    if (typeof(T) == typeof(double))
    {
        var whole = Vector.Truncate(Unsafe.As<Vector<T>, Vector<double>>(ref vector));
        return Unsafe.As<Vector<double>, Vector<T>>(ref whole);
    }

    return vector;
}
/// <summary>
/// Creates a value with every lane set to <paramref name="value"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(T value)
{
    var broadcast = Vector.Create(value);
    return new WideLane<T>(broadcast);
}
/// <summary>
/// Creates a value from a span of elements by forwarding it to <c>Vector.Create</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(params ReadOnlySpan<T> values)
{
    var loaded = Vector.Create(values);
    return new WideLane<T>(loaded);
}
/// <summary>
/// Wraps an existing vector.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Create(Vector<T> value)
{
    return new WideLane<T>(value);
}
/// <summary>
/// Creates the arithmetic sequence <c>start + step * i</c>, where <c>i</c> is the lane index.
/// </summary>
/// <param name="start">Value of lane 0.</param>
/// <param name="step">Difference between neighbouring lanes.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sequence(T start, T step)
{
    var origin = Vector.Create(start);
    var offsets = Vector.Create(step) * s_indices;
    return new WideLane<T>(origin + offsets);
}
/// <summary>
/// Loads a full vector starting at the referenced element, using <c>Vector.LoadUnsafe</c>.
/// </summary>
/// <param name="value">Reference to the first element to read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Load(ref T value)
{
    var loaded = Vector.LoadUnsafe(ref value);
    return new WideLane<T>(loaded);
}
/// <summary>
/// Loads a full vector from unmanaged memory, using <c>Vector.Load</c>.
/// </summary>
/// <param name="pValue">Pointer to the first element to read.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Load(T* pValue)
{
    var loaded = Vector.Load(pValue);
    return new WideLane<T>(loaded);
}
/// <summary>
/// Reinterprets the bits of a value with another element type as this element type.
/// No per-element numeric conversion takes place.
/// </summary>
/// <typeparam name="U">Element type of the source value.</typeparam>
/// <param name="value">The value whose storage is reinterpreted.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> CastFrom<U>(WideLane<U> value)
    where U : unmanaged, INumber<U>, IMinMaxValue<U>, IBitwiseOperators<U, U, U>
{
    ref var reinterpreted = ref Unsafe.As<WideLane<U>, Vector<T>>(ref value);
    return new WideLane<T>(reinterpreted);
}
/// <summary>
/// Writes all lanes starting at the referenced element, using <c>Vector.StoreUnsafe</c>.
/// </summary>
/// <param name="destination">Reference to the first element to overwrite.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(ref T destination)
{
    Vector.StoreUnsafe(value, ref destination);
}
/// <summary>
/// Writes all lanes to unmanaged memory, using <c>Vector.Store</c>.
/// </summary>
/// <param name="pDestination">Pointer to the first element to overwrite.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly void Store(T* pDestination)
{
    Vector.Store(value, pDestination);
}
/// <summary>
/// Packs the lanes selected by <paramref name="mask"/> to the front and writes them starting at
/// <paramref name="destination"/>. Forwards to the pointer overload.
/// </summary>
/// <param name="mask">Per-lane selection mask.</param>
/// <param name="destination">Reference to the first element to write.</param>
/// <returns>The number of selected lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(WideLane<T> mask, ref T destination)
{
    // Fixed: the reference was converted with Unsafe.AsPointer without pinning, so a destination
    // living in a managed array could be moved by the GC while the raw pointer was still in use.
    fixed (T* pDestination = &destination)
    {
        return CompressStore(mask, pDestination);
    }
}
/// <summary>
/// Packs the lanes selected by <paramref name="mask"/> to the front and writes them to
/// <paramref name="pDestination"/>.
/// </summary>
/// <param name="mask">Per-lane selection mask; a lane is selected when its most significant bit is set.</param>
/// <param name="pDestination">
/// Destination buffer. The accelerated paths store a whole vector, so the buffer must have room for
/// <c>LaneWidth</c> elements even when fewer lanes are selected; elements past the returned count
/// hold unspecified lane values.
/// </param>
/// <returns>The number of selected lanes.</returns>
/// <remarks>
/// NOTE(review): the accelerated paths index the shuffle tables with the raw mask bits, so each table
/// needs one row per possible mask value (65536 rows for the 16-lane case) — verify the table sizes.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int CompressStore(WideLane<T> mask, T* pDestination)
{
    var size = sizeof(T);
    if (LaneWidth == Vector512<T>.Count && Vector512.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector512<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector512<uint>>(ref mask);
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 16 entries.
            var shuffle = Vector512.Load(s_shuffleTable512_32bit + (moveMask * 16));
            var compressed = Vector512.Shuffle(vec, shuffle);
            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector512<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector512<ulong>>(ref mask);
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 8 entries.
            var shuffle = Vector512.Load(s_shuffleTable512_64bit + (moveMask * 8));
            var compressed = Vector512.Shuffle(vec, shuffle);
            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }
    else if (LaneWidth == Vector256<T>.Count && Vector256.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector256<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector256<uint>>(ref mask);
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 8 entries.
            var shuffle = Vector256.Load(s_shuffleTable256_32bit + (moveMask * 8));
            var compressed = Vector256.Shuffle(vec, shuffle);
            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector256<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector256<ulong>>(ref mask);
            // With four 64-bit lanes only the low 4 bits are populated (mask values 0-15).
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 4 entries.
            var shuffle = Vector256.Load(s_shuffleTable256_64bit + (moveMask * 4));
            var compressed = Vector256.Shuffle(vec, shuffle);
            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }
    else if (LaneWidth == Vector128<T>.Count && Vector128.IsHardwareAccelerated)
    {
        if (size == 4)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector128<uint>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector128<uint>>(ref mask);
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 4 entries.
            var shuffle = Vector128.Load(s_shuffleTable128_32bit + (moveMask * 4));
            var compressed = Vector128.Shuffle(vec, shuffle);
            compressed.Store((uint*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
        if (size == 8)
        {
            ref var vec = ref Unsafe.As<WideLane<T>, Vector128<ulong>>(ref Unsafe.AsRef(in this));
            var m = Unsafe.As<WideLane<T>, Vector128<ulong>>(ref mask);
            var moveMask = m.ExtractMostSignificantBits();
            // Each control row has 2 entries.
            var shuffle = Vector128.Load(s_shuffleTable128_64bit + (moveMask * 2));
            var compressed = Vector128.Shuffle(vec, shuffle);
            compressed.Store((ulong*)pDestination);
            return BitOperations.PopCount(moveMask);
        }
    }

    // Portable fallback (element sizes other than 4/8 bytes, or no matching hardware acceleration).
    // Slow, but it works on any hardware.
    // Fixed: lanes were compared with `== ~T.Zero`. For float/double masks that pattern is NaN, which
    // never compares equal, so nothing was ever stored. Test the most significant bit of each mask
    // lane instead, which is the same criterion the accelerated paths use.
    ref var maskBytes = ref Unsafe.As<WideLane<T>, byte>(ref mask);
    // The byte carrying the sign bit is the last byte of an element on little-endian targets.
    var signByteOffset = System.BitConverter.IsLittleEndian ? size - 1 : 0;
    var count = 0;
    for (var i = 0; i < LaneWidth; i++)
    {
        if ((Unsafe.Add(ref maskBytes, (i * size) + signByteOffset) & 0x80) != 0)
        {
            pDestination[count++] = value[i];
        }
    }
    return count;
}
/// <summary>
/// Returns the wrapped vector.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Vector<T> AsVector()
{
    return value;
}
/// <summary>Lane-wise addition of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator +(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value + b.value);
}
/// <summary>Adds the scalar <paramref name="b"/> to every lane of <paramref name="a"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator +(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value + broadcast);
}
/// <summary>Lane-wise subtraction of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value - b.value);
}
/// <summary>Subtracts the scalar <paramref name="b"/> from every lane of <paramref name="a"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value - broadcast);
}
/// <summary>Lane-wise multiplication of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator *(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value * b.value);
}
/// <summary>Multiplies every lane of <paramref name="a"/> by the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator *(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value * broadcast);
}
/// <summary>Lane-wise division of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator /(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value / b.value);
}
/// <summary>Divides every lane of <paramref name="a"/> by the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator /(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value / broadcast);
}
/// <summary>
/// Lane-wise remainder computed as <c>a - floor(a / b) * b</c>.
/// </summary>
/// <remarks>
/// The floor step only applies to float and double lanes; for other element types the quotient is
/// used as produced by the vector division.
/// NOTE(review): for floating-point lanes this is a floored modulo, whose result for negative operands
/// differs from the truncated remainder of the C# <c>%</c> operator — confirm this is intended.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator %(WideLane<T> a, WideLane<T> b)
{
    var quotient = VectorFloor(a.value / b.value);
    return new WideLane<T>(a.value - quotient * b.value);
}
/// <summary>
/// Lane-wise remainder by a scalar divisor, computed as <c>a - floor(a / b) * b</c>.
/// </summary>
/// <remarks>
/// The floor step only applies to float and double lanes.
/// NOTE(review): floored rather than truncated remainder for floating-point lanes — confirm intent.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator %(WideLane<T> a, T b)
{
    var divisor = Vector.Create(b);
    var quotient = VectorFloor(a.value / divisor);
    return new WideLane<T>(a.value - quotient * divisor);
}
/// <summary>Negates every lane.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator -(WideLane<T> a)
{
    var negated = -a.value;
    return new WideLane<T>(negated);
}
/// <summary>Lane-wise bitwise AND of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator &(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value & b.value);
}
/// <summary>Bitwise AND of every lane of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator &(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value & broadcast);
}
/// <summary>Lane-wise bitwise OR of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator |(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value | b.value);
}
/// <summary>Bitwise OR of every lane of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator |(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value | broadcast);
}
/// <summary>Lane-wise bitwise XOR of two values.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ^(WideLane<T> a, WideLane<T> b)
{
    return new WideLane<T>(a.value ^ b.value);
}
/// <summary>Bitwise XOR of every lane of <paramref name="a"/> with the scalar <paramref name="b"/>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ^(WideLane<T> a, T b)
{
    var broadcast = Vector.Create(b);
    return new WideLane<T>(a.value ^ broadcast);
}
/// <summary>Inverts every bit of every lane.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> operator ~(WideLane<T> a)
{
    return new WideLane<T>(~a.value);
}
/// <summary>Computes the lane-wise absolute value via <c>Vector.Abs</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Abs(WideLane<T> value)
{
    var magnitude = Vector.Abs(value.value);
    return new WideLane<T>(magnitude);
}
/// <summary>
/// Rounds every lane down to the nearest integral value.
/// </summary>
/// <param name="value">The value to floor.</param>
/// <returns>The floored lanes for float and double; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Floor(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var lowered = Vector.Floor(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref lowered));
    }

    if (typeof(T) == typeof(double))
    {
        var lowered = Vector.Floor(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref lowered));
    }

    return value;
}
/// <summary>
/// Computes the fractional part of every lane as <c>value - floor(value)</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Frac(WideLane<T> value)
{
    var whole = VectorFloor(value.value);
    return new WideLane<T>(value.value - whole);
}
/// <summary>Computes the lane-wise square root via <c>Vector.SquareRoot</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sqrt(WideLane<T> value)
{
    var root = Vector.SquareRoot(value.value);
    return new WideLane<T>(root);
}
/// <summary>
/// Linearly interpolates between <paramref name="a"/> and <paramref name="b"/> as <c>a + (b - a) * t</c>.
/// </summary>
/// <param name="a">Value returned for <c>t = 0</c>.</param>
/// <param name="b">Value approached as <c>t</c> goes to 1.</param>
/// <param name="t">Per-lane interpolation factor; it is not clamped.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Lerp(WideLane<T> a, WideLane<T> b, WideLane<T> t)
{
    var span = b.value - a.value;
    return new WideLane<T>(a.value + span * t.value);
}
/// <summary>
/// Computes <c>a * b + c</c> per lane.
/// </summary>
/// <param name="a">First factor.</param>
/// <param name="b">Second factor.</param>
/// <param name="c">Addend.</param>
/// <returns>
/// The result of <c>Vector.FusedMultiplyAdd</c> for float and double lanes; a separate multiply and
/// add for every other element type.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> MultipleAdd(WideLane<T> a, WideLane<T> b, WideLane<T> c)
{
    if (typeof(T) == typeof(float))
    {
        var fused = Vector.FusedMultiplyAdd(
            Unsafe.As<WideLane<T>, Vector<float>>(ref a),
            Unsafe.As<WideLane<T>, Vector<float>>(ref b),
            Unsafe.As<WideLane<T>, Vector<float>>(ref c));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref fused));
    }

    if (typeof(T) == typeof(double))
    {
        var fused = Vector.FusedMultiplyAdd(
            Unsafe.As<WideLane<T>, Vector<double>>(ref a),
            Unsafe.As<WideLane<T>, Vector<double>>(ref b),
            Unsafe.As<WideLane<T>, Vector<double>>(ref c));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref fused));
    }

    var product = a.value * b.value;
    return new WideLane<T>(product + c.value);
}
/// <summary>Computes the lane-wise minimum via <c>Vector.Min</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Min(WideLane<T> a, WideLane<T> b)
{
    var smaller = Vector.Min(a.value, b.value);
    return new WideLane<T>(smaller);
}
/// <summary>Computes the lane-wise maximum via <c>Vector.Max</c>.</summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Max(WideLane<T> a, WideLane<T> b)
{
    var larger = Vector.Max(a.value, b.value);
    return new WideLane<T>(larger);
}
/// <summary>
/// Restricts every lane of <paramref name="value"/> to the range given by <paramref name="min"/> and
/// <paramref name="max"/>, via <c>Vector.Clamp</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Clamp(WideLane<T> value, WideLane<T> min, WideLane<T> max)
{
    var limited = Vector.Clamp(value.value, min.value, max.value);
    return new WideLane<T>(limited);
}
/// <summary>
/// Clamps every lane to the range [0, 1].
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Saturate(WideLane<T> value)
{
    var lower = Create(T.Zero);
    var upper = Create(T.One);
    return Clamp(value, lower, upper);
}
/// <summary>
/// Computes the lane-wise sine via <c>Vector.Sin</c>.
/// </summary>
/// <param name="value">The input angles.</param>
/// <returns>The sines for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sin(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var sine = Vector.Sin(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref sine));
    }

    if (typeof(T) == typeof(double))
    {
        var sine = Vector.Sin(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref sine));
    }

    return value;
}
/// <summary>
/// Computes the lane-wise cosine via <c>Vector.Cos</c>.
/// </summary>
/// <param name="value">The input angles.</param>
/// <returns>The cosines for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Cos(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var cosine = Vector.Cos(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref cosine));
    }

    if (typeof(T) == typeof(double))
    {
        var cosine = Vector.Cos(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref cosine));
    }

    return value;
}
/// <summary>
/// Computes the lane-wise sine and cosine in one call via <c>Vector.SinCos</c>.
/// </summary>
/// <param name="value">The input angles.</param>
/// <returns>
/// The (sin, cos) pair for float and double lanes; for any other element type both tuple items are
/// <paramref name="value"/> unchanged.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static (WideLane<T> sin, WideLane<T> cos) SinCos(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var (sine, cosine) = Vector.SinCos(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        var wideSine = new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref sine));
        var wideCosine = new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref cosine));
        return (wideSine, wideCosine);
    }

    if (typeof(T) == typeof(double))
    {
        var (sine, cosine) = Vector.SinCos(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        var wideSine = new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref sine));
        var wideCosine = new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref cosine));
        return (wideSine, wideCosine);
    }

    return (value, value);
}
/// <summary>
/// Computes the lane-wise tangent as sin / cos, using <c>Vector.SinCos</c>.
/// </summary>
/// <param name="value">The input angles.</param>
/// <returns>The tangents for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
/// <remarks>
/// Fixed: the previous version reduced the argument to [-pi/2, pi/2] and then evaluated a two-term
/// polynomial that is only close to tan near [-pi/4, pi/4], so results were badly off towards the
/// ends of the range (for example tan(1.5) came out near 3.6 instead of about 14.1).
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Tan(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        ref var v = ref Unsafe.As<WideLane<T>, Vector<float>>(ref value);
        var (sin, cos) = Vector.SinCos(v);
        var result = sin / cos;
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref result));
    }
    else if (typeof(T) == typeof(double))
    {
        ref var v = ref Unsafe.As<WideLane<T>, Vector<double>>(ref value);
        var (sin, cos) = Vector.SinCos(v);
        var result = sin / cos;
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref result));
    }
    return value;
}
/// <summary>
/// Computes the lane-wise arcsine from the identity <c>asin(v) = pi/2 - acos(v)</c>.
/// </summary>
/// <param name="value">The input lanes.</param>
/// <returns><c>pi/2 - Acos(value)</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Asin(WideLane<T> value)
{
    var halfPi = Create(T.CreateChecked(Math.PI / 2));
    var arcCosine = Acos(value);
    return halfPi - arcCosine;
}
/// <summary>
/// Approximates the lane-wise arccosine with a cubic polynomial scaled by <c>sqrt(1 - |v|)</c>.
/// </summary>
/// <param name="value">The input lanes.</param>
/// <returns>
/// <c>sqrt(1 - |v|) * (c0 + c1*|v| + c2*|v|^2 + c3*|v|^3)</c> for non-negative lanes, and pi minus
/// that quantity for lanes that compare less than zero.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Acos(WideLane<T> value)
{
    var magnitude = Abs(value);

    // Polynomial coefficients, lowest order first.
    var k0 = Create(T.CreateChecked(1.5707288f));
    var k1 = Create(T.CreateChecked(-0.2121144f));
    var k2 = Create(T.CreateChecked(0.0742610f));
    var k3 = Create(T.CreateChecked(-0.0187293f));

    // Horner evaluation: ((k3*m + k2)*m + k1)*m + k0.
    var polynomial = MultipleAdd(magnitude, MultipleAdd(magnitude, MultipleAdd(magnitude, k3, k2), k1), k0);
    var positiveBranch = polynomial * Sqrt(One - magnitude);

    // Negative inputs are mirrored: acos(v) = pi - acos(-v).
    var pi = Create(T.CreateChecked(Math.PI));
    return Select(LessThan(value, Zero), pi - positiveBranch, positiveBranch);
}
/// <summary>
/// Approximates the lane-wise arctangent by delegating to <c>Atan2(value, One)</c>.
/// </summary>
/// <param name="value">The input lanes.</param>
/// <returns>The result of <c>Atan2(value, One)</c>.</returns>
/// <remarks>
/// Fixed: the previous version evaluated <c>value * (c1 + c2 * value^2)</c> directly. That polynomial
/// is only meaningful for |value| &lt;= 1 and diverges outside it (atan(10) came out near -182).
/// Atan2 swaps numerator and denominator when |y| &gt; |x|, keeping the polynomial argument in [0, 1].
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Atan(WideLane<T> value)
{
    return Atan2(value, One);
}
/// <summary>
/// Approximates the lane-wise two-argument arctangent of <paramref name="y"/> and <paramref name="x"/>.
/// </summary>
/// <param name="y">The y components.</param>
/// <param name="x">The x components.</param>
/// <returns>
/// The approximated angle: a polynomial in min(|x|, |y|) / max(|x|, |y|), mirrored according to which
/// magnitude is larger and the signs of <paramref name="x"/> and <paramref name="y"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Atan2(WideLane<T> y, WideLane<T> x)
{
    var magnitudeX = Abs(x);
    var magnitudeY = Abs(y);

    // Steep lanes (|y| > |x|) use |x| / |y| so that the ratio never exceeds 1.
    var isSteep = GreaterThan(magnitudeY, magnitudeX);
    var numerator = Select(isSteep, magnitudeX, magnitudeY);
    var denominator = Select(isSteep, magnitudeY, magnitudeX);
    var ratio = numerator / denominator;
    var ratioSquared = ratio * ratio;

    // Odd polynomial: ratio * (k1 + k2 * ratio^2).
    var k1 = Create(T.CreateChecked(0.97239411f));
    var k2 = Create(T.CreateChecked(-0.19194795f));
    var angle = ratio * MultipleAdd(k2, ratioSquared, k1);

    // Undo the swap for steep lanes: atan(|y| / |x|) = pi/2 - atan(|x| / |y|).
    var halfPi = Create(T.CreateChecked(1.570796327f));
    angle = Select(isSteep, halfPi - angle, angle);

    // Lanes with x < 0 are reflected about the y axis: angle becomes pi - angle.
    var pi = Create(T.CreateChecked(3.141592654f));
    angle = Select(LessThan(x, Zero), pi - angle, angle);

    // The angle so far was built from magnitudes; lanes with y < 0 take the negated result.
    return Select(LessThan(y, Zero), -angle, angle);
}
/// <summary>
/// Raises <paramref name="x"/> to the power <paramref name="y"/> per lane, computed as
/// <c>Exp(y * Log(x))</c>.
/// </summary>
/// <param name="x">The bases.</param>
/// <param name="y">The exponents.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Pow(WideLane<T> x, WideLane<T> y)
{
    var scaledLog = y * Log(x);
    return Exp(scaledLog);
}
/// <summary>
/// Computes e raised to the power of every lane via <c>Vector.Exp</c>.
/// </summary>
/// <param name="value">The exponents.</param>
/// <returns>The exponentials for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Exp(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var exponential = Vector.Exp(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref exponential));
    }

    if (typeof(T) == typeof(double))
    {
        var exponential = Vector.Exp(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref exponential));
    }

    return value;
}
/// <summary>
/// Computes 2 raised to the power of every lane by delegating to <c>Pow</c>.
/// </summary>
/// <param name="value">The exponents.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Exp2(WideLane<T> value)
{
    var two = Create(T.CreateChecked(2));
    return Pow(two, value);
}
/// <summary>
/// Computes the lane-wise natural logarithm via <c>Vector.Log</c>.
/// </summary>
/// <param name="value">The input lanes.</param>
/// <returns>The logarithms for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Log(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var logarithm = Vector.Log(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref logarithm));
    }

    if (typeof(T) == typeof(double))
    {
        var logarithm = Vector.Log(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref logarithm));
    }

    return value;
}
/// <summary>
/// Computes the lane-wise base-2 logarithm via <c>Vector.Log2</c>.
/// </summary>
/// <param name="value">The input lanes.</param>
/// <returns>The logarithms for float and double lanes; <paramref name="value"/> unchanged for any other element type.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Log2(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        var logarithm = Vector.Log2(Unsafe.As<WideLane<T>, Vector<float>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<float>, Vector<T>>(ref logarithm));
    }

    if (typeof(T) == typeof(double))
    {
        var logarithm = Vector.Log2(Unsafe.As<WideLane<T>, Vector<double>>(ref value));
        return new WideLane<T>(Unsafe.As<Vector<double>, Vector<T>>(ref logarithm));
    }

    return value;
}
/// <summary>Rounds each lane of <paramref name="value"/> up via <c>Vector.Ceiling</c>.</summary>
/// <param name="value">The per-lane inputs.</param>
/// <returns>
/// The ceiling of each lane when <typeparamref name="T"/> is <see cref="float"/> or
/// <see cref="double"/>; for every other element type the input is returned unchanged.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Ceil(WideLane<T> value)
{
    if (typeof(T) == typeof(double))
    {
        var ceil64 = Vector.Ceiling(value.value.As<T, double>());
        return new WideLane<T>(ceil64.As<double, T>());
    }

    if (typeof(T) == typeof(float))
    {
        var ceil32 = Vector.Ceiling(value.value.As<T, float>());
        return new WideLane<T>(ceil32.As<float, T>());
    }

    // No floating-point path applies: hand the input back untouched.
    return value;
}
/// <summary>Rounds each lane of <paramref name="value"/> to an integral value via <c>Vector.Round</c>.</summary>
/// <param name="value">The per-lane inputs.</param>
/// <returns>
/// The rounded lanes when <typeparamref name="T"/> is <see cref="float"/> or
/// <see cref="double"/>; for every other element type the input is returned unchanged.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Round(WideLane<T> value)
{
    if (typeof(T) == typeof(double))
    {
        var round64 = Vector.Round(value.value.As<T, double>());
        return new WideLane<T>(round64.As<double, T>());
    }

    if (typeof(T) == typeof(float))
    {
        var round32 = Vector.Round(value.value.As<T, float>());
        return new WideLane<T>(round32.As<float, T>());
    }

    // No floating-point path applies: hand the input back untouched.
    return value;
}
/// <summary>Discards the fractional part of each lane of <paramref name="value"/> via <c>Vector.Truncate</c>.</summary>
/// <param name="value">The per-lane inputs.</param>
/// <returns>
/// The truncated lanes when <typeparamref name="T"/> is <see cref="float"/> or
/// <see cref="double"/>; for every other element type the input is returned unchanged.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Trunc(WideLane<T> value)
{
    if (typeof(T) == typeof(double))
    {
        var trunc64 = Vector.Truncate(value.value.As<T, double>());
        return new WideLane<T>(trunc64.As<double, T>());
    }

    if (typeof(T) == typeof(float))
    {
        var trunc32 = Vector.Truncate(value.value.As<T, float>());
        return new WideLane<T>(trunc32.As<float, T>());
    }

    // No floating-point path applies: hand the input back untouched.
    return value;
}
/// <summary>Returns the sign of each lane of <paramref name="value"/>.</summary>
/// <param name="value">The per-lane inputs.</param>
/// <returns>
/// <c>One</c> in lanes that compare greater than zero, negated <c>One</c> in lanes that compare
/// less than zero, and <c>Zero</c> in every remaining lane.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Sign(WideLane<T> value)
{
    var isPositive = GreaterThan(value, Zero);
    var isNegative = LessThan(value, Zero);

    // Use an arithmetic -1 rather than the bitwise complement of zero: all-bits-set only
    // equals -1 for signed integers, while for float/double it is a NaN bit pattern.
    var negativeOrZero = Select(isNegative, -One, Zero);

    return Select(isPositive, One, negativeOrZero);
}
/// <summary>Combines the magnitude of one operand with the sign of another, lane by lane.</summary>
/// <param name="magnitude">Supplies the magnitude of each result lane.</param>
/// <param name="sign">Supplies the sign of each result lane.</param>
/// <returns>The result of <c>Vector.CopySign</c> applied to the two underlying vectors.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> CopySign(WideLane<T> magnitude, WideLane<T> sign)
{
    var combined = Vector.CopySign(magnitude.value, sign.value);
    return new WideLane<T>(combined);
}
/// <summary>Computes the reciprocal (1 / x) of each lane of <paramref name="value"/>.</summary>
/// <param name="value">The per-lane divisors.</param>
/// <returns>
/// For <see cref="float"/> lanes whose width matches a 128-bit vector with SSE available, or a
/// 256-bit vector with AVX available, the result of the hardware <c>Reciprocal</c> intrinsic;
/// otherwise the result of dividing a broadcast one by <paramref name="value"/>.
/// </returns>
/// <remarks>
/// NOTE(review): the SSE/AVX reciprocal intrinsics are documented as estimates, so the intrinsic
/// paths may be less precise than the division fallback - confirm callers tolerate this.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Rcp(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        if (Sse.IsSupported && LaneWidth == Vector128<float>.Count)
        {
            var input128 = Unsafe.As<WideLane<T>, Vector128<float>>(ref value);
            var reciprocal128 = Sse.Reciprocal(input128);
            return Unsafe.As<Vector128<float>, WideLane<T>>(ref reciprocal128);
        }

        if (Avx.IsSupported && LaneWidth == Vector256<float>.Count)
        {
            var input256 = Unsafe.As<WideLane<T>, Vector256<float>>(ref value);
            var reciprocal256 = Avx.Reciprocal(input256);
            return Unsafe.As<Vector256<float>, WideLane<T>>(ref reciprocal256);
        }
    }

    // Generic path: plain per-lane division.
    var one = Create(T.One);
    return one / value;
}
/// <summary>Computes the reciprocal square root (1 / sqrt(x)) of each lane of <paramref name="value"/>.</summary>
/// <param name="value">The per-lane inputs.</param>
/// <returns>
/// For <see cref="float"/> lanes whose width matches a 128-bit vector with SSE available, or a
/// 256-bit vector with AVX available, the result of the hardware <c>ReciprocalSqrt</c> intrinsic;
/// otherwise the result of dividing a broadcast one by <c>Sqrt(value)</c>.
/// </returns>
/// <remarks>
/// NOTE(review): the SSE/AVX reciprocal-sqrt intrinsics are documented as estimates, so the
/// intrinsic paths may be less precise than the division fallback - confirm callers tolerate this.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Rsqrt(WideLane<T> value)
{
    if (typeof(T) == typeof(float))
    {
        if (Sse.IsSupported && LaneWidth == Vector128<float>.Count)
        {
            var input128 = Unsafe.As<WideLane<T>, Vector128<float>>(ref value);
            var rsqrt128 = Sse.ReciprocalSqrt(input128);
            return Unsafe.As<Vector128<float>, WideLane<T>>(ref rsqrt128);
        }

        if (Avx.IsSupported && LaneWidth == Vector256<float>.Count)
        {
            var input256 = Unsafe.As<WideLane<T>, Vector256<float>>(ref value);
            var rsqrt256 = Avx.ReciprocalSqrt(input256);
            return Unsafe.As<Vector256<float>, WideLane<T>>(ref rsqrt256);
        }
    }

    // Generic path: square root followed by plain per-lane division.
    var one = Create(T.One);
    return one / Sqrt(value);
}
/// <summary>Blends two operands according to a mask using <c>Vector.ConditionalSelect</c>.</summary>
/// <param name="conditionMask">The mask that chooses between the two operands.</param>
/// <param name="ifTrue">The operand taken where the mask is set.</param>
/// <param name="ifFalse">The operand taken where the mask is clear.</param>
/// <returns>The blended lanes.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Select(WideLane<T> conditionMask, WideLane<T> ifTrue, WideLane<T> ifFalse)
{
    var blended = Vector.ConditionalSelect(conditionMask.value, ifTrue.value, ifFalse.value);
    return new WideLane<T>(blended);
}
/// <summary>Compares two operands lane by lane for <c>a &gt; b</c>.</summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>The mask produced by <c>Vector.GreaterThan</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> GreaterThan(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.GreaterThan(a.value, b.value);
    return new WideLane<T>(mask);
}
/// <summary>Compares two operands lane by lane for <c>a &gt;= b</c>.</summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>The mask produced by <c>Vector.GreaterThanOrEqual</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> GreaterThanOrEqual(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.GreaterThanOrEqual(a.value, b.value);
    return new WideLane<T>(mask);
}
/// <summary>Compares two operands lane by lane for <c>a &lt; b</c>.</summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>The mask produced by <c>Vector.LessThan</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> LessThan(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.LessThan(a.value, b.value);
    return new WideLane<T>(mask);
}
/// <summary>Compares two operands lane by lane for <c>a &lt;= b</c>.</summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>The mask produced by <c>Vector.LessThanOrEqual</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> LessThanOrEqual(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.LessThanOrEqual(a.value, b.value);
    return new WideLane<T>(mask);
}
/// <summary>Compares two operands lane by lane for equality.</summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>The mask produced by <c>Vector.Equals</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static WideLane<T> Equal(WideLane<T> a, WideLane<T> b)
{
    var mask = Vector.Equals(a.value, b.value);
    return new WideLane<T>(mask);
}
/// <summary>Tests whether at least one lane of <paramref name="mask"/> is set.</summary>
/// <param name="mask">The mask to inspect.</param>
/// <returns>
/// <see langword="false"/> when every lane compares equal to zero; otherwise <see langword="true"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Any(WideLane<T> mask)
{
    var everyLaneIsZero = Vector.EqualsAll(mask.value, Vector<T>.Zero);
    return !everyLaneIsZero;
}
/// <summary>Tests whether every lane of <paramref name="mask"/> is fully set.</summary>
/// <param name="mask">The mask to inspect.</param>
/// <returns>
/// <see langword="true"/> when every bit of the underlying vector is set; otherwise
/// <see langword="false"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool All(WideLane<T> mask)
{
    // Compare the raw bytes instead of the typed elements. For float/double an all-bits-set
    // lane is a NaN, and NaN never compares equal to itself, so an element-wise comparison
    // against AllBitsSet could never succeed for floating-point masks.
    var rawBits = Vector.AsVectorByte(mask.value);
    return Vector.EqualsAll(rawBits, Vector<byte>.AllBitsSet);
}
/// <summary>Tests whether no lane of <paramref name="mask"/> is set.</summary>
/// <param name="mask">The mask to inspect.</param>
/// <returns>
/// <see langword="true"/> when every lane compares equal to zero; otherwise <see langword="false"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool None(WideLane<T> mask)
{
    var everyLaneIsZero = Vector.EqualsAll(mask.value, Vector<T>.Zero);
    return everyLaneIsZero;
}
/// <summary>Formats the lanes using the string representation of the underlying vector.</summary>
/// <returns>The text produced by the wrapped vector's own <c>ToString</c>.</returns>
public override string ToString() => value.ToString();
}

View File

@@ -0,0 +1,9 @@
using System;
using System.Collections.Generic;
using System.Text;
namespace Misaki.HighPerformance.Mathematics;
/// <summary>
/// Marker attribute with no members of its own.
/// </summary>
/// <remarks>
/// Derives from <see cref="System.Attribute"/> so the type can actually be applied with
/// attribute syntax, as its <c>Attribute</c> suffix implies.
/// NOTE(review): no <c>AttributeUsage</c> is declared because the intended targets are not
/// visible here - restrict it once the consumer of this marker is known.
/// </remarks>
internal sealed class AutoSIMDAttribute : System.Attribute
{
}

View File

@@ -2503,7 +2503,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float tan(float x) public static float tan(float x)
{ {
return (float)System.Math.Tan(x); return MathF.Tan(x);
} }
/// <summary>Returns the componentwise tangent of a float2 vector.</summary> /// <summary>Returns the componentwise tangent of a float2 vector.</summary>
@@ -2540,7 +2540,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double tan(double x) public static double tan(double x)
{ {
return System.Math.Tan(x); return Math.Tan(x);
} }
/// <summary>Returns the componentwise tangent of a double2 vector.</summary> /// <summary>Returns the componentwise tangent of a double2 vector.</summary>
@@ -2577,7 +2577,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float tanh(float x) public static float tanh(float x)
{ {
return (float)System.Math.Tanh(x); return MathF.Tanh(x);
} }
/// <summary>Returns the componentwise hyperbolic tangent of a float2 vector.</summary> /// <summary>Returns the componentwise hyperbolic tangent of a float2 vector.</summary>
@@ -2614,7 +2614,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double tanh(double x) public static double tanh(double x)
{ {
return System.Math.Tanh(x); return Math.Tanh(x);
} }
/// <summary>Returns the componentwise hyperbolic tangent of a double2 vector.</summary> /// <summary>Returns the componentwise hyperbolic tangent of a double2 vector.</summary>
@@ -2651,7 +2651,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float atan(float x) public static float atan(float x)
{ {
return (float)System.Math.Atan(x); return MathF.Atan(x);
} }
/// <summary>Returns the componentwise arctangent of a float2 vector.</summary> /// <summary>Returns the componentwise arctangent of a float2 vector.</summary>
@@ -2688,7 +2688,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double atan(double x) public static double atan(double x)
{ {
return System.Math.Atan(x); return Math.Atan(x);
} }
/// <summary>Returns the componentwise arctangent of a double2 vector.</summary> /// <summary>Returns the componentwise arctangent of a double2 vector.</summary>
@@ -2726,7 +2726,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float atan2(float y, float x) public static float atan2(float y, float x)
{ {
return (float)System.Math.Atan2(y, x); return MathF.Atan2(y, x);
} }
/// <summary>Returns the componentwise 2-argument arctangent of a pair of floats2 vectors.</summary> /// <summary>Returns the componentwise 2-argument arctangent of a pair of floats2 vectors.</summary>
@@ -2767,7 +2767,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double atan2(double y, double x) public static double atan2(double y, double x)
{ {
return System.Math.Atan2(y, x); return Math.Atan2(y, x);
} }
/// <summary>Returns the 2-argument arctangent of a pair of double2 vectors.</summary> /// <summary>Returns the 2-argument arctangent of a pair of double2 vectors.</summary>
@@ -2807,7 +2807,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float cos(float x) public static float cos(float x)
{ {
return (float)System.Math.Cos(x); return MathF.Cos(x);
} }
/// <summary>Returns the componentwise cosine of a float2 vector.</summary> /// <summary>Returns the componentwise cosine of a float2 vector.</summary>
@@ -2844,7 +2844,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double cos(double x) public static double cos(double x)
{ {
return System.Math.Cos(x); return Math.Cos(x);
} }
/// <summary>Returns the componentwise cosine of a double2 vector.</summary> /// <summary>Returns the componentwise cosine of a double2 vector.</summary>
@@ -2881,7 +2881,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float cosh(float x) public static float cosh(float x)
{ {
return (float)System.Math.Cosh(x); return MathF.Cosh(x);
} }
/// <summary>Returns the componentwise hyperbolic cosine of a float2 vector.</summary> /// <summary>Returns the componentwise hyperbolic cosine of a float2 vector.</summary>
@@ -2918,7 +2918,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double cosh(double x) public static double cosh(double x)
{ {
return System.Math.Cosh(x); return Math.Cosh(x);
} }
/// <summary>Returns the componentwise hyperbolic cosine of a double2 vector.</summary> /// <summary>Returns the componentwise hyperbolic cosine of a double2 vector.</summary>
@@ -2955,7 +2955,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float acos(float x) public static float acos(float x)
{ {
return (float)System.Math.Acos((float)x); return MathF.Acos(x);
} }
/// <summary>Returns the componentwise arccosine of a float2 vector.</summary> /// <summary>Returns the componentwise arccosine of a float2 vector.</summary>
@@ -2992,7 +2992,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double acos(double x) public static double acos(double x)
{ {
return System.Math.Acos(x); return Math.Acos(x);
} }
/// <summary>Returns the componentwise arccosine of a double2 vector.</summary> /// <summary>Returns the componentwise arccosine of a double2 vector.</summary>
@@ -3029,7 +3029,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sin(float x) public static float sin(float x)
{ {
return (float)System.Math.Sin((float)x); return MathF.Sin(x);
} }
/// <summary>Returns the componentwise sine of a float2 vector.</summary> /// <summary>Returns the componentwise sine of a float2 vector.</summary>
@@ -3066,7 +3066,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sin(double x) public static double sin(double x)
{ {
return System.Math.Sin(x); return Math.Sin(x);
} }
/// <summary>Returns the componentwise sine of a double2 vector.</summary> /// <summary>Returns the componentwise sine of a double2 vector.</summary>
@@ -3103,7 +3103,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sinh(float x) public static float sinh(float x)
{ {
return (float)System.Math.Sinh((float)x); return MathF.Sinh(x);
} }
/// <summary>Returns the componentwise hyperbolic sine of a float2 vector.</summary> /// <summary>Returns the componentwise hyperbolic sine of a float2 vector.</summary>
@@ -3140,7 +3140,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sinh(double x) public static double sinh(double x)
{ {
return System.Math.Sinh(x); return Math.Sinh(x);
} }
/// <summary>Returns the componentwise hyperbolic sine of a double2 vector.</summary> /// <summary>Returns the componentwise hyperbolic sine of a double2 vector.</summary>
@@ -3177,7 +3177,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float asin(float x) public static float asin(float x)
{ {
return (float)System.Math.Asin((float)x); return MathF.Asin(x);
} }
/// <summary>Returns the componentwise arcsine of a float2 vector.</summary> /// <summary>Returns the componentwise arcsine of a float2 vector.</summary>
@@ -3214,7 +3214,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double asin(double x) public static double asin(double x)
{ {
return System.Math.Asin(x); return Math.Asin(x);
} }
/// <summary>Returns the componentwise arcsine of a double2 vector.</summary> /// <summary>Returns the componentwise arcsine of a double2 vector.</summary>
@@ -3251,7 +3251,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float floor(float x) public static float floor(float x)
{ {
return (float)System.Math.Floor((float)x); return MathF.Floor(x);
} }
/// <summary>Returns the result of rounding each component of a float2 vector value down to the nearest value less or equal to the original value.</summary> /// <summary>Returns the result of rounding each component of a float2 vector value down to the nearest value less or equal to the original value.</summary>
@@ -3288,7 +3288,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double floor(double x) public static double floor(double x)
{ {
return System.Math.Floor(x); return Math.Floor(x);
} }
/// <summary>Returns the result of rounding each component of a double2 vector value down to the nearest value less or equal to the original value.</summary> /// <summary>Returns the result of rounding each component of a double2 vector value down to the nearest value less or equal to the original value.</summary>
@@ -3325,7 +3325,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float ceil(float x) public static float ceil(float x)
{ {
return (float)System.Math.Ceiling((float)x); return MathF.Ceiling(x);
} }
/// <summary>Returns the result of rounding each component of a float2 vector value up to the nearest value greater or equal to the original value.</summary> /// <summary>Returns the result of rounding each component of a float2 vector value up to the nearest value greater or equal to the original value.</summary>
@@ -3362,7 +3362,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double ceil(double x) public static double ceil(double x)
{ {
return System.Math.Ceiling(x); return Math.Ceiling(x);
} }
/// <summary>Returns the result of rounding each component of a double2 vector value up to the nearest integral value greater or equal to the original value.</summary> /// <summary>Returns the result of rounding each component of a double2 vector value up to the nearest integral value greater or equal to the original value.</summary>
@@ -3399,7 +3399,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float round(float x) public static float round(float x)
{ {
return (float)System.Math.Round((float)x); return MathF.Round(x);
} }
/// <summary>Returns the result of rounding each component of a float2 vector value to the nearest integral value.</summary> /// <summary>Returns the result of rounding each component of a float2 vector value to the nearest integral value.</summary>
@@ -3436,7 +3436,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double round(double x) public static double round(double x)
{ {
return System.Math.Round(x); return Math.Round(x);
} }
/// <summary>Returns the result of rounding each component of a double2 vector value to the nearest integral value.</summary> /// <summary>Returns the result of rounding each component of a double2 vector value to the nearest integral value.</summary>
@@ -3473,7 +3473,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float trunc(float x) public static float trunc(float x)
{ {
return (float)System.Math.Truncate((float)x); return MathF.Truncate(x);
} }
/// <summary>Returns the result of a componentwise truncation of a float2 value to an integral float2 value.</summary> /// <summary>Returns the result of a componentwise truncation of a float2 value to an integral float2 value.</summary>
@@ -3500,7 +3500,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float4 trunc(float4 x) public static float4 trunc(float4 x)
{ {
return Vector128.Truncate(x.AsVector128()).Asfloat4(); return Vector128.Truncate(x.__v).Asfloat4();
} }
@@ -3510,7 +3510,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double trunc(double x) public static double trunc(double x)
{ {
return System.Math.Truncate(x); return Math.Truncate(x);
} }
/// <summary>Returns the result of a componentwise truncation of a double2 value to an integral double2 value.</summary> /// <summary>Returns the result of a componentwise truncation of a double2 value to an integral double2 value.</summary>
@@ -3537,7 +3537,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double4 trunc(double4 x) public static double4 trunc(double4 x)
{ {
return Vector256.Truncate(x.AsVector256()).Asdouble4(); return Vector256.Truncate(x.__v).Asdouble4();
} }
@@ -3805,7 +3805,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float pow(float x, float y) public static float pow(float x, float y)
{ {
return (float)System.Math.Pow((float)x, (float)y); return MathF.Pow(x, (float)y);
} }
/// <summary>Returns the componentwise result of raising x to the power y.</summary> /// <summary>Returns the componentwise result of raising x to the power y.</summary>
@@ -3846,7 +3846,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double pow(double x, double y) public static double pow(double x, double y)
{ {
return System.Math.Pow(x, y); return Math.Pow(x, y);
} }
/// <summary>Returns the componentwise result of raising x to the power y.</summary> /// <summary>Returns the componentwise result of raising x to the power y.</summary>
@@ -3886,7 +3886,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp(float x) public static float exp(float x)
{ {
return (float)System.Math.Exp((float)x); return MathF.Exp(x);
} }
/// <summary>Returns the componentwise base-e exponential of x.</summary> /// <summary>Returns the componentwise base-e exponential of x.</summary>
@@ -3923,7 +3923,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp(double x) public static double exp(double x)
{ {
return System.Math.Exp(x); return Math.Exp(x);
} }
/// <summary>Returns the componentwise base-e exponential of x.</summary> /// <summary>Returns the componentwise base-e exponential of x.</summary>
@@ -3960,7 +3960,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp2(float x) public static float exp2(float x)
{ {
return (float)System.Math.Exp((float)x * 0.69314718f); return MathF.Exp(x * 0.69314718f);
} }
/// <summary>Returns the componentwise base-2 exponential of x.</summary> /// <summary>Returns the componentwise base-2 exponential of x.</summary>
@@ -3997,7 +3997,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp2(double x) public static double exp2(double x)
{ {
return System.Math.Exp(x * 0.693147180559945309); return Math.Exp(x * 0.693147180559945309);
} }
/// <summary>Returns the componentwise base-2 exponential of x.</summary> /// <summary>Returns the componentwise base-2 exponential of x.</summary>
@@ -4034,7 +4034,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float exp10(float x) public static float exp10(float x)
{ {
return (float)System.Math.Exp((float)x * 2.30258509f); return MathF.Exp(x * 2.30258509f);
} }
/// <summary>Returns the componentwise base-10 exponential of x.</summary> /// <summary>Returns the componentwise base-10 exponential of x.</summary>
@@ -4071,7 +4071,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double exp10(double x) public static double exp10(double x)
{ {
return System.Math.Exp(x * 2.302585092994045684); return Math.Exp(x * 2.302585092994045684);
} }
/// <summary>Returns the componentwise base-10 exponential of x.</summary> /// <summary>Returns the componentwise base-10 exponential of x.</summary>
@@ -4108,7 +4108,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log(float x) public static float log(float x)
{ {
return (float)System.Math.Log((float)x); return MathF.Log(x);
} }
/// <summary>Returns the componentwise natural logarithm of a float2 vector.</summary> /// <summary>Returns the componentwise natural logarithm of a float2 vector.</summary>
@@ -4145,7 +4145,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log(double x) public static double log(double x)
{ {
return System.Math.Log(x); return Math.Log(x);
} }
/// <summary>Returns the componentwise natural logarithm of a double2 vector.</summary> /// <summary>Returns the componentwise natural logarithm of a double2 vector.</summary>
@@ -4182,7 +4182,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log2(float x) public static float log2(float x)
{ {
return (float)System.Math.Log((float)x, 2.0f); return MathF.Log(x, 2.0f);
} }
/// <summary>Returns the componentwise base-2 logarithm of a float2 vector.</summary> /// <summary>Returns the componentwise base-2 logarithm of a float2 vector.</summary>
@@ -4219,7 +4219,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log2(double x) public static double log2(double x)
{ {
return System.Math.Log(x, 2.0); return Math.Log(x, 2.0);
} }
/// <summary>Returns the componentwise base-2 logarithm of a double2 vector.</summary> /// <summary>Returns the componentwise base-2 logarithm of a double2 vector.</summary>
@@ -4255,7 +4255,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float log10(float x) public static float log10(float x)
{ {
return (float)System.Math.Log10((float)x); return MathF.Log10(x);
} }
/// <summary>Returns the componentwise base-10 logarithm of a float2 vector.</summary> /// <summary>Returns the componentwise base-10 logarithm of a float2 vector.</summary>
@@ -4292,7 +4292,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double log10(double x) public static double log10(double x)
{ {
return System.Math.Log10(x); return Math.Log10(x);
} }
/// <summary>Returns the componentwise base-10 logarithm of a double2 vector.</summary> /// <summary>Returns the componentwise base-10 logarithm of a double2 vector.</summary>
@@ -4519,7 +4519,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float sqrt(float x) public static float sqrt(float x)
{ {
return (float)System.Math.Sqrt((float)x); return MathF.Sqrt(x);
} }
/// <summary>Returns the componentwise square root of a float2 vector.</summary> /// <summary>Returns the componentwise square root of a float2 vector.</summary>
@@ -4556,7 +4556,7 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double sqrt(double x) public static double sqrt(double x)
{ {
return System.Math.Sqrt(x); return Math.Sqrt(x);
} }
/// <summary>Returns the componentwise square root of a double2 vector.</summary> /// <summary>Returns the componentwise square root of a double2 vector.</summary>
@@ -4727,8 +4727,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float2 normalizesafe(float2 x, float2 defaultvalue = new float2()) public static float2 normalizesafe(float2 x, float2 defaultvalue = new float2())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
/// <summary> /// <summary>
@@ -4741,8 +4741,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float3 normalizesafe(float3 x, float3 defaultvalue = new float3()) public static float3 normalizesafe(float3 x, float3 defaultvalue = new float3())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
/// <summary> /// <summary>
@@ -4755,8 +4755,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float4 normalizesafe(float4 x, float4 defaultvalue = new float4()) public static float4 normalizesafe(float4 x, float4 defaultvalue = new float4())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
@@ -4770,8 +4770,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double2 normalizesafe(double2 x, double2 defaultvalue = new double2()) public static double2 normalizesafe(double2 x, double2 defaultvalue = new double2())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
/// <summary> /// <summary>
@@ -4784,8 +4784,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double3 normalizesafe(double3 x, double3 defaultvalue = new double3()) public static double3 normalizesafe(double3 x, double3 defaultvalue = new double3())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
/// <summary> /// <summary>
@@ -4798,8 +4798,8 @@ public static partial class math
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double4 normalizesafe(double4 x, double4 defaultvalue = new double4()) public static double4 normalizesafe(double4 x, double4 defaultvalue = new double4())
{ {
var len = math.dot(x, x); var len = dot(x, x);
return math.select(defaultvalue, x * math.rsqrt(len), len > FLT_MIN_NORMAL); return select(defaultvalue, x * rsqrt(len), len > FLT_MIN_NORMAL);
} }
@@ -8665,7 +8665,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.z - d1.y; var y1 = d2.z - d1.y;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8705,7 +8705,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.x + d1.z; var y1 = d2.x + d1.z;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8745,7 +8745,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.y + d1.x; var y1 = d2.y + d1.x;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8785,7 +8785,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.x - d1.z; var y1 = d2.x - d1.z;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8825,7 +8825,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.y - d1.x; var y1 = d2.y - d1.x;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8864,7 +8864,7 @@ public static partial class math
var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww var d1 = qv * qv.wwww * float4(2f); //xw, yw, zw, ww
var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww var d2 = qv * qv.yzxw * float4(2f); //xy, yz, zx, ww
var d3 = qv * qv; var d3 = qv * qv;
var euler = Misaki.HighPerformance.Mathematics.float3.zero; var euler = Mathematics.float3.zero;
var y1 = d2.z + d1.y; var y1 = d2.z + d1.y;
if (y1 * y1 < cutoff) if (y1 * y1 < cutoff)
@@ -8898,24 +8898,24 @@ public static partial class math
/// <param name="order">The order in which the rotations are applied.</param> /// <param name="order">The order in which the rotations are applied.</param>
/// <returns>The Euler angle representation of the quaternion in the specified order.</returns> /// <returns>The Euler angle representation of the quaternion in the specified order.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float3 Euler(quaternion q, math.RotationOrder order = math.RotationOrder.Default) public static float3 Euler(quaternion q, math.RotationOrder order = RotationOrder.Default)
{ {
switch (order) switch (order)
{ {
case math.RotationOrder.XYZ: case RotationOrder.XYZ:
return EulerXYZ(q); return EulerXYZ(q);
case math.RotationOrder.XZY: case RotationOrder.XZY:
return EulerXZY(q); return EulerXZY(q);
case math.RotationOrder.YXZ: case RotationOrder.YXZ:
return EulerYXZ(q); return EulerYXZ(q);
case math.RotationOrder.YZX: case RotationOrder.YZX:
return EulerYZX(q); return EulerYZX(q);
case math.RotationOrder.ZXY: case RotationOrder.ZXY:
return EulerZXY(q); return EulerZXY(q);
case math.RotationOrder.ZYX: case RotationOrder.ZYX:
return EulerZYX(q); return EulerZYX(q);
default: default:
return Misaki.HighPerformance.Mathematics.float3.zero; return Mathematics.float3.zero;
} }
} }

View File

@@ -1,7 +1,8 @@
#define NOISE_BENCHMARK #define ADD_BENCHMARK
using BenchmarkDotNet.Attributes; using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics; using System.Numerics;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
@@ -9,7 +10,7 @@ namespace Misaki.HighPerformance.Test.Benchmark;
public class MathematicsBenchmark public class MathematicsBenchmark
{ {
#if VECTOR_BENCHMARK #if ADD_BENCHMARK
private Vector4 _va = new Vector4(1, 2, 1, 2); private Vector4 _va = new Vector4(1, 2, 1, 2);
private Vector4 _vb = new Vector4(3, 4, 3, 4); private Vector4 _vb = new Vector4(3, 4, 3, 4);
@@ -39,61 +40,52 @@ public class MathematicsBenchmark
} }
#endif #endif
#if NOISE_BENCHMARK #if FMA_BENCHMARK
private Vector4 _va = new Vector4(1, 2, 1, 2);
private Vector4 _vb = new Vector4(3, 4, 3, 4);
private Vector4 _vc = new Vector4(5, 6, 5, 6);
private const int _SIZE = 32; private Vector128<float> _va128 = Vector128.Create(1f, 2f, 1f, 2f);
private Vector128<float> _vb128 = Vector128.Create(3f, 4f, 3f, 4f);
private Vector128<float> _vc128 = Vector128.Create(5f, 6f, 5f, 6f);
private float4 _fa = new float4(1, 2, 1, 2);
private float4 _fb = new float4(3, 4, 3, 4);
private float4 _fc = new float4(5, 6, 5, 6);
[Benchmark] [Benchmark]
public unsafe void VectorNoise() public Vector4 Vector4()
{ {
var buf = stackalloc float[_SIZE * _SIZE]; for (var i = 0; i < 10; i++)
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
{ {
buffers = buf, _va = _vb * _vc + _va;
width = _SIZE,
height = _SIZE,
};
for (var i = 0; i < _SIZE * _SIZE; i++)
{
job.Execute(i, 0);
} }
return _va;
} }
[Benchmark] [Benchmark]
public unsafe void MathNoise() public Vector128<float> VectorFMA()
{ {
var buf = stackalloc float[_SIZE * _SIZE]; for (var i = 0; i < 10; i++)
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
{ {
buffers = buf, _va128 = System.Runtime.Intrinsics.X86.Fma.MultiplyAdd(_vb128, _vc128, _va128);
width = _SIZE,
height = _SIZE,
};
for (var i = 0; i < _SIZE * _SIZE; i++)
{
job.Execute(i, 0);
} }
return _va128;
} }
[Benchmark] [Benchmark]
// This is 10x faster than VectorNoise and MathNoise, but writing a burst like compiler to compile MathNoise into this is incredibly hard. public float4 floatFMA()
public unsafe void MathVNoise()
{ {
var buf = stackalloc float[_SIZE * _SIZE]; for (var i = 0; i < 10; i++)
var job = new Misaki.HighPerformance.Test.Jobs.NoiseJobMathV
{ {
buffers = buf, _fa = _fb * _fc + _fa;
width = _SIZE,
height = _SIZE,
};
for (var i = 0; i < _SIZE * _SIZE / 8; i++)
{
job.Execute(i, 0);
} }
return _fa;
} }
#endif #endif
#if MATRIX_BENCHMARK #if MATRIX_BENCHMARK

View File

@@ -0,0 +1,98 @@
using BenchmarkDotNet.Attributes;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Test.Benchmark;
/// <summary>
/// BenchmarkDotNet suite that fills a <c>_SIZE</c> x <c>_SIZE</c> float buffer with noise through
/// several job implementations. Only methods whose <c>[Benchmark]</c> attribute is not commented
/// out are measured.
/// </summary>
public unsafe class SPMDBenchmark
{
    private const int _SIZE = 512;

    // Number of floats in the shared output buffer.
    private const int _CELL_COUNT = _SIZE * _SIZE;

    private JobScheduler _scheduler = null!;
    private float* _buf;

    /// <summary>Creates the scheduler and allocates the native output buffer.</summary>
    [GlobalSetup]
    public void Setup()
    {
        _scheduler = new JobScheduler(Environment.ProcessorCount);
        _buf = (float*)NativeMemory.Alloc(sizeof(float) * _CELL_COUNT);
    }

    /// <summary>Disposes the scheduler, then releases the native output buffer.</summary>
    [GlobalCleanup]
    public void Cleanup()
    {
        _scheduler.Dispose();
        NativeMemory.Free(_buf);
    }

    /// <summary>Runs the vector noise job inline on the calling thread via <c>Run</c>.</summary>
    [Benchmark]
    public void VectorNoiseSingleThread()
    {
        var noise = new Jobs.NoiseJobVector
        {
            width = _SIZE,
            height = _SIZE,
            buffers = _buf,
        };

        noise.Run(_CELL_COUNT, 0);
    }

    /// <summary>Schedules the vector noise job on the scheduler and waits for it (currently disabled).</summary>
    //[Benchmark]
    public void VectorNoise()
    {
        var noise = new Jobs.NoiseJobVector
        {
            width = _SIZE,
            height = _SIZE,
            buffers = _buf,
        };

        var pending = _scheduler.ScheduleParallel(ref noise, _CELL_COUNT, 64);
        _scheduler.WaitComplete(pending);
    }

    /// <summary>Schedules the math-library noise job on the scheduler and waits for it (currently disabled).</summary>
    //[Benchmark]
    public void MathNoise()
    {
        var noise = new Jobs.NoiseJobMath
        {
            width = _SIZE,
            height = _SIZE,
            buffers = _buf,
        };

        var pending = _scheduler.ScheduleParallel(ref noise, _CELL_COUNT, 64);
        _scheduler.WaitComplete(pending);
    }

    /// <summary>
    /// Schedules the hand-vectorised noise job (currently disabled). The iteration count is the
    /// cell count divided by 8, rounded up.
    /// </summary>
    //[Benchmark(Baseline = true)]
    public void ManualSPMDNoise()
    {
        const int stride = 8;

        var noise = new Jobs.NoiseJobMathV
        {
            width = _SIZE,
            height = _SIZE,
            buffers = _buf,
        };

        var iterations = (_CELL_COUNT + stride - 1) / stride;
        var pending = _scheduler.ScheduleParallel(ref noise, iterations, 64);
        _scheduler.WaitComplete(pending);
    }

    /// <summary>Schedules the generic SPMD noise job and waits for it; this is the baseline.</summary>
    [Benchmark(Baseline = true)]
    public void SPMDNoise()
    {
        var noise = new Jobs.NoiseJobMathSPMD
        {
            width = _SIZE,
            height = _SIZE,
            buffers = _buf,
        };

        var pending = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref noise, _CELL_COUNT, 64, -1, JobHandle.Invalid);
        _scheduler.WaitComplete(pending);
    }
}

View File

@@ -1,9 +1,9 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics; using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Misaki.HighPerformance.Test.Jobs; namespace Misaki.HighPerformance.Test.Jobs;
@@ -16,7 +16,7 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Frac(float x) public static float Frac(float x)
{ {
return x - MathF.Truncate(x); return x - MathF.Floor(x);
} }
private static Vector2 GradientNoiseDirect(Vector2 uv) private static Vector2 GradientNoiseDirect(Vector2 uv)
@@ -101,7 +101,7 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
private static Vector256<float> Mod289(Vector256<float> x) private static Vector256<float> Mod289(Vector256<float> x)
{ {
var div = x / Vector256.Create(289.0f); var div = x / Vector256.Create(289.0f);
var flr = Vector256.Floor(div); var flr = Vector256.Truncate(div);
return x - (flr * Vector256.Create(289.0f)); return x - (flr * Vector256.Create(289.0f));
} }
@@ -119,12 +119,13 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var hy = Mod289(iy); var hy = Mod289(iy);
var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f); var p = hx * Vector256.Create(34.0f) + Vector256.Create(1.0f);
p = Mod289(p * hx + hy); p = Mod289(p * hx) + hy;
var pPrev = p;
p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f); p = p * Vector256.Create(34.0f) + Vector256.Create(1.0f);
p = Mod289(p * hx); p = Mod289(p * pPrev);
var r = (p / 41.0f); var r = (p / 41.0f);
r = (r - Vector256.Floor(r)) * 2.0f - Vector256<float>.One; r = (r - Vector256.Truncate(r)) * 2.0f - Vector256<float>.One;
var gx = r - Vector256.Floor(r + Vector256.Create(0.5f)); var gx = r - Vector256.Floor(r + Vector256.Create(0.5f));
var gy = Vector256.Abs(r) - Vector256.Create(0.5f); var gy = Vector256.Abs(r) - Vector256.Create(0.5f);
@@ -153,10 +154,10 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY); var d10 = GradDot(ipX + Vector256<float>.One, ipY, fpX - Vector256<float>.One, fpY);
var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One); var d11 = GradDot(ipX + Vector256<float>.One, ipY + Vector256<float>.One, fpX - Vector256<float>.One, fpY - Vector256<float>.One);
var lerpX1 = d00 + (d10 - d00) * uX; var lerpY1 = d00 + (d10 - d00) * uY;
var lerpX2 = d01 + (d11 - d01) * uX; var lerpY2 = d01 + (d11 - d01) * uY;
return lerpX1 + (lerpX2 - lerpX1) * uY; return lerpY1 + (lerpY2 - lerpY1) * uX;
} }
public void Execute(int loopIndex, int threadIndex) public void Execute(int loopIndex, int threadIndex)
@@ -164,15 +165,17 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
// --------------------------------------------------------- // ---------------------------------------------------------
// IMPORTANT: Loop Stride is now 8! // IMPORTANT: Loop Stride is now 8!
// --------------------------------------------------------- // ---------------------------------------------------------
int baseIndex = loopIndex * 8; var baseIndex = loopIndex * 8;
// Safety check // Safety check
if (baseIndex + 7 >= width * height) if (baseIndex + 7 >= width * height)
{
return; return;
}
// Calculate Coords // Calculate Coords
int y = baseIndex / width; var y = baseIndex / width;
int x = baseIndex % width; var x = baseIndex % width;
// Sequence: 0, 1, 2, 3, 4, 5, 6, 7 // Sequence: 0, 1, 2, 3, 4, 5, 6, 7
var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f); var vSeqX = Vector256.Create(0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f);
@@ -185,6 +188,81 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight); var result = GradientNoiseAVX(vBaseX / vWidth, vBaseY / vHeight);
// Store 8 floats (32 bytes) // Store 8 floats (32 bytes)
Avx.Store(buffers + baseIndex, result); result.Store(buffers + baseIndex);
}
}
internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>
{
public float* buffers;
public int width;
public int height;
/// <summary>
/// Hashes the lattice point (<paramref name="ix"/>, <paramref name="iy"/>) into a unit-length
/// 2D gradient and returns its dot product with the offset (<paramref name="fx"/>, <paramref name="fy"/>).
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static T GradDot<T>(T ix, T iy, T fx, T fy)
    where T : ISPMD<T, float>
{
    var modulus = T.Create(289f);
    var k34 = T.Create(34f);
    var one = T.Create(1f);
    var k41 = T.Create(41f);
    var two = T.Create(2f);
    var half = T.Create(0.5f);

    // Wrap both lattice coordinates into the hash period.
    var wrappedX = ix % modulus;
    var wrappedY = iy % modulus;

    // Two rounds of the (34 * h + 1) * h mod 289 permutation; the Y coordinate is mixed in after the first.
    var hash = ((((k34 * wrappedX) + one) * wrappedX) % modulus) + wrappedY;
    hash = (((k34 * hash) + one) * hash) % modulus;

    // Map the hash to a value derived from its fractional part scaled by 2 and shifted by -1.
    var t = (T.Frac(hash / k41) * two) - one;

    var gradX = t - T.Floor(t + half);
    var gradY = T.Abs(t) - half;

    // Normalize the gradient to unit length.
    var length = T.Sqrt((gradX * gradX) + (gradY * gradY));
    var unitX = gradX / length;
    var unitY = gradY / length;

    return (unitX * fx) + (unitY * fy);
}
/// <summary>
/// Evaluates 2D gradient noise at (<paramref name="uvX"/>, <paramref name="uvY"/>) for every lane of
/// <typeparamref name="T"/>.
/// </summary>
/// <param name="uvX">X coordinate of the sample point.</param>
/// <param name="uvY">Y coordinate of the sample point.</param>
/// <returns>The four corner gradient contributions of the enclosing cell, blended with a quintic fade.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static T Noise<T>(T uvX, T uvY)
    where T : ISPMD<T, float>
{
    var c1 = T.Create(1f);
    var c6 = T.Create(6f);
    var c10 = T.Create(10f);
    var c15 = T.Create(15f);

    // Integer cell origin and fractional position inside the cell.
    var ipX = T.Floor(uvX);
    var ipY = T.Floor(uvY);
    var fpX = uvX - ipX;
    var fpY = uvY - ipY;

    // dXY is the contribution of the corner offset by (X, Y) from the cell origin.
    var d00 = GradDot(ipX, ipY, fpX, fpY);
    var d01 = GradDot(ipX, ipY + c1, fpX, fpY - c1);
    var d10 = GradDot(ipX + c1, ipY, fpX - c1, fpY);
    var d11 = GradDot(ipX + c1, ipY + c1, fpX - c1, fpY - c1);

    // Quintic fade: 6t^5 - 15t^4 + 10t^3.
    var uX = fpX * fpX * fpX * (fpX * (fpX * c6 - c15) + c10);
    var uY = fpY * fpY * fpY * (fpY * (fpY * c6 - c15) + c10);

    // Bilinear blend: interpolate along Y inside each X column (d00->d01 and d10->d11), then along X
    // between the two columns. Pairing d00 with d10 under the Y weight, as the previous code did,
    // mixes corners that differ in X using the wrong axis weight and makes the result discontinuous
    // across cell borders.
    return T.Lerp(T.Lerp(d00, d01, uY), T.Lerp(d10, d11, uY), uX);
}
public readonly void Execute<TLane>(int baseIndex, int threadIndex)
where TLane : ISPMD<TLane, float>
{
var indices = TLane.Sequence(baseIndex, 1f);
var w = TLane.Create(width);
var h = TLane.Create(height);
var uvX = (indices % w) / w;
var uvY = TLane.Floor(indices / w) / h;
var result = Noise(uvX, uvY);
result.Store(buffers + baseIndex);
} }
} }

View File

@@ -24,6 +24,7 @@
<ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Image\Misaki.HighPerformance.Image.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Jobs\Misaki.HighPerformance.Jobs.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.LowLevel\Misaki.HighPerformance.LowLevel.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance.Mathematics\Misaki.HighPerformance.Mathematics.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" /> <ProjectReference Include="..\Misaki.HighPerformance\Misaki.HighPerformance.csproj" />
<ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" /> <ProjectReference Include="..\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer\Misaki.HighPerformance.Analyzer.csproj" OutputItemType="Analyzer" ReferenceOutputAssembly="false" />

View File

@@ -1,9 +1,15 @@
using Misaki.HighPerformance; using Misaki.HighPerformance;
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel; using Misaki.HighPerformance.LowLevel;
using Misaki.HighPerformance.LowLevel.Utilities; using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics.SPMD;
using Misaki.HighPerformance.Test.UnitTest.Jobs;
using System.Numerics;
using System.Runtime.Intrinsics;
using System.Text;
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>(); BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.SPMDBenchmark>();
//return;
//using Misaki.HighPerformance.Collections; //using Misaki.HighPerformance.Collections;
//using Misaki.HighPerformance.LowLevel.Buffer; //using Misaki.HighPerformance.LowLevel.Buffer;
//using Misaki.HighPerformance.LowLevel.Collections; //using Misaki.HighPerformance.LowLevel.Collections;

View File

@@ -0,0 +1,114 @@
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
/// <summary>
/// Console-driven check of <c>WideLane&lt;double&gt;.CompressStore</c>: for several keep/drop patterns
/// it compares the compacted output and the returned count with a scalar reference and prints
/// PASS or FAIL.
/// </summary>
public static class CompressStoreTest
{
    /// <summary>Runs every pattern and writes one report line per pattern to the console.</summary>
    public static void Run()
    {
        Console.WriteLine("--- Testing CompressStore (Double) ---");

        // Test 1: Alternating pattern (True, False, True, False...)
        TestPattern_Double(
            input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
            // Keeps the elements at even indices (0, 2, 4, 6).
            // The mask itself is produced later by comparing +1 / -1 against 0.
            keepPattern: new bool[] { true, false, true, false, true, false, true, false }
        );

        // Test 2: All True
        TestPattern_Double(
            input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
            keepPattern: new bool[] { true, true, true, true, true, true, true, true }
        );

        // Test 3: All False
        TestPattern_Double(
            input: new double[] { 10, 20, 30, 40, 50, 60, 70, 80 },
            keepPattern: new bool[] { false, false, false, false, false, false, false, false }
        );

        // Test 4: Sparse (First and Last only)
        TestPattern_Double(
            input: new double[] { 1, 2, 3, 4, 5, 6, 7, 8 },
            keepPattern: new bool[] { true, false, false, false, false, false, false, true }
        );
    }

    /// <summary>
    /// Runs one CompressStore case over the first <c>Vector&lt;double&gt;.Count</c> elements of the inputs.
    /// </summary>
    /// <param name="input">Source values; must hold at least <c>Vector&lt;double&gt;.Count</c> elements.</param>
    /// <param name="keepPattern">Per-element keep flag; must hold at least <c>Vector&lt;double&gt;.Count</c> elements.</param>
    /// <exception cref="ArgumentException">An input array is shorter than the vector width.</exception>
    private static unsafe void TestPattern_Double(double[] input, bool[] keepPattern)
    {
        // 1. Setup Input Vector
        // Only the first vecSize elements are used, so narrower vectors (2 or 4 lanes) test a prefix.
        // NOTE(review): assumes WideLane<double> has Vector<double>.Count lanes - confirm.
        var vecSize = Vector<double>.Count;
        if (input.Length < vecSize || keepPattern.Length < vecSize)
        {
            throw new ArgumentException($"Test data must contain at least {vecSize} elements.");
        }

        var safeInput = new double[vecSize];
        var safeMaskVal = new double[vecSize];

        // Scalar reference: kept values in order, plus how many there are.
        var expected = new double[vecSize];
        var expectedCount = 0;

        for (var i = 0; i < vecSize; i++)
        {
            safeInput[i] = input[i];

            // The mask is built with "value > 0": +1 means keep, -1 means drop.
            safeMaskVal[i] = keepPattern[i] ? 1 : -1;

            if (keepPattern[i])
            {
                expected[expectedCount++] = input[i];
            }
        }

        // 2. Create WideLanes
        var vInput = WideLane<double>.Load(ref safeInput.AsSpan().GetPinnableReference());

        // Create Mask: greater than 0
        var vMaskVal = WideLane<double>.Load(ref safeMaskVal.AsSpan().GetPinnableReference());
        var vZero = WideLane<double>.Create(0);
        var vMask = WideLane<double>.GreaterThan(vMaskVal, vZero);

        // 3. Run CompressStore
        var outputBuffer = new double[vecSize];
        var actualCount = 0;

        fixed (double* ptr = outputBuffer)
        {
            actualCount = vInput.CompressStore(vMask, ptr);
        }

        // 4. Verify: the count must match and the first expectedCount outputs must equal the reference.
        var pass = actualCount == expectedCount;
        for (var i = 0; i < expectedCount; i++)
        {
            if (outputBuffer[i] != expected[i])
            {
                pass = false;
            }
        }

        // 5. Report
        // An 8-lane Vector<double> is 512 bits wide; previously that case was reported as "Scalar".
        var hardware = vecSize switch
        {
            8 => "AVX-512 (512-bit)",
            4 => "AVX2 (256-bit)",
            2 => "SSE/NEON (128-bit)",
            _ => "Scalar",
        };

        Console.Write($"[{hardware}] Pattern: ");
        for (var i = 0; i < vecSize; i++)
        {
            Console.Write(keepPattern[i] ? "1" : "0");
        }

        if (pass)
        {
            Console.WriteLine($" -> PASS (Count: {actualCount})");
        }
        else
        {
            Console.WriteLine($" -> FAIL!");
            Console.WriteLine($" Expected Count: {expectedCount}, Actual: {actualCount}");
            Console.Write(" Expected Data: ");
            foreach (var d in expected)
            {
                Console.Write($"{d} ");
            }

            Console.WriteLine();

            Console.Write(" Actual Data: ");
            foreach (var d in outputBuffer)
            {
                Console.Write($"{d} ");
            }

            Console.WriteLine();
        }
    }
}

View File

@@ -1,19 +1,28 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Buffer; using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections; using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.LowLevel.Utilities; using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics.SPMD;
using System.Runtime.InteropServices;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs; namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
[TestClass] [TestClass]
[DoNotParallelize]
public unsafe class TestJobSystem public unsafe class TestJobSystem
{ {
private JobScheduler _jobScheduler = null!; private JobScheduler _jobScheduler = null!;
public TestContext TestContext
{
get;
set;
}
[TestInitialize] [TestInitialize]
public void Initialize() public void Initialize()
{ {
_jobScheduler = new JobScheduler(Environment.ProcessorCount); _jobScheduler = new JobScheduler(3);
} }
[TestCleanup] [TestCleanup]
@@ -251,4 +260,102 @@ public unsafe class TestJobSystem
Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle)); Assert.AreEqual(JobState.Completed, _jobScheduler.GetJobStatus(completedHandle));
} }
/// <summary>
/// Stress test for dependency registration: many jobs are scheduled against one blocked root job
/// while that root job is being released, and every dependent job must still run.
/// </summary>
[TestMethod]
public void RaceConditionTest()
{
    const int jobCount = 20000;
    const long waitBudgetMs = 10_000;

    var pExecutedCount = (int*)NativeMemory.Alloc(sizeof(int));
    *pExecutedCount = 0;

    var startSignal = false;

    // 1. Create a "Gatekeeper" job that spins on a worker thread until signaled.
    //    This lets the test decide when the shared dependency completes.
    var rootJob = new WaitJob { pSignal = &startSignal };
    var rootHandle = _jobScheduler.Schedule(ref rootJob);

    // 2. Start a background task that floods the scheduler with jobs depending on the Gatekeeper.
    using var barrier = new Barrier(2);

    var scheduleTask = Task.Run(() =>
    {
        var depJob = new IncrementJob { pCounter = pExecutedCount };
        barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Synchronize start with main thread

        for (var i = 0; i < jobCount; i++)
        {
            // CONTENTION POINT:
            // Each call registers a dependency on 'rootHandle'. Some of these calls are expected to
            // overlap with 'rootHandle' transitioning to Completed.
            _jobScheduler.Schedule(ref depJob, rootHandle);
        }
    }, TestContext.CancellationTokenSource.Token);

    barrier.SignalAndWait(TestContext.CancellationTokenSource.Token); // Wait for scheduler task to be ready

    // Give the scheduling loop a head start so some dependents are queued before the gate opens.
    Thread.Sleep(5);

    // 3. Open the gate.
    //    The worker polls the flag with Volatile.Read, so publish it with the matching Volatile.Write.
    //    The Gatekeeper then completes while the loop above is still adding dependents.
    Volatile.Write(ref startSignal, true);

    scheduleTask.Wait(TestContext.CancellationTokenSource.Token);

    // 4. Validate results.
    //    Every dependent job must eventually execute; a missed notification leaves the counter
    //    below jobCount. The deadline uses the monotonic tick counter so wall-clock adjustments
    //    cannot shorten or lengthen the wait.
    var spin = new SpinWait();
    var deadline = Environment.TickCount64 + waitBudgetMs;
    while (Volatile.Read(ref *pExecutedCount) < jobCount)
    {
        if (Environment.TickCount64 > deadline)
        {
            break;
        }

        spin.SpinOnce();
    }

    // Ensure the root job has been waited on before asserting.
    _jobScheduler.WaitComplete(rootHandle);

    Assert.AreEqual(jobCount, *pExecutedCount, "Race condition detected: Some dependent jobs failed to execute (Wait timeout).");

    // Reached only when the assertion passed, i.e. the counter reached jobCount.
    // NOTE(review): on failure the counter is leaked instead of freed; freeing it there could race
    // with dependents that have not run yet - confirm this is the intended trade-off.
    NativeMemory.Free(pExecutedCount);
}
/// <summary>
/// Runs two noise jobs over the same 8x8 grid on the calling thread and requires their outputs to
/// compare equal element by element.
/// </summary>
[TestMethod]
public void SPMDCorrectness()
{
    const int size = 8;
    const int cellCount = size * size;

    // Reference output from the System.Numerics based job.
    var referencePtr = stackalloc float[cellCount];
    var referenceJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobVector
    {
        width = size,
        height = size,
        buffers = referencePtr,
    };
    referenceJob.Run(cellCount, -1);

    // NOTE(review): despite the test name, the job compared here is NoiseJobMath, not
    // NoiseJobMathSPMD - confirm which implementation this test is meant to cover.
    var candidatePtr = stackalloc float[cellCount];
    var candidateJob = new Misaki.HighPerformance.Test.Jobs.NoiseJobMath
    {
        width = size,
        height = size,
        buffers = candidatePtr,
    };
    candidateJob.Run(cellCount, -1);

    var reference = new Span<float>(referencePtr, cellCount);
    var candidate = new Span<float>(candidatePtr, cellCount);

    // SequenceCompareTo returns 0 only when both spans hold equal elements in the same order.
    var comparison = reference.SequenceCompareTo(candidate);
    Assert.AreEqual(0, comparison);
}
} }

View File

@@ -1,4 +1,4 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
namespace Misaki.HighPerformance.Test.UnitTest.Jobs; namespace Misaki.HighPerformance.Test.UnitTest.Jobs;
@@ -71,3 +71,27 @@ internal unsafe struct ParallelMultiplyJob : IJobParallelFor
inout[loopIndex] *= multiplier; inout[loopIndex] *= multiplier;
} }
} }
/// <summary>
/// Job that spins until the <see cref="bool"/> behind <see cref="pSignal"/> reads <c>true</c>.
/// </summary>
public unsafe struct WaitJob : IJob
{
    // Flag polled by Execute; the job returns once it reads true.
    public bool* pSignal;

    /// <summary>Blocks the executing thread until the signal is set.</summary>
    /// <param name="loopIndex">Not used by this job.</param>
    public void Execute(int loopIndex)
    {
        // Volatile.Read makes every iteration re-read the flag from memory.
        for (var backoff = new SpinWait(); !Volatile.Read(ref *pSignal); backoff.SpinOnce())
        {
        }
    }
}
/// <summary>
/// Job that atomically adds one to the <see cref="int"/> behind <see cref="pCounter"/>.
/// </summary>
public unsafe struct IncrementJob : IJob
{
    // Counter incremented once per execution.
    public int* pCounter;

    /// <summary>Increments the counter with an interlocked operation.</summary>
    /// <param name="loopIndex">Not used by this job.</param>
    public void Execute(int loopIndex) => Interlocked.Increment(ref *pCounter);
}

View File

@@ -1,7 +1,7 @@
Microsoft Visual Studio Solution File, Format Version 12.00 Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 18 # Visual Studio Version 18
VisualStudioVersion = 18.3.11218.70 d18.3 VisualStudioVersion = 18.3.11218.70
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance", "Misaki.HighPerformance\Misaki.HighPerformance.csproj", "{275B2E80-9B2A-4567-A157-F147A6B28A0F}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance", "Misaki.HighPerformance\Misaki.HighPerformance.csproj", "{275B2E80-9B2A-4567-A157-F147A6B28A0F}"
EndProject EndProject
@@ -25,6 +25,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance.Anal
EndProject EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Analyzer", "Analyzer", "{457CB43B-38FA-4221-BCC2-BE866D0A2A06}" Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Analyzer", "Analyzer", "{457CB43B-38FA-4221-BCC2-BE866D0A2A06}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Misaki.HighPerformance.Mathematics.SPMD", "Misaki.HighPerformance.Mathematics.SPMD\Misaki.HighPerformance.Mathematics.SPMD.csproj", "{09DF7B3B-F36D-4925-B086-E8827E228FFC}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
@@ -71,6 +73,10 @@ Global
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Debug|Any CPU.Build.0 = Debug|Any CPU {446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.ActiveCfg = Release|Any CPU {446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.Build.0 = Release|Any CPU {446213EF-4E44-4EEF-970A-536F0AFC7AEB}.Release|Any CPU.Build.0 = Release|Any CPU
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{09DF7B3B-F36D-4925-B086-E8827E228FFC}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@@ -0,0 +1,398 @@
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance;
/// <summary>
/// Outcome of an operation that produces no value: a success flag plus an optional message.
/// The default value of this struct is a failure with no message.
/// </summary>
public readonly struct Result
{
    private readonly string? _message;
    private readonly bool _isSuccess;

    /// <summary>Optional message attached to the result.</summary>
    public string? Message
    {
        get { return _message; }
    }

    /// <summary><c>true</c> when the operation succeeded.</summary>
    public bool IsSuccess
    {
        get { return _isSuccess; }
    }

    /// <summary><c>true</c> when the operation did not succeed.</summary>
    public bool IsFailure
    {
        get { return !_isSuccess; }
    }

    /// <summary>Creates a result with the given success flag and optional message.</summary>
    public Result(bool success, string? message = null)
    {
        _message = message;
        _isSuccess = success;
    }

    /// <summary>Creates a successful result without a message.</summary>
    public static Result Success() => new(true);

    /// <summary>Creates a failed result carrying <paramref name="message"/>.</summary>
    public static Result Failure(string? message = null) => new(false, message);

    /// <summary>Creates a failed result whose message is the name of <paramref name="status"/>.</summary>
    public static Result Failure(Error status) => new(false, status.ToString());

    /// <summary>Creates a successful <see cref="Result{T}"/> holding <paramref name="value"/>.</summary>
    public static Result<T> Success<T>(T value) => new(true, value);

    /// <summary>Creates a failed <see cref="Result{T}"/> carrying <paramref name="message"/>.</summary>
    public static Result<T> Failure<T>(string? message = null) => new(false, default!, message);

    /// <summary>Creates a failed <see cref="Result{T}"/> whose message is the name of <paramref name="status"/>.</summary>
    public static Result<T> Failure<T>(Error status) => new(false, default!, status.ToString());

    /// <summary>Splits the result into its success flag and message.</summary>
    public void Deconstruct(out bool success, out string? message)
    {
        message = _message;
        success = _isSuccess;
    }

    /// <summary>Returns "OK" for a success, otherwise "Error: " followed by the message.</summary>
    public override string ToString()
    {
        if (_isSuccess)
        {
            return "OK";
        }

        return $"Error: {Message}";
    }

    /// <summary>Allows a result to be used directly as a condition; <c>true</c> means success.</summary>
    public static implicit operator bool(Result result) => result._isSuccess;
}
/// <summary>
/// Outcome of an operation that produces a <typeparamref name="T"/>: a success flag, the value,
/// and an optional message. The default value of this struct is a failure.
/// </summary>
public readonly struct Result<T>
{
    private readonly T _value;
    private readonly string? _message;
    private readonly bool _isSuccess;

    /// <summary>
    /// Gets the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">DEBUG builds only: the result is a failure.</exception>
    public T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. {_message}");
            }
#endif
            return _value;
        }
    }

    /// <summary>Optional message attached to the result.</summary>
    public readonly string? Message => _message;

    /// <summary><c>true</c> when the operation succeeded.</summary>
    public readonly bool IsSuccess => _isSuccess;

    /// <summary><c>true</c> when the operation did not succeed.</summary>
    public readonly bool IsFailure => !IsSuccess;

    /// <summary>Creates a result from its raw parts.</summary>
    public Result(bool success, T value, string? message = null)
    {
        _isSuccess = success;
        _value = value;
        _message = message;
    }

    /// <summary>Creates a successful result holding <paramref name="value"/>.</summary>
    public static Result<T> Success(T value)
    {
        return new Result<T>(true, value);
    }

    /// <summary>Creates a failed result with a default value and the given message.</summary>
    public static Result<T> Failure(string? message = null)
    {
        return new Result<T>(false, default!, message);
    }

    /// <summary>
    /// Splits the result into its parts. For a failure, <paramref name="value"/> receives the stored
    /// (default) value.
    /// </summary>
    public void Deconstruct(out bool success, out T value, out string? message)
    {
        success = IsSuccess;
        // Read the field directly: going through Value would throw in DEBUG builds for a failure,
        // which made `var (ok, value, message) = result;` unusable for exactly the results whose
        // success flag callers need to inspect.
        value = _value;
        message = Message;
    }

    /// <summary>Returns "OK: value" for a success, otherwise "Error: message".</summary>
    public override string ToString() => IsSuccess ? $"OK: {Value}" : $"Error: {Message}";

    /// <summary>Wraps a non-null value as a success; <c>null</c> becomes a failure without a message.</summary>
    public static implicit operator Result<T>(T? data) => data is not null ? Success(data) : Failure(null);

    /// <summary>
    /// Converts a value-less <see cref="Result"/>: a failure keeps its message, a success carries
    /// the default value of <typeparamref name="T"/>.
    /// </summary>
    public static implicit operator Result<T>(Result result) => result.IsSuccess ? Success(default!) : Failure(result.Message);

    /// <summary>Allows a result to be used directly as a condition; <c>true</c> means success.</summary>
    public static implicit operator bool(Result<T> result) => result.IsSuccess;
}
/// <summary>
/// General-purpose error codes. Zero means "no error"; <see cref="Success"/> is an alias of
/// <see cref="None"/>.
/// </summary>
public enum Error : byte
{
    None = 0,
    NotFound = 1,
    InvalidArgument = 2,
    InvalidState = 3,
    InternalError = 4,
    PermissionDenied = 5,
    NotSupported = 6,
    OutOfMemory = 7,
    Timeout = 8,
    Cancelled = 9,
    UnknownError = 10,

    // Alias for the zero value.
    Success = None,
}
/// <summary>
/// Outcome of an operation that produces a <typeparamref name="T"/> and reports failures through
/// the enum <typeparamref name="E"/>. The default value of <typeparamref name="E"/> means success.
/// </summary>
public readonly struct Result<T, E>
    where E : struct, Enum
{
    private readonly T _value;
    private readonly E _error;

    /// <summary>
    /// Gets the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">DEBUG builds only: the result is a failure.</exception>
    public T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. Error: {_error}");
            }
#endif
            return _value;
        }
    }

    /// <summary>The stored error code; the default value of <typeparamref name="E"/> on success.</summary>
    public E Error => _error;

    /// <summary><c>true</c> when the error code equals the default value of <typeparamref name="E"/>.</summary>
    public bool IsSuccess => EqualityComparer<E>.Default.Equals(_error, default);

    /// <summary><c>true</c> when the error code differs from the default value of <typeparamref name="E"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Creates a result from a value and an error code.</summary>
    public Result(T value, E status)
    {
        _value = value;
        _error = status;
    }

    /// <summary>Creates a successful result holding <paramref name="value"/>.</summary>
    public static Result<T, E> Success(T value)
    {
        return new Result<T, E>(value, default);
    }

    /// <summary>Creates a result with a default value and the given error code.</summary>
    public static Result<T, E> Failure(E status)
    {
        return new Result<T, E>(default!, status);
    }

    /// <summary>
    /// Splits the result into value and error code. For a failure, <paramref name="value"/> receives
    /// the stored (default) value.
    /// </summary>
    public void Deconstruct(out T value, out E status)
    {
        // Read the field directly: going through Value would throw in DEBUG builds for a failure,
        // which made `var (value, status) = result;` unusable for exactly the results whose
        // status callers need to inspect.
        value = _value;
        status = Error;
    }

    /// <summary>Formats the stored value and error code.</summary>
    public override string ToString() => $"Value: {_value}, Status: {_error}";

    /// <summary>Wraps a value as a success.</summary>
    public static implicit operator Result<T, E>(T data) => new(data, default);

    /// <summary>Wraps an error code as a result with a default value.</summary>
    public static implicit operator Result<T, E>(E status) => new(default!, status);

    /// <summary>Allows a result to be used directly as a condition; <c>true</c> means success.</summary>
    public static implicit operator bool(Result<T, E> result) => result.IsSuccess;
}
/// <summary>
/// Outcome of an operation that yields a reference to a <typeparamref name="T"/> and reports
/// failures through the enum <typeparamref name="E"/>. The default value of <typeparamref name="E"/>
/// means success; failures created by <see cref="Failure"/> hold a null reference.
/// </summary>
public readonly ref struct RefResult<T, E>
    where E : struct, Enum
{
    private readonly ref T _value;
    private readonly E _error;

    /// <summary>
    /// Gets a reference to the value. Undefined if the result is a failure.
    /// </summary>
    /// <exception cref="InvalidOperationException">DEBUG builds only: the result is a failure.</exception>
    public ref T Value
    {
        get
        {
#if DEBUG
            if (IsFailure)
            {
                throw new InvalidOperationException($"Cannot access Value when Result is a failure. Error: {_error}");
            }
#endif
            return ref _value;
        }
    }

    /// <summary>The stored error code; the default value of <typeparamref name="E"/> on success.</summary>
    public E Error => _error;

    /// <summary><c>true</c> when the error code equals the default value of <typeparamref name="E"/>.</summary>
    public bool IsSuccess => EqualityComparer<E>.Default.Equals(_error, default);

    /// <summary><c>true</c> when the error code differs from the default value of <typeparamref name="E"/>.</summary>
    public bool IsFailure => !IsSuccess;

    /// <summary>Creates a result from a reference and an error code.</summary>
    public RefResult(ref T value, E error)
    {
        _value = ref value;
        _error = error;
    }

    /// <summary>Creates a successful result referring to <paramref name="value"/>.</summary>
    public static RefResult<T, E> Success(ref T value)
    {
        return new RefResult<T, E>(ref value, default);
    }

    /// <summary>Creates a result holding a null reference and the given error code.</summary>
    public static RefResult<T, E> Failure(E error)
    {
        return new RefResult<T, E>(ref Unsafe.NullRef<T>(), error);
    }

    /// <summary>
    /// Formats the referenced value and error code. A null reference is printed as
    /// <c>&lt;null&gt;</c> instead of being dereferenced.
    /// </summary>
    public override string ToString()
    {
        // Failure results hold Unsafe.NullRef<T>(); reading through it throws NullReferenceException,
        // so the reference is tested before the value is formatted.
        if (Unsafe.IsNullRef(ref _value))
        {
            return $"Value: <null>, Status: {_error}";
        }

        return $"Value: {_value}, Status: {_error}";
    }

    /// <summary>Wraps an error code as a result holding a null reference.</summary>
    public static implicit operator RefResult<T, E>(E error) => new(ref Unsafe.NullRef<T>(), error);

    /// <summary>Allows a result to be used directly as a condition; <c>true</c> means success.</summary>
    public static implicit operator bool(RefResult<T, E> result) => result.IsSuccess;
}
public static class ResultExtensions
{
/// <summary>Throws when <paramref name="result"/> is anything other than <c>Error.None</c>.</summary>
/// <param name="result">Error code to check.</param>
/// <param name="op">Source text of the checked expression; supplied by the compiler.</param>
/// <exception cref="InvalidOperationException">The error code is not <c>Error.None</c>.</exception>
public static void ThrowIfFailed(this Error result, [CallerArgumentExpression(nameof(result))] string? op = null)
{
    if (result == Error.None)
    {
        return;
    }

    throw new InvalidOperationException($"{op} failed: {result}");
}
/// <summary>Throws when <paramref name="result"/> is a failure.</summary>
/// <param name="result">Result to check.</param>
/// <param name="op">Source text of the checked expression; supplied by the compiler.</param>
/// <exception cref="InvalidOperationException">The result is a failure.</exception>
public static void ThrowIfFailed(this Result result, [CallerArgumentExpression(nameof(result))] string? op = null)
{
    if (result.IsSuccess)
    {
        return;
    }

    throw new InvalidOperationException($"{op} failed: {result.Message}");
}
/// <summary>Returns the value of a successful result and throws for a failure.</summary>
/// <param name="result">Result to unwrap.</param>
/// <param name="op">Source text of the unwrapped expression; supplied by the compiler.</param>
/// <exception cref="InvalidOperationException">The result is a failure.</exception>
public static T GetValueOrThrow<T>(this Result<T> result, [CallerArgumentExpression(nameof(result))] string? op = null)
{
    return result.IsSuccess
        ? result.Value
        : throw new InvalidOperationException($"{op} failed: {result.Message}");
}
public static T GetValueOrThrow<T, S>(this Result<T, S> result, [CallerArgumentExpression(nameof(result))] string? op = null)
where S : struct, Enum
{
if (!result.IsSuccess)
{
throw new InvalidOperationException($"{op} failed: status {result.Error}");
}
return result.Value;
}
public static T? GetValueOrDefault<T>(this Result<T> result, T? defaultValue = default)
{
return result.IsSuccess ? result.Value : defaultValue;
}
public static T? GetValueOrDefault<T, S>(this Result<T, S> result, T? defaultValue = default)
where S : struct, Enum
{
return result.IsSuccess ? result.Value : defaultValue;
}
public static bool TryGetValue<T>(this Result<T> result, out T value)
{
if (result.IsSuccess)
{
value = result.Value;
return true;
}
value = default!;
return false;
}
public static bool TryGetValue<T, S>(this Result<T, S> result, out T value)
where S : struct, Enum
{
if (result.IsSuccess)
{
value = result.Value;
return true;
}
value = default!;
return false;
}
public static Result OnSuccess(this Result result, Action action)
{
if (result.IsSuccess)
{
action();
}
return result;
}
public static Result<T> OnSuccess<T>(this Result<T> result, Action<T> action)
{
if (result.IsSuccess)
{
action(result.Value);
}
return result;
}
public static Result<T, E> OnSuccess<T, E>(this Result<T, E> result, Action<T> action)
where E : struct, Enum
{
if (result.IsSuccess)
{
action(result.Value);
}
return result;
}
public static Result OnFailed(this Result result, Action<string?> action)
{
if (result.IsFailure)
{
action(result.Message);
}
return result;
}
public static Result<T> OnFailed<T>(this Result<T> result, Action<string?> action)
{
if (result.IsFailure)
{
action(result.Message);
}
return result;
}
public static Result<T, E> OnFailed<T, E>(this Result<T, E> result, Action<E> action)
where E : struct, Enum
{
if (result.IsFailure)
{
action(result.Error);
}
return result;
}
public static Result<U> Then<T, U>(this Result<T> result, Func<T, Result<U>> func)
{
if (result.IsFailure)
{
return Result<U>.Failure(result.Message);
}
return func(result.Value);
}
public static Result<U, E> Then<T, U, E>(this Result<T, E> result, Func<T, Result<U, E>> func)
where E : struct, Enum
{
if (result.IsFailure)
{
return Result<U, E>.Failure(result.Error);
}
return func(result.Value);
}
}