Update Job

This commit is contained in:
2026-02-21 17:20:51 +09:00
parent 4f964b2d2a
commit 7367826978
23 changed files with 511 additions and 276 deletions

View File

@@ -7,7 +7,6 @@
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<AssemblyVersion>1.1.0</AssemblyVersion> <AssemblyVersion>1.1.0</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
</PropertyGroup> </PropertyGroup>

View File

@@ -25,6 +25,20 @@ public interface IJobParallelFor
void Execute(int loopIndex, int threadIndex); void Execute(int loopIndex, int threadIndex);
} }
/// <summary>
/// Represents a job that processes a range of items per call, so the total work can be split into ranges and executed in parallel.
/// </summary>
/// <remarks>
/// Unlike <see cref="IJobParallelFor"/>, whose <c>Execute</c> receives a single loop index per call,
/// this interface receives a start and end index per call.
/// </remarks>
public interface IJobParallel
{
/// <summary>
/// Executes the operation over the index range described by <paramref name="startIndex"/> and <paramref name="endIndex"/>.
/// </summary>
/// <param name="startIndex">The zero-based index at which to begin the operation.</param>
/// <param name="endIndex">The zero-based index at which to end the operation.
/// NOTE(review): the <c>Run</c> extension passes the total iteration count as this value, which suggests the bound is exclusive; confirm against the scheduler's range splitting.</param>
/// <param name="threadIndex">The index of the thread executing the job, useful for thread-specific operations.</param>
void Execute(int startIndex, int endIndex, int threadIndex);
}
public static class IJobExtensions public static class IJobExtensions
{ {
public static void Run<T>(this ref T job, int threadIndex) public static void Run<T>(this ref T job, int threadIndex)
@@ -39,9 +53,18 @@ public static class IJobParallelForExtensions
public static void Run<T>(this ref T job, int totalIterations, int threadIndex) public static void Run<T>(this ref T job, int totalIterations, int threadIndex)
where T : struct, IJobParallelFor where T : struct, IJobParallelFor
{ {
for (var i = 0; i < totalIterations; i++) for (int i = 0; i < totalIterations; i++)
{ {
job.Execute(i, threadIndex); job.Execute(i, threadIndex);
} }
} }
} }
/// <summary>
/// Extension methods for running an <see cref="IJobParallel"/> job directly, without going through a scheduler.
/// </summary>
public static class IJobParallelExtensions
{
    /// <summary>
    /// Runs the job with a single <c>Execute</c> call, passing <c>0</c> as the start index and
    /// <paramref name="totalIterations"/> as the end index.
    /// </summary>
    /// <typeparam name="T">The job type. Must be a struct implementing <see cref="IJobParallel"/>.</typeparam>
    /// <param name="job">The job to run.</param>
    /// <param name="totalIterations">The number of iterations to process. When zero or negative, the job is not invoked.</param>
    /// <param name="threadIndex">The thread index forwarded unchanged to the job.</param>
    public static void Run<T>(this ref T job, int totalIterations, int threadIndex)
        where T : struct, IJobParallel
    {
        // Mirror the per-index Run loop for IJobParallelFor, which does no work for a
        // non-positive count: never hand the job an empty or inverted range.
        if (totalIterations <= 0)
        {
            return;
        }

        job.Execute(0, totalIterations, threadIndex);
    }
}

View File

@@ -25,7 +25,7 @@ internal static unsafe class JobExecutor
return true; return true;
} }
public static bool ExecuteParallel<T>(void* pJobData, ref JobRanges jobRanges, ref int remainingBatches, int threadIndex) public static bool ExecuteParallelFor<T>(void* pJobData, ref JobRanges jobRanges, ref int remainingBatches, int threadIndex)
where T : unmanaged, IJobParallelFor where T : unmanaged, IJobParallelFor
{ {
var pJob = (T*)pJobData; var pJob = (T*)pJobData;
@@ -51,4 +51,27 @@ internal static unsafe class JobExecutor
return wasTheLastBatch; return wasTheLastBatch;
} }
/// <summary>
/// Repeatedly obtains an index range from <paramref name="jobRanges"/> and runs the job over it,
/// until no further range is available.
/// </summary>
/// <typeparam name="T">The unmanaged job type stored at <paramref name="pJobData"/>.</typeparam>
/// <param name="pJobData">Pointer to the job data; reinterpreted as a pointer to <typeparamref name="T"/>.</param>
/// <param name="jobRanges">Range state passed to <c>GetWorkerStealingRange</c> to obtain the next range.</param>
/// <param name="remainingBatches">Counter that is atomically decremented once per executed range.</param>
/// <param name="threadIndex">Thread index forwarded unchanged to the job's <c>Execute</c>.</param>
/// <returns>
/// <see langword="true"/> if one of the decrements performed by this call brought
/// <paramref name="remainingBatches"/> to zero; otherwise, <see langword="false"/>.
/// </returns>
public static bool ExecuteParallel<T>(void* pJobData, ref JobRanges jobRanges, ref int remainingBatches, int threadIndex)
    where T : unmanaged, IJobParallel
{
    var typedJob = (T*)pJobData;
    var finishedLastBatch = false;

    while (GetWorkerStealingRange(ref jobRanges, out var rangeStart, out var rangeEnd))
    {
        typedJob->Execute(rangeStart, rangeEnd, threadIndex);

        // The flag is sticky: once a decrement has observed zero it stays set,
        // while the loop keeps asking for ranges until none is handed out.
        finishedLastBatch |= Interlocked.Decrement(ref remainingBatches) == 0;
    }

    return finishedLastBatch;
}
} }

View File

@@ -21,12 +21,12 @@ public interface IJobScheduler
/// </summary> /// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam> /// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin. /// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin.
/// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param> /// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle Schedule<T>(ref T job, int threadIndex, JobHandle dependency) JobHandle Schedule<T>(ref readonly T job, int threadIndex, JobHandle dependency)
where T : unmanaged, IJob; where T : unmanaged, IJob;
/// <summary> /// <summary>
@@ -34,10 +34,10 @@ public interface IJobScheduler
/// </summary> /// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam> /// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle Schedule<T>(ref T job, int threadIndex) JobHandle Schedule<T>(ref readonly T job, int threadIndex)
where T : unmanaged, IJob; where T : unmanaged, IJob;
/// <summary> /// <summary>
@@ -45,10 +45,9 @@ public interface IJobScheduler
/// </summary> /// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam> /// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle Schedule<T>(ref T job, JobHandle dependency) JobHandle Schedule<T>(ref readonly T job, JobHandle dependency)
where T : unmanaged, IJob; where T : unmanaged, IJob;
/// <summary> /// <summary>
@@ -56,10 +55,10 @@ public interface IJobScheduler
/// </summary> /// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam> /// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJob"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle Schedule<T>(ref T job) JobHandle Schedule<T>(ref readonly T job)
where T : unmanaged, IJob; where T : unmanaged, IJob;
/// <summary> /// <summary>
@@ -69,12 +68,12 @@ public interface IJobScheduler
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param> /// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param> /// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin. /// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin.
/// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param> /// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency) JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -84,10 +83,10 @@ public interface IJobScheduler
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param> /// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param> /// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex) JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -97,10 +96,10 @@ public interface IJobScheduler
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param> /// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param> /// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency) JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, JobHandle dependency)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary> /// <summary>
@@ -110,12 +109,66 @@ public interface IJobScheduler
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param> /// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param> /// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param> /// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that will execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param> /// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job. /// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns> /// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize) JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize)
where T : unmanaged, IJobParallelFor; where T : unmanaged, IJobParallelFor;
/// <summary>
/// Schedules a range-based parallel job for execution, dividing the workload into batches and distributing it across threads.
/// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJobParallel"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin.
/// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency)
where T : unmanaged, IJobParallel;
/// <summary>
/// Schedules a range-based parallel job for execution with a preferred thread and no dependency, dividing the workload into batches and distributing it across threads.
/// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJobParallel"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="threadIndex">The index of the thread that is preferred to execute the job. This is used to assign thread-specific data. Use -1 to allow any thread to execute the job.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex)
where T : unmanaged, IJobParallel;
/// <summary>
/// Schedules a range-based parallel job for execution on any thread, with a dependency on another job, dividing the workload into batches and distributing it across threads.
/// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJobParallel"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <param name="dependency">A <see cref="JobHandle"/> representing the dependencies that must be completed before this job can begin.
/// Use <see cref="JobHandle.Invalid"/> if there are no dependencies.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, JobHandle dependency)
where T : unmanaged, IJobParallel;
/// <summary>
/// Schedules a range-based parallel job for execution on any thread with no dependency, dividing the workload into batches and distributing it across threads.
/// </summary>
/// <typeparam name="T">The type of the job to execute. Must implement <see cref="IJobParallel"/> and be unmanaged.</typeparam>
/// <param name="job">The job instance to be executed. The job data will be copied internally.</param>
/// <param name="totalIteration">The total number of iterations to be processed by the job.</param>
/// <param name="batchSize">The number of iterations to include in each batch.</param>
/// <returns>A <see cref="JobHandle"/> that can be used to track the completion of the scheduled job.
/// Returns <see cref="JobHandle.Invalid"/> if the job data allocation fails.</returns>
JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize)
where T : unmanaged, IJobParallel;
/// <summary> /// <summary>
/// Combines multiple job dependencies into a single <see cref="JobHandle"/>. /// Combines multiple job dependencies into a single <see cref="JobHandle"/>.
/// </summary> /// </summary>
@@ -394,26 +447,21 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal void WaitForWork() internal void WaitForWork(int timeout)
{ {
_workSignal.Wait(_cts.Token); _workSignal.Wait(timeout, _cts.Token);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal bool TryStealJob(int threadIndex, out JobHandle outHandle) internal bool TryStealFromMain(int threadIndex, out JobHandle outHandle)
{ {
if (threadIndex >= 0 && threadIndex < _workerThreads.Length return _jobQueue.TryDequeue(out outHandle);
&& _workerThreads[threadIndex].LocalQueue.TryDequeue(out outHandle))
{
return true;
}
else if (_jobQueue.TryDequeue(out outHandle))
{
return true;
} }
outHandle = JobHandle.Invalid; [MethodImpl(MethodImplOptions.AggressiveInlining)]
return false; internal bool TryStealFromWorker(int threadIndex, out JobHandle outHandle)
{
return _workerThreads[threadIndex].LocalQueue.TryDequeue(out outHandle);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)] [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -456,11 +504,11 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return; // Already completed (shouldn't happen for single-execution jobs) return; // Already completed (shouldn't happen for single-execution jobs)
} }
if (state != JobState.Running) //if (state != JobState.Running)
{ //{
// If in valid state (e.g. Scheduled?), we still assume we can complete it. // // If in valid state (e.g. Scheduled?), we still assume we can complete it.
// Usually it should be Running. // // Usually it should be Running.
} //}
// Construct new value: State=Completed, preserve RC (temporarily) or strictly replace only low bits? // Construct new value: State=Completed, preserve RC (temporarily) or strictly replace only low bits?
// We set low bits to Completed. High bits (RC) remain. // We set low bits to Completed. High bits (RC) remain.
@@ -513,7 +561,7 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
} }
} }
public JobHandle Schedule<T>(ref T job, int threadIndex, JobHandle dependency) public JobHandle Schedule<T>(ref readonly T job, int threadIndex, JobHandle dependency)
where T : unmanaged, IJob where T : unmanaged, IJob
{ {
var pJobData = _jobDataAllocator.Allocate(MemoryUtility.SizeOf<T>(), MemoryUtility.AlignOf<T>()); var pJobData = _jobDataAllocator.Allocate(MemoryUtility.SizeOf<T>(), MemoryUtility.AlignOf<T>());
@@ -541,19 +589,19 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return CreateJobHandle(ref jobInfo, dependency); return CreateJobHandle(ref jobInfo, dependency);
} }
public JobHandle Schedule<T>(ref T job, int threadIndex) public JobHandle Schedule<T>(ref readonly T job, int threadIndex)
where T : unmanaged, IJob where T : unmanaged, IJob
=> Schedule(ref job, threadIndex, JobHandle.Invalid); => Schedule(in job, threadIndex, JobHandle.Invalid);
public JobHandle Schedule<T>(ref T job, JobHandle dependency) public JobHandle Schedule<T>(ref readonly T job, JobHandle dependency)
where T : unmanaged, IJob where T : unmanaged, IJob
=> Schedule(ref job, -1, dependency); => Schedule(in job, -1, dependency);
public JobHandle Schedule<T>(ref T job) public JobHandle Schedule<T>(ref readonly T job)
where T : unmanaged, IJob where T : unmanaged, IJob
=> Schedule(ref job, -1, JobHandle.Invalid); => Schedule(in job, -1, JobHandle.Invalid);
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency) public JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency)
where T : unmanaged, IJobParallelFor where T : unmanaged, IJobParallelFor
{ {
var pJobData = _jobDataAllocator.Allocate(MemoryUtility.SizeOf<T>(), MemoryUtility.AlignOf<T>()); var pJobData = _jobDataAllocator.Allocate(MemoryUtility.SizeOf<T>(), MemoryUtility.AlignOf<T>());
@@ -570,6 +618,54 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
var optimalBatchSize = Math.Max(1, batchSize); var optimalBatchSize = Math.Max(1, batchSize);
var totalBatches = (totalIteration + optimalBatchSize - 1) / optimalBatchSize; var totalBatches = (totalIteration + optimalBatchSize - 1) / optimalBatchSize;
var jobInfo = new JobInfo
{
pJobData = pJobData,
pExecutionFunc = &JobExecutor.ExecuteParallelFor<T>,
remainingBatches = totalBatches,
threadIndex = threadIndex,
jobRanges = new()
{
currentIndex = 0,
batchSize = optimalBatchSize,
totalIteration = totalIteration,
},
};
return CreateJobHandle(ref jobInfo, dependency);
}
/// <inheritdoc/>
public JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex)
    where T : unmanaged, IJobParallelFor
{
    // No dependency requested: forward to the full overload with an invalid handle.
    return ScheduleParallelFor(in job, totalIteration, batchSize, threadIndex, JobHandle.Invalid);
}
/// <inheritdoc/>
public JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize, JobHandle dependency)
    where T : unmanaged, IJobParallelFor
{
    // No preferred thread requested: -1 lets any thread execute the job.
    return ScheduleParallelFor(in job, totalIteration, batchSize, -1, dependency);
}
/// <inheritdoc/>
public JobHandle ScheduleParallelFor<T>(ref readonly T job, int totalIteration, int batchSize)
    where T : unmanaged, IJobParallelFor
{
    // Neither a preferred thread (-1 = any thread) nor a dependency (invalid handle) is requested.
    return ScheduleParallelFor(in job, totalIteration, batchSize, -1, JobHandle.Invalid);
}
public JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex, JobHandle dependency)
where T : unmanaged, IJobParallel
{
var pJobData = _jobDataAllocator.Allocate(MemoryUtility.SizeOf<T>(), MemoryUtility.AlignOf<T>());
if (pJobData == null)
{
return JobHandle.Invalid;
}
fixed (T* pJob = &job)
{
MemoryUtility.MemCpy(pJobData, pJob, MemoryUtility.SizeOf<T>());
}
var optimalBatchSize = Math.Max(1, batchSize);
var totalBatches = (totalIteration + optimalBatchSize - 1) / optimalBatchSize;
var jobInfo = new JobInfo var jobInfo = new JobInfo
{ {
pJobData = pJobData, pJobData = pJobData,
@@ -589,17 +685,17 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return CreateJobHandle(ref jobInfo, dependency); return CreateJobHandle(ref jobInfo, dependency);
} }
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, int threadIndex) public JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, int threadIndex)
where T : unmanaged, IJobParallelFor where T : unmanaged, IJobParallel
=> ScheduleParallel(ref job, totalIteration, batchSize, threadIndex, JobHandle.Invalid); => ScheduleParallel(in job, totalIteration, batchSize, threadIndex, JobHandle.Invalid);
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize, JobHandle dependency) public JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize, JobHandle dependency)
where T : unmanaged, IJobParallelFor where T : unmanaged, IJobParallel
=> ScheduleParallel(ref job, totalIteration, batchSize, -1, dependency); => ScheduleParallel(in job, totalIteration, batchSize, -1, dependency);
public JobHandle ScheduleParallel<T>(ref T job, int totalIteration, int batchSize) public JobHandle ScheduleParallel<T>(ref readonly T job, int totalIteration, int batchSize)
where T : unmanaged, IJobParallelFor where T : unmanaged, IJobParallel
=> ScheduleParallel(ref job, totalIteration, batchSize, -1, JobHandle.Invalid); => ScheduleParallel(in job, totalIteration, batchSize, -1, JobHandle.Invalid);
public JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies) public JobHandle CombineDependencies(params ReadOnlySpan<JobHandle> dependencies)
{ {
@@ -641,6 +737,10 @@ public sealed unsafe partial class JobScheduler : IJobScheduler, IDisposable
return; return;
} }
// TODO: We can steal an upstream job to execute while waiting.
// For example, if we wait on job A, which depends on job B, and neither is scheduled yet, we can steal and execute job B to speed up the completion of A.
// We might then even execute A right after B, provided we can guarantee the order and avoid deadlock. This is a common optimization in job systems called "helping" or "work stealing with dependencies".
var spin = new SpinWait(); var spin = new SpinWait();
while (_jobInfoPool.TryGetElement(handle.ID, handle.Generation, out var jobInfo)) while (_jobInfoPool.TryGetElement(handle.ID, handle.Generation, out var jobInfo))
{ {

View File

@@ -9,7 +9,6 @@
<AssemblyVersion>1.2.3</AssemblyVersion> <AssemblyVersion>1.2.3</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
</PropertyGroup> </PropertyGroup>

View File

@@ -33,23 +33,20 @@ internal class WorkerThread : IDisposable
private bool TryFindJob(out JobHandle handle) private bool TryFindJob(out JobHandle handle)
{ {
// 1. Check own local queue first
if (_localQueue.TryDequeue(out handle)) if (_localQueue.TryDequeue(out handle))
{ {
return true; return true;
} }
// 2. Check global queue if (_scheduler.TryStealFromMain(-1, out handle))
if (_scheduler.TryStealJob(-1, out handle))
{ {
return true; return true;
} }
// 3. Bounded random work stealing from other workers
for (var i = 0; i < _MAX_STEAL_ATTEMPTS; i++) for (var i = 0; i < _MAX_STEAL_ATTEMPTS; i++)
{ {
var randomIndex = _random.Next(0, _scheduler.WorkerCount); var randomIndex = _random.Next(0, _scheduler.WorkerCount);
if (randomIndex != _index && _scheduler.TryStealJob(randomIndex, out handle)) if (randomIndex != _index && _scheduler.TryStealFromWorker(randomIndex, out handle))
{ {
return true; return true;
} }
@@ -63,25 +60,41 @@ internal class WorkerThread : IDisposable
{ {
while (!_scheduler.IsCancellationRequested) while (!_scheduler.IsCancellationRequested)
{ {
// Wait for work signal directly — the semaphore already acts as var handle = JobHandle.Invalid;
// both a notification and a count of available work items. var spin = new SpinWait();
var found = false;
while (!spin.NextSpinWillYield)
{
if (TryFindJob(out handle))
{
_scheduler.WaitForWork(0); // Consume the signal if we found work immediately
found = true;
break;
}
spin.SpinOnce(-1);
}
if (!found)
{
try try
{ {
_scheduler.WaitForWork(); _scheduler.WaitForWork(Timeout.Infinite);
} }
catch (OperationCanceledException) catch (OperationCanceledException)
{ {
break; break;
} }
// After being signaled, try to find and execute a job. if (!TryFindJob(out handle))
if (!TryFindJob(out var handle))
{ {
continue; continue;
} }
}
ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist); ref var jobInfo = ref _scheduler.GetJobInfoReference(handle, out var exist);
if (exist) if (exist)
{ {
Interlocked.CompareExchange(ref jobInfo.state, JobState.Running, JobState.Scheduled); Interlocked.CompareExchange(ref jobInfo.state, JobState.Running, JobState.Scheduled);

View File

@@ -101,6 +101,23 @@ public unsafe struct UnsafeQueue<T> : IUnsafeCollection<T>
return ref UnsafeUtility.ReadArrayElementRef<T>(_array.GetUnsafePtr(), _offset); return ref UnsafeUtility.ReadArrayElementRef<T>(_array.GetUnsafePtr(), _offset);
} }
/// <summary>
/// Attempts to return the item at the front of the queue without removing it.
/// </summary>
/// <param name="value">The item at the front of the queue if the operation is successful; otherwise, the default value of <typeparamref name="T"/>.</param>
/// <returns><see langword="true"/> if an item was returned successfully; <see langword="false"/> if the queue is empty.</returns>
public readonly bool TryPeek(out T value)
{
if (_count == 0)
{
// Empty queue: report failure and hand back default(T) instead of throwing.
value = default;
return false;
}
// _offset is the index of the current front element in the backing array.
value = _array[_offset];
return true;
}
/// <summary> /// <summary>
/// Adds an element to the end of a collection, resizing if the current capacity is reached. The new element is /// Adds an element to the end of a collection, resizing if the current capacity is reached. The new element is
/// stored in a circular buffer. /// stored in a circular buffer.

View File

@@ -181,6 +181,23 @@ public unsafe struct UnsafeStack<T> : IUnsafeCollection<T>
return _array[_count - 1]; return _array[_count - 1];
} }
/// <summary>
/// Tries to read the item currently at the top of the stack while leaving the stack unchanged.
/// </summary>
/// <param name="value">Receives the top item when the stack holds at least one element; otherwise the default value of <typeparamref name="T"/>.</param>
/// <returns><see langword="true"/> when an item was read; <see langword="false"/> when the stack is empty.</returns>
public readonly bool TryPeek(out T value)
{
    // The top of the stack is the last occupied slot of the backing array.
    if (_count != 0)
    {
        value = _array[_count - 1];
        return true;
    }

    value = default;
    return false;
}
public void Resize(int newSize, AllocationOption option = AllocationOption.None) public void Resize(int newSize, AllocationOption option = AllocationOption.None)
{ {
_array.Resize(newSize, option); _array.Resize(newSize, option);

View File

@@ -8,7 +8,6 @@
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<AssemblyVersion>1.3.5</AssemblyVersion> <AssemblyVersion>1.3.5</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
<IncludeSymbols>True</IncludeSymbols> <IncludeSymbols>True</IncludeSymbols>

View File

@@ -1,6 +1,5 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using System.Numerics; using System.Numerics;
using System.Runtime.CompilerServices;
namespace Misaki.HighPerformance.Mathematics.SPMD; namespace Misaki.HighPerformance.Mathematics.SPMD;
@@ -18,7 +17,6 @@ internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
public T innerJob; public T innerJob;
public int totalCount; public int totalCount;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Execute(int loopIndex, int threadIndex) public void Execute(int loopIndex, int threadIndex)
{ {
var baseIndex = loopIndex * WideLane<TNumber>.LaneWidth; var baseIndex = loopIndex * WideLane<TNumber>.LaneWidth;
@@ -30,12 +28,33 @@ internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
} }
else else
{ {
for (var i = 0; i < remaining; i++) for (var j = 0; j < remaining; j++)
{ {
innerJob.Execute<ScalarLane<TNumber>>(baseIndex + i, threadIndex); innerJob.Execute<ScalarLane<TNumber>>(baseIndex + j, threadIndex);
} }
} }
} }
//public void Execute(int startIndex, int endIndex, int threadIndex)
//{
// for (int i = startIndex; i < endIndex; i++)
// {
// var baseIndex = i * WideLane<TNumber>.LaneWidth;
// var remaining = totalCount - baseIndex;
// if (remaining >= WideLane<TNumber>.LaneWidth)
// {
// innerJob.Execute<WideLane<TNumber>>(baseIndex, threadIndex);
// }
// else
// {
// for (var j = 0; j < remaining; j++)
// {
// innerJob.Execute<ScalarLane<TNumber>>(baseIndex + j, threadIndex);
// }
// }
// }
//}
} }
internal struct SPMDScalerJobWrapper<T, TNumber> : IJobParallelFor internal struct SPMDScalerJobWrapper<T, TNumber> : IJobParallelFor
@@ -45,11 +64,19 @@ internal struct SPMDScalerJobWrapper<T, TNumber> : IJobParallelFor
public T innerJob; public T innerJob;
public int totalCount; public int totalCount;
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Execute(int loopIndex, int threadIndex) public void Execute(int loopIndex, int threadIndex)
{ {
innerJob.Execute<ScalarLane<TNumber>>(loopIndex, threadIndex); innerJob.Execute<ScalarLane<TNumber>>(loopIndex, threadIndex);
} }
//[MethodImpl(MethodImplOptions.AggressiveInlining)]
//public void Execute(int startIndex, int endIndex, int threadIndex)
//{
// for (int i = startIndex; i < endIndex; i++)
// {
// innerJob.Execute<ScalarLane<TNumber>>(i, threadIndex);
// }
//}
} }
public static class IJobParallelForSPMDExtensions public static class IJobParallelForSPMDExtensions
@@ -101,7 +128,7 @@ public static class IJobParallelForSPMDExtensions
}; };
var iterations = (totalCount + WideLane<TNumber>.LaneWidth - 1) / WideLane<TNumber>.LaneWidth; var iterations = (totalCount + WideLane<TNumber>.LaneWidth - 1) / WideLane<TNumber>.LaneWidth;
return jobScheduler.ScheduleParallel(ref warper, iterations, batchSize, threadIndex, dependency); return jobScheduler.ScheduleParallelFor(ref warper, iterations, batchSize, threadIndex, dependency);
} }
else else
{ {
@@ -111,7 +138,7 @@ public static class IJobParallelForSPMDExtensions
totalCount = totalCount, totalCount = totalCount,
}; };
return jobScheduler.ScheduleParallel(ref warper, totalCount, batchSize, threadIndex, dependency); return jobScheduler.ScheduleParallelFor(ref warper, totalCount, batchSize, threadIndex, dependency);
} }
} }
} }

View File

@@ -7,6 +7,14 @@
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<IsAotCompatible>True</IsAotCompatible>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<IsAotCompatible>True</IsAotCompatible>
</PropertyGroup>
<ItemGroup> <ItemGroup>
<Compile Remove="Templates\Vector2 - Copy (2).gen.cs" /> <Compile Remove="Templates\Vector2 - Copy (2).gen.cs" />
<Compile Remove="Templates\Vector2 - Copy.gen.cs" /> <Compile Remove="Templates\Vector2 - Copy.gen.cs" />

View File

@@ -869,5 +869,22 @@ public static unsafe partial class MathV
# endregion # endregion
# region Vector3 Specific
/// <summary>
/// Computes the cross product of two three-component vectors.
/// </summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>
/// A new vector with components <c>(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x)</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector3<TLane, TNumber> Cross<TLane, TNumber>(in Vector3<TLane, TNumber> a, in Vector3<TLane, TNumber> b)
where TLane : ISPMD<TLane, TNumber>
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
{
return new Vector3<TLane, TNumber>
{
x = a.y * b.z - a.z * b.y,
y = a.z * b.x - a.x * b.z,
z = a.x * b.y - a.y * b.x,
};
}
# endregion
} }

View File

@@ -317,6 +317,23 @@ public static unsafe partial class MathV
# endregion # endregion
<# } #> <# } #>
# region Vector3 Specific
/// <summary>
/// Computes the cross product of two three-component vectors.
/// </summary>
/// <param name="a">The left-hand operand.</param>
/// <param name="b">The right-hand operand.</param>
/// <returns>
/// A new vector with components <c>(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x)</c>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector3<<#= GenericParameters #>> Cross<<#= GenericParameters #>>(in Vector3<<#= GenericParameters #>> a, in Vector3<<#= GenericParameters #>> b)
<#= TLaneRestrictions #>
<#= TNumberRestrictions #>
{
return new Vector3<<#= GenericParameters #>>
{
x = a.y * b.z - a.z * b.y,
y = a.z * b.x - a.x * b.z,
z = a.x * b.y - a.y * b.x,
};
}
# endregion
} }
<#+ <#+

View File

@@ -8,7 +8,6 @@
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<AssemblyVersion>1.3.1</AssemblyVersion> <AssemblyVersion>1.3.1</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
</PropertyGroup> </PropertyGroup>

View File

@@ -10,8 +10,8 @@ namespace Misaki.HighPerformance.Test.Benchmark;
[MemoryDiagnoser] [MemoryDiagnoser]
public class ParallelNoiseBenchmark public class ParallelNoiseBenchmark
{ {
private const int _WIDTH = 64; private const int _WIDTH = 2048;
private const int _HEIGHT = 64; private const int _HEIGHT = 2048;
private const int _LENGTH = _WIDTH * _HEIGHT; private const int _LENGTH = _WIDTH * _HEIGHT;
internal JobScheduler _jobScheduler = null!; internal JobScheduler _jobScheduler = null!;
@@ -20,7 +20,7 @@ public class ParallelNoiseBenchmark
[GlobalSetup] [GlobalSetup]
public void Setup() public void Setup()
{ {
_jobScheduler = new JobScheduler(Environment.ProcessorCount - 1); _jobScheduler = new JobScheduler(Environment.ProcessorCount);
_buffers = new UnsafeArray<float>(_LENGTH, Allocator.Persistent); _buffers = new UnsafeArray<float>(_LENGTH, Allocator.Persistent);
} }
@@ -52,7 +52,7 @@ public class ParallelNoiseBenchmark
{ {
var x = i % _WIDTH; var x = i % _WIDTH;
var y = i / _HEIGHT; var y = i / _HEIGHT;
var uv = new Vector2(x, y); var uv = new Vector2(x, y) / new Vector2(_WIDTH, _HEIGHT);
_buffers[i] = NoiseJobVector.GradientNoise(uv); _buffers[i] = NoiseJobVector.GradientNoise(uv);
}); });
} }
@@ -64,7 +64,7 @@ public class ParallelNoiseBenchmark
{ {
var x = i % _WIDTH; var x = i % _WIDTH;
var y = i / _HEIGHT; var y = i / _HEIGHT;
var uv = new Vector2(x, y); var uv = new Vector2(x, y) / new Vector2(_WIDTH, _HEIGHT);
_buffers[i] = NoiseJobVector.GradientNoise(uv); _buffers[i] = NoiseJobVector.GradientNoise(uv);
} }
} }

View File

@@ -26,50 +26,6 @@ public unsafe class SPMDBenchmark
NativeMemory.Free(_buf); NativeMemory.Free(_buf);
} }
[Benchmark]
public void VectorNoiseSingleThread()
{
var job = new Jobs.NoiseJobVector
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
job.Run(_SIZE * _SIZE, 0);
}
[Benchmark]
public void VectorJobNoise()
{
var job = new Jobs.NoiseJobVector
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
var handle = _scheduler.ScheduleParallel(ref job, _SIZE * _SIZE, 64);
_scheduler.WaitComplete(handle);
}
[Benchmark]
public void ParallelVectorNoise()
{
var job = new Jobs.NoiseJobVector
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
Parallel.For(0, _SIZE * _SIZE, (i) =>
{
job.Execute(i, 0);
});
}
[Benchmark(Baseline = true)] [Benchmark(Baseline = true)]
public void SPMDNoise() public void SPMDNoise()
{ {
@@ -83,4 +39,61 @@ public unsafe class SPMDBenchmark
var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, -1, JobHandle.Invalid); var handle = _scheduler.ScheduleParallelSPDM<Jobs.NoiseJobMathSPMD, float>(ref job, _SIZE * _SIZE, 64, -1, JobHandle.Invalid);
_scheduler.WaitComplete(handle); _scheduler.WaitComplete(handle);
} }
/// <summary>
/// Benchmarks <c>NoiseJobVectorFor</c> scheduled through <c>ScheduleParallelFor</c>
/// over <c>_SIZE * _SIZE</c> iterations, including the wait for completion.
/// </summary>
[Benchmark]
public void JobNoise()
{
var job = new Jobs.NoiseJobVectorFor
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
// Arguments after the iteration count: 64 (batch size), -1 (thread index), no dependency.
var handle = _scheduler.ScheduleParallelFor(ref job, _SIZE * _SIZE, 64, -1, JobHandle.Invalid);
// Block until the scheduled work has finished so the full job is measured.
_scheduler.WaitComplete(handle);
}
/// <summary>
/// Runs <c>NoiseJobMath</c> through <c>ScheduleParallel</c> over <c>_SIZE * _SIZE</c> items
/// and waits for completion.
/// </summary>
/// <remarks>
/// The <c>[Benchmark]</c> attribute is commented out, so BenchmarkDotNet does not execute this method.
/// </remarks>
//[Benchmark]
public void MathJobNoise()
{
var job = new Jobs.NoiseJobMath
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
// Arguments after the item count: 64 (batch size), -1 (thread index), no dependency.
var handle = _scheduler.ScheduleParallel(ref job, _SIZE * _SIZE, 64, -1, JobHandle.Invalid);
_scheduler.WaitComplete(handle);
}
/// <summary>
/// Runs <c>NoiseJobVectorFor.Execute</c> for every index in <c>[0, _SIZE * _SIZE)</c>
/// through <see cref="Parallel.For(int, int, Action{int})"/> instead of the job scheduler.
/// </summary>
/// <remarks>
/// The <c>[Benchmark]</c> attribute is commented out, so BenchmarkDotNet does not execute this method.
/// </remarks>
//[Benchmark]
public void ParallelNoise()
{
var job = new Jobs.NoiseJobVectorFor
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
Parallel.For(0, _SIZE * _SIZE, (i) =>
{
// Every iteration passes 0 as the thread index, whichever thread actually runs it.
job.Execute(i, 0);
});
}
/// <summary>
/// Benchmarks <c>NoiseJobVectorFor</c> executed via the <c>Run</c> extension
/// for <c>_SIZE * _SIZE</c> iterations, without going through the scheduler.
/// </summary>
[Benchmark]
public void SingleThreadNoise()
{
var job = new Jobs.NoiseJobVectorFor
{
buffers = _buf,
width = _SIZE,
height = _SIZE,
};
// Second argument is the thread index reported to the job.
job.Run(_SIZE * _SIZE, 0);
}
} }

View File

@@ -1,5 +1,5 @@
using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using System.Runtime.CompilerServices;
using static Misaki.HighPerformance.Mathematics.math; using static Misaki.HighPerformance.Mathematics.math;
namespace Misaki.HighPerformance.Test.Jobs; namespace Misaki.HighPerformance.Test.Jobs;
@@ -7,10 +7,12 @@ namespace Misaki.HighPerformance.Test.Jobs;
public static partial class noise public static partial class noise
{ {
// Modulo 289 without a division (only multiplications) // Modulo 289 without a division (only multiplications)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float mod289(float x) public static float mod289(float x)
{ {
return x - floor(x * (1.0f / 289.0f)) * 289.0f; return x - floor(x * (1.0f / 289.0f)) * 289.0f;
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float2 mod289(float2 x) public static float2 mod289(float2 x)
{ {
return x - floor(x * (1.0f / 289.0f)) * 289.0f; return x - floor(x * (1.0f / 289.0f)) * 289.0f;
@@ -74,7 +76,7 @@ public static partial class noise
{ {
var ones = float4(1.0f, 1.0f, 1.0f, -1.0f); var ones = float4(1.0f, 1.0f, 1.0f, -1.0f);
var pxyz = floor(frac(float3(j) * ip.xyz) * 7.0f) * ip.z - 1.0f; var pxyz = floor(frac(float3(j) * ip.xyz) * 7.0f) * ip.z - 1.0f;
float pw = 1.5f - dot(abs(pxyz), ones.xyz); var pw = 1.5f - dot(abs(pxyz), ones.xyz);
var p = float4(pxyz, pw); var p = float4(pxyz, pw);
var s = float4(p < 0.0f); var s = float4(p < 0.0f);
p.xyz = p.xyz + (s.xyz * 2.0f - 1.0f) * s.www; p.xyz = p.xyz + (s.xyz * 2.0f - 1.0f) * s.www;
@@ -86,96 +88,8 @@ public static partial class noise
public static float2 rgrad2(float2 p, float rot) public static float2 rgrad2(float2 p, float rot)
{ {
// For more isotropic gradients, math.sin/math.cos can be used instead. // For more isotropic gradients, math.sin/math.cos can be used instead.
float u = permute(permute(p.x) + p.y) * 0.0243902439f + rot; // Rotate by shift var u = permute(permute(p.x) + p.y) * 0.0243902439f + rot; // Rotate by shift
u = frac(u) * 6.28318530718f; // 2*pi u = frac(u) * 6.28318530718f; // 2*pi
return float2(cos(u), sin(u)); return float2(cos(u), sin(u));
} }
} }
internal unsafe struct NoiseJob3D : IJobParallelFor
{
public float* buffers;
public int size; // size x size x size
public void Execute(int loopIndex, int threadIndex)
{
var v = float3(
(loopIndex % size) / (float)size,
((loopIndex / size) % size) / (float)size,
(loopIndex / (size * size)) / (float)size
);
var C = float2(1.0f / 6.0f, 1.0f / 3.0f);
var D = float4(0.0f, 0.5f, 1.0f, 2.0f);
// First corner
var i = floor(v + dot(v, C.yyy));
var x0 = v - i + dot(i, C.xxx);
// Other corners
var g = step(x0.yzx, x0.xyz);
var l = 1.0f - g;
var i1 = min(g.xyz, l.zxy);
var i2 = max(g.xyz, l.zxy);
// x0 = x0 - 0.0 + 0.0 * C.xxx;
// x1 = x0 - i1 + 1.0 * C.xxx;
// x2 = x0 - i2 + 2.0 * C.xxx;
// x3 = x0 - 1.0 + 3.0 * C.xxx;
var x1 = x0 - i1 + C.xxx;
var x2 = x0 - i2 + C.yyy; // 2.0*C.x = 1/3 = C.y
var x3 = x0 - D.yyy; // -1.0+3.0*C.x = -0.5 = -D.y
// Permutations
i = noise.mod289(i);
var p = noise.permute(noise.permute(noise.permute(
i.z + float4(0.0f, i1.z, i2.z, 1.0f))
+ i.y + float4(0.0f, i1.y, i2.y, 1.0f))
+ i.x + float4(0.0f, i1.x, i2.x, 1.0f));
// Gradients: 7x7 points over a square, mapped onto an octahedron.
// The ring size 17*17 = 289 is close to a multiple of 49 (49*6 = 294)
float n_ = 0.142857142857f; // 1.0/7.0
var ns = n_ * D.wyz - D.xzx;
var j = p - 49.0f * floor(p * ns.z * ns.z); // math.mod(p,7*7)
var x_ = floor(j * ns.z);
var y_ = floor(j - 7.0f * x_); // math.mod(j,N)
var x = x_ * ns.x + ns.yyyy;
var y = y_ * ns.x + ns.yyyy;
var h = 1.0f - abs(x) - abs(y);
var b0 = float4(x.xy, y.xy);
var b1 = float4(x.zw, y.zw);
//float4 s0 = float4(math.lessThan(b0,0.0))*2.0 - 1.0;
//float4 s1 = float4(math.lessThan(b1,0.0))*2.0 - 1.0;
var s0 = floor(b0) * 2.0f + 1.0f;
var s1 = floor(b1) * 2.0f + 1.0f;
var sh = -step(h, float4(0.0f));
var a0 = b0.xzyw + s0.xzyw * sh.xxyy;
var a1 = b1.xzyw + s1.xzyw * sh.zzww;
var p0 = float3(a0.xy, h.x);
var p1 = float3(a0.zw, h.y);
var p2 = float3(a1.xy, h.z);
var p3 = float3(a1.zw, h.w);
//Normalise gradients
var norm = noise.taylorInvSqrt(float4(dot(p0, p0), dot(p1, p1), dot(p2, p2), dot(p3, p3)));
p0 *= norm.x;
p1 *= norm.y;
p2 *= norm.z;
p3 *= norm.w;
// Mix final noise value
var m = max(0.6f - float4(dot(x0, x0), dot(x1, x1), dot(x2, x2), dot(x3, x3)), 0.0f);
m *= m;
buffers[loopIndex] = 42.0f * dot(m * m, float4(dot(p0, x0), dot(p1, x1), dot(p2, x2), dot(p3, x3)));
}
}

View File

@@ -1,5 +1,4 @@
using Misaki.HighPerformance.Jobs; using Misaki.HighPerformance.Jobs;
using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics; using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.SPMD; using Misaki.HighPerformance.Mathematics.SPMD;
using System.Numerics; using System.Numerics;
@@ -8,7 +7,22 @@ using System.Runtime.Intrinsics;
namespace Misaki.HighPerformance.Test.Jobs; namespace Misaki.HighPerformance.Test.Jobs;
internal unsafe struct NoiseJobVector : IJobParallelFor internal unsafe struct NoiseJobVectorFor : IJobParallelFor
{
public float* buffers;
public int width;
public int height;
/// <summary>
/// Computes the gradient-noise value for one element of the buffer and stores it at <paramref name="loopIndex"/>.
/// </summary>
/// <param name="loopIndex">Linear index of the element; decomposed into a column and a row of a row-major <c>width</c> by <c>height</c> grid.</param>
/// <param name="threadIndex">Index of the executing thread; not used by this job.</param>
public void Execute(int loopIndex, int threadIndex)
{
    // Row-major decomposition: the column wraps every `width` elements, so the row
    // must also be derived from `width`. Dividing by `height` only gives the right
    // row when the buffer is square.
    var x = loopIndex % width;
    var y = loopIndex / width;

    // Normalize the pixel coordinate by the buffer dimensions before sampling.
    var uv = new Vector2(x, y) / new Vector2(width, height);
    buffers[loopIndex] = NoiseJobVector.GradientNoise(uv);
}
}
internal unsafe struct NoiseJobVector : IJobParallel
{ {
public float* buffers; public float* buffers;
public int width; public int width;
@@ -20,16 +34,23 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
return x - MathF.Floor(x); return x - MathF.Floor(x);
} }
/// <summary>
/// Floor-based modulo 289: returns <c>x - floor(x / 289) * 289</c>.
/// </summary>
/// <param name="x">The value to reduce.</param>
/// <returns>The remainder of <paramref name="x"/> after removing whole multiples of 289.</returns>
private static float Mod289(float x)
{
    var wholePeriods = MathF.Floor(x / 289);
    return x - wholePeriods * 289;
}
/// <summary>
/// Hashes a 2D coordinate into a normalized gradient direction.
/// </summary>
/// <param name="uv">The coordinate to hash; both components are first reduced modulo 289.</param>
/// <returns>A unit-length vector derived from the hashed coordinate.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector2 GradientNoiseDirect(Vector2 uv)
{
    uv.X = Mod289(uv.X);
    uv.Y = Mod289(uv.Y);

    // Permutation step: (34v + 1) * v reduced modulo 289. The reduction has to wrap
    // the whole product; applying it to only one factor yields a different hash and
    // leaves the value unreduced.
    var x = Mod289((34 * uv.X + 1) * uv.X) + uv.Y;
    x = Mod289((34 * x + 1) * x);

    // Map the hash into [-1, 1) and build the gradient from it.
    x = Frac(x / 41) * 2 - 1;
    return Vector2.Normalize(new Vector2(x - MathF.Floor(x + 0.5f), MathF.Abs(x) - 0.5f));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float GradientNoise(Vector2 uv) public static float GradientNoise(Vector2 uv)
{ {
var ip = new Vector2(MathF.Floor(uv.X), MathF.Floor(uv.Y)); var ip = new Vector2(MathF.Floor(uv.X), MathF.Floor(uv.Y));
@@ -44,30 +65,35 @@ internal unsafe struct NoiseJobVector : IJobParallelFor
return float.Lerp(float.Lerp(d00, d10, fp.Y), float.Lerp(d01, d11, fp.Y), fp.X); return float.Lerp(float.Lerp(d00, d10, fp.Y), float.Lerp(d01, d11, fp.Y), fp.X);
} }
public void Execute(int loopIndex, int threadIndex) public void Execute(int startIndex, int endIndex, int threadIndex)
{ {
var x = loopIndex % width; for (int i = startIndex; i < endIndex; i++)
var y = loopIndex / height; {
var x = i % width;
var y = i / height;
var uv = new Vector2(x, y) / new Vector2(width, height); var uv = new Vector2(x, y) / new Vector2(width, height);
buffers[loopIndex] = GradientNoise(uv); buffers[i] = GradientNoise(uv);
}
} }
} }
internal unsafe struct NoiseJobMath : IJobParallelFor internal unsafe struct NoiseJobMath : IJobParallel
{ {
public float* buffers; public float* buffers;
public int width; public int width;
public int height; public int height;
/// <summary>
/// Hashes a 2D coordinate into a normalized gradient direction.
/// </summary>
/// <param name="uv">The coordinate to hash; it is first reduced modulo 289.</param>
/// <returns>A normalized <c>float2</c> derived from the hashed coordinate.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static float2 GradientNoiseDirect(float2 uv)
{
    uv = noise.mod289(uv);

    // Permutation step: (34v + 1) * v reduced modulo 289. The reduction has to wrap
    // the whole product; applying it to only one factor yields a different hash and
    // leaves the value unreduced.
    var x = noise.mod289((34 * uv.x + 1) * uv.x) + uv.y;
    x = noise.mod289((34 * x + 1) * x);

    // Map the hash into [-1, 1) and build the gradient from it.
    x = math.frac(x / 41) * 2 - 1;
    return math.normalize(new float2(x - math.floor(x + 0.5f), math.abs(x) - 0.5f));
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float GradientNoise(float2 uv) public static float GradientNoise(float2 uv)
{ {
var ip = new float2(math.floor(uv.x), math.floor(uv.y)); var ip = new float2(math.floor(uv.x), math.floor(uv.y));
@@ -82,6 +108,7 @@ internal unsafe struct NoiseJobMath : IJobParallelFor
return float.Lerp(float.Lerp(d00, d10, fp.y), float.Lerp(d01, d11, fp.y), fp.x); return float.Lerp(float.Lerp(d00, d10, fp.y), float.Lerp(d01, d11, fp.y), fp.x);
} }
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Execute(int loopIndex, int threadIndex) public void Execute(int loopIndex, int threadIndex)
{ {
var x = loopIndex % width; var x = loopIndex % width;
@@ -89,9 +116,21 @@ internal unsafe struct NoiseJobMath : IJobParallelFor
var uv = new float2(x, y) / new float2(width, height); var uv = new float2(x, y) / new float2(width, height);
buffers[loopIndex] = GradientNoise(uv); buffers[loopIndex] = GradientNoise(uv);
} }
/// <summary>
/// Computes gradient noise for every element in the half-open range
/// [<paramref name="startIndex"/>, <paramref name="endIndex"/>) and stores each result in the buffer.
/// </summary>
/// <param name="startIndex">First linear index to process (inclusive).</param>
/// <param name="endIndex">Linear index at which processing stops (exclusive).</param>
/// <param name="threadIndex">Index of the executing thread; not used by this job.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Execute(int startIndex, int endIndex, int threadIndex)
{
    for (var i = startIndex; i < endIndex; i++)
    {
        // Row-major decomposition: the column wraps every `width` elements, so the
        // row must also be derived from `width`. Dividing by `height` is only
        // correct for square buffers.
        var x = i % width;
        var y = i / width;

        var uv = new float2(x, y) / new float2(width, height);
        buffers[i] = GradientNoise(uv);
    }
}
} }
internal unsafe struct NoiseJobMathV : IJobParallelFor internal unsafe struct NoiseJobMathV : IJobParallel
{ {
public float* buffers; public float* buffers;
public int width; public int width;
@@ -160,12 +199,11 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
return lerpY1 + (lerpY2 - lerpY1) * uX; return lerpY1 + (lerpY2 - lerpY1) * uX;
} }
public void Execute(int loopIndex, int threadIndex) public void Execute(int startIndex, int endIndex, int threadIndex)
{ {
// --------------------------------------------------------- for (int i = startIndex; i < endIndex; i++)
// IMPORTANT: Loop Stride is now 8! {
// --------------------------------------------------------- var baseIndex = i * 8;
var baseIndex = loopIndex * 8;
// Safety check // Safety check
if (baseIndex + 7 >= width * height) if (baseIndex + 7 >= width * height)
@@ -190,6 +228,7 @@ internal unsafe struct NoiseJobMathV : IJobParallelFor
// Store 8 floats (32 bytes) // Store 8 floats (32 bytes)
result.Store(buffers + baseIndex); result.Store(buffers + baseIndex);
} }
}
} }
internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float> internal unsafe struct NoiseJobMathSPMD : IJobSPMD<float>

View File

@@ -5,8 +5,8 @@
<TargetFramework>net10.0</TargetFramework> <TargetFramework>net10.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings> <ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable> <Nullable>enable</Nullable>
<PublishAot>True</PublishAot>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks> <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<PublishAot>True</PublishAot>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
@@ -17,6 +17,7 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.15.2" /> <PackageReference Include="BenchmarkDotNet" Version="0.15.2" />
<PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="18.3.36812.1" />
<PackageReference Include="MSTest" Version="3.10.1" /> <PackageReference Include="MSTest" Version="3.10.1" />
</ItemGroup> </ItemGroup>

View File

@@ -1 +1,6 @@
BenchmarkDotNet.Running.BenchmarkRunner.Run<Misaki.HighPerformance.Test.Benchmark.MathematicsBenchmark>(); using BenchmarkDotNet.Running;
using Misaki.HighPerformance.Mathematics.SPMD;
using Misaki.HighPerformance.Test.Benchmark;
using Misaki.HighPerformance.Test.Jobs;
BenchmarkRunner.Run<SPMDBenchmark>();

View File

@@ -353,7 +353,7 @@ public unsafe class TestJobSystem
height = size, height = size,
}; };
spmdJob.Run(size * size, -1); //spmdJob.Run(size * size, -1);
var eq = vs.SequenceCompareTo(ss); var eq = vs.SequenceCompareTo(ss);
Assert.AreEqual(0, eq); Assert.AreEqual(0, eq);

View File

@@ -50,25 +50,31 @@ internal unsafe struct KahanSumJob : IJob
} }
} }
internal unsafe struct ParallelAddJob : IJobParallelFor internal unsafe struct ParallelAddJob : IJobParallel
{ {
public float value; public float value;
public float* inout; public float* inout;
public void Execute(int loopIndex, int threadIndex) public void Execute(int startIndex, int endIndex, int threadIndex)
{ {
inout[loopIndex] += value; for (var i = startIndex; i < endIndex; i++)
{
inout[i] += value;
}
} }
} }
internal unsafe struct ParallelMultiplyJob : IJobParallelFor internal unsafe struct ParallelMultiplyJob : IJobParallel
{ {
public float multiplier; public float multiplier;
public float* inout; public float* inout;
public void Execute(int loopIndex, int threadIndex) public void Execute(int startIndex, int endIndex, int threadIndex)
{ {
inout[loopIndex] *= multiplier; for (var i = startIndex; i < endIndex; i++)
{
inout[i] *= multiplier;
}
} }
} }

View File

@@ -8,7 +8,6 @@
<Authors>Misaki</Authors> <Authors>Misaki</Authors>
<AssemblyVersion>1.0.4</AssemblyVersion> <AssemblyVersion>1.0.4</AssemblyVersion>
<Version>$(AssemblyVersion)</Version> <Version>$(AssemblyVersion)</Version>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl> <PackageProjectUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</PackageProjectUrl>
<RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl> <RepositoryUrl>https://git.personalnas.com/Misaki/Misaki.HighPerformance.git</RepositoryUrl>
</PropertyGroup> </PropertyGroup>