108 lines
3.8 KiB
C#
108 lines
3.8 KiB
C#
using Misaki.HighPerformance.Jobs;
|
|
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
|
|
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
|
|
|
/// <summary>
/// Contract for a job whose body is written once, generically over a lane type, so the same code
/// can be invoked with either a wide lane or a scalar lane.
/// </summary>
/// <typeparam name="TNumber">Numeric element type the lanes operate on.</typeparam>
public interface IJobSPMD<TNumber>
    where TNumber : unmanaged,
                    INumber<TNumber>,
                    IMinMaxValue<TNumber>,
                    IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>
    /// Runs the job body for the work item(s) starting at <paramref name="baseIndex"/>.
    /// </summary>
    /// <typeparam name="TLane">
    /// Lane implementation selected by the caller. The wrappers in this file pass
    /// <c>WideLane&lt;TNumber&gt;</c> for full chunks and <c>ScalarLane&lt;TNumber&gt;</c> for single elements.
    /// </typeparam>
    /// <param name="baseIndex">
    /// Index of the first element to process. With a wide lane the callers in this file pass a
    /// multiple of <c>WideLane&lt;TNumber&gt;.LaneWidth</c>; with a scalar lane they pass the element index.
    /// NOTE(review): presumably a wide invocation covers <c>LaneWidth</c> consecutive elements - confirm
    /// against the <c>ISPMD</c> implementations.
    /// </param>
    /// <param name="threadIndex">Thread index forwarded unchanged from the caller.</param>
    void Execute<TLane>(int baseIndex, int threadIndex)
        where TLane : ISPMD<TLane, TNumber>;
}
|
|
|
|
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/>: each parallel
/// iteration corresponds to one chunk of <c>WideLane&lt;TNumber&gt;.LaneWidth</c> elements.
/// </summary>
/// <typeparam name="T">Concrete job type being wrapped.</typeparam>
/// <typeparam name="TNumber">Numeric element type the lanes operate on.</typeparam>
internal struct SPMDJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged,
                    INumber<TNumber>,
                    IMinMaxValue<TNumber>,
                    IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>The wrapped job that every chunk is forwarded to.</summary>
    public T innerJob;

    /// <summary>Total number of elements; used to detect a partial chunk.</summary>
    public int totalCount;

    /// <summary>
    /// Processes chunk <paramref name="loopIndex"/>: one wide-lane call when a full chunk is
    /// available, otherwise one scalar-lane call per leftover element.
    /// </summary>
    /// <param name="loopIndex">Zero-based chunk index.</param>
    /// <param name="threadIndex">Thread index forwarded to the wrapped job.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Execute(int loopIndex, int threadIndex)
    {
        var width = WideLane<TNumber>.LaneWidth;
        var firstElement = loopIndex * width;
        var leftover = totalCount - firstElement;

        // Full chunk: a single wide invocation handles it.
        if (leftover >= width)
        {
            innerJob.Execute<WideLane<TNumber>>(firstElement, threadIndex);
            return;
        }

        // Partial chunk: fall back to one scalar invocation per remaining element.
        var offset = 0;
        while (offset < leftover)
        {
            innerJob.Execute<ScalarLane<TNumber>>(firstElement + offset, threadIndex);
            offset++;
        }
    }
}
|
|
|
|
/// <summary>
/// Adapts an <see cref="IJobSPMD{TNumber}"/> to <see cref="IJobParallelFor"/> using only the
/// scalar lane: each parallel iteration is forwarded as one scalar invocation.
/// </summary>
/// <typeparam name="T">Concrete job type being wrapped.</typeparam>
/// <typeparam name="TNumber">Numeric element type the lane operates on.</typeparam>
internal struct SPMDScalerJobWrapper<T, TNumber> : IJobParallelFor
    where T : unmanaged, IJobSPMD<TNumber>
    where TNumber : unmanaged,
                    INumber<TNumber>,
                    IMinMaxValue<TNumber>,
                    IBitwiseOperators<TNumber, TNumber, TNumber>
{
    /// <summary>The wrapped job that every iteration is forwarded to.</summary>
    public T innerJob;

    /// <summary>Total number of elements. Set by the scheduler extension; not read by <see cref="Execute"/>.</summary>
    public int totalCount;

    /// <summary>Forwards iteration <paramref name="loopIndex"/> to the wrapped job as a scalar-lane call.</summary>
    /// <param name="loopIndex">Element index passed through as the job's base index.</param>
    /// <param name="threadIndex">Thread index forwarded to the wrapped job.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public void Execute(int loopIndex, int threadIndex)
        => innerJob.Execute<ScalarLane<TNumber>>(loopIndex, threadIndex);
}
|
|
|
|
/// <summary>
/// Entry points for running an <see cref="IJobSPMD{TNumber}"/> either synchronously on the calling
/// thread or through a <c>JobScheduler</c>.
/// </summary>
public static class IJobParallelForSPMDExtensions
{
    /// <summary>
    /// Executes <paramref name="job"/> synchronously over <paramref name="totalCount"/> elements.
    /// </summary>
    /// <remarks>
    /// When <c>WideLane.IsSupported</c> is true, full chunks of <c>WideLane&lt;TNumber&gt;.LaneWidth</c>
    /// elements are dispatched with the wide lane and the leftover elements with the scalar lane.
    /// When it is false, every element is dispatched with the scalar lane, which is the same choice
    /// <see cref="ScheduleParallelSPDM{T, TNumber}"/> makes.
    /// </remarks>
    /// <typeparam name="T">Concrete job type.</typeparam>
    /// <typeparam name="TNumber">Numeric element type the lanes operate on.</typeparam>
    /// <param name="job">Job to run; invoked in place through the reference.</param>
    /// <param name="totalCount">Number of elements to process. Values &lt;= 0 do nothing.</param>
    /// <param name="threadIndex">Thread index forwarded to every <c>Execute</c> call.</param>
    public static void Run<T, TNumber>(this ref T job, int totalCount, int threadIndex)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        // Keep the synchronous path consistent with the scheduled one: no wide lanes unless supported.
        if (!WideLane.IsSupported)
        {
            for (var index = 0; index < totalCount; index++)
            {
                job.Execute<ScalarLane<TNumber>>(index, threadIndex);
            }

            return;
        }

        var laneWidth = WideLane<TNumber>.LaneWidth;
        var chunkCount = CountChunks(totalCount, laneWidth);

        for (var chunk = 0; chunk < chunkCount; chunk++)
        {
            var baseIndex = chunk * laneWidth;
            var remaining = totalCount - baseIndex;

            if (remaining >= laneWidth)
            {
                job.Execute<WideLane<TNumber>>(baseIndex, threadIndex);
            }
            else
            {
                // Only the final chunk can be partial; finish it element by element.
                for (var i = 0; i < remaining; i++)
                {
                    job.Execute<ScalarLane<TNumber>>(baseIndex + i, threadIndex);
                }
            }
        }
    }

    /// <summary>
    /// Schedules <paramref name="job"/> on <paramref name="jobScheduler"/> as a parallel-for over
    /// <paramref name="totalCount"/> elements.
    /// </summary>
    /// <remarks>
    /// The job is copied by value into an internal wrapper at scheduling time. When
    /// <c>WideLane.IsSupported</c> is true, one parallel iteration is scheduled per lane-width chunk;
    /// otherwise one iteration is scheduled per element using the scalar lane.
    /// The method name keeps its historical spelling ("SPDM") for source compatibility.
    /// </remarks>
    /// <typeparam name="T">Concrete job type.</typeparam>
    /// <typeparam name="TNumber">Numeric element type the lanes operate on.</typeparam>
    /// <param name="jobScheduler">Scheduler that receives the wrapped job.</param>
    /// <param name="job">Job to schedule; its current value is copied into the wrapper.</param>
    /// <param name="totalCount">Number of elements to process.</param>
    /// <param name="batchSize">Forwarded unchanged to <c>JobScheduler.ScheduleParallel</c>.</param>
    /// <param name="threadIndex">Forwarded unchanged to <c>JobScheduler.ScheduleParallel</c>.</param>
    /// <param name="dependency">Forwarded unchanged to <c>JobScheduler.ScheduleParallel</c>.</param>
    /// <returns>The handle returned by <c>JobScheduler.ScheduleParallel</c>.</returns>
    public static JobHandle ScheduleParallelSPDM<T, TNumber>(this JobScheduler jobScheduler, ref T job, int totalCount, int batchSize, int threadIndex, JobHandle dependency)
        where T : unmanaged, IJobSPMD<TNumber>
        where TNumber : unmanaged, INumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
    {
        if (WideLane.IsSupported)
        {
            var wrapper = new SPMDJobWrapper<T, TNumber>
            {
                innerJob = job,
                totalCount = totalCount,
            };

            var iterations = CountChunks(totalCount, WideLane<TNumber>.LaneWidth);
            return jobScheduler.ScheduleParallel(ref wrapper, iterations, batchSize, threadIndex, dependency);
        }
        else
        {
            var wrapper = new SPMDScalerJobWrapper<T, TNumber>
            {
                innerJob = job,
                totalCount = totalCount,
            };

            return jobScheduler.ScheduleParallel(ref wrapper, totalCount, batchSize, threadIndex, dependency);
        }
    }

    /// <summary>
    /// Number of lane-width chunks needed to cover <paramref name="totalCount"/> elements
    /// (ceiling division), or 0 when there is nothing to process.
    /// </summary>
    /// <param name="totalCount">Number of elements.</param>
    /// <param name="laneWidth">Elements per chunk.</param>
    private static int CountChunks(int totalCount, int laneWidth)
    {
        if (totalCount <= 0)
        {
            return 0;
        }

        // (totalCount - 1) / laneWidth + 1 equals ceil(totalCount / laneWidth) for positive counts and,
        // unlike (totalCount + laneWidth - 1) / laneWidth, cannot overflow int near int.MaxValue.
        return (totalCount - 1) / laneWidth + 1;
    }
}
|