Refactor SPMD jobs for true vectorized/masked execution
- Change IJobSPMD.Execute to an (indices, mask, ctx) signature for all arities, enabling proper vectorized/masked SPMD execution.
- Update all SPMD job wrappers, extension methods, and test jobs to use the new interface.
- Add AVX2 gather/masked-gather support to MathV.GatherVector2/3/4 and related methods; use a [ConstantExpected] byte scale.
- Improve gather/select logic, pointer arithmetic, and overloads for ref/int* index access.
- Refactor GGXMipGenerationBenchmark and its jobs for SPMD, with per-mip-level vectorized jobs and improved memory access.
- Clean up code, fix naming, update comments, and bump version to 1.3.6.
This commit is contained in:
@@ -13,7 +13,7 @@ namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||
public interface IJobSPMD<TNumber0>
|
||||
where TNumber0 : unmanaged, INumber<TNumber0>, IBinaryNumber<TNumber0>, IMinMaxValue<TNumber0>, IBitwiseOperators<TNumber0, TNumber0, TNumber0>
|
||||
{
|
||||
void Execute<TLane0>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>;
|
||||
}
|
||||
|
||||
@@ -27,19 +27,10 @@ internal struct SPMDJobWrapper<T, TNumber0> : IJobParallelFor
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +43,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0> : IJobParallelFor
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +59,7 @@ public interface IJobSPMD<TNumber0, TNumber1>
|
||||
where TNumber0 : unmanaged, INumber<TNumber0>, IBinaryNumber<TNumber0>, IMinMaxValue<TNumber0>, IBitwiseOperators<TNumber0, TNumber0, TNumber0>
|
||||
where TNumber1 : unmanaged, INumber<TNumber1>, IBinaryNumber<TNumber1>, IMinMaxValue<TNumber1>, IBitwiseOperators<TNumber1, TNumber1, TNumber1>
|
||||
{
|
||||
void Execute<TLane0, TLane1>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>;
|
||||
}
|
||||
@@ -84,19 +75,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1> : IJobParallelFor
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,7 +92,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1> : IJobParallelFor
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,7 +110,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2>
|
||||
where TNumber1 : unmanaged, INumber<TNumber1>, IBinaryNumber<TNumber1>, IMinMaxValue<TNumber1>, IBitwiseOperators<TNumber1, TNumber1, TNumber1>
|
||||
where TNumber2 : unmanaged, INumber<TNumber2>, IBinaryNumber<TNumber2>, IMinMaxValue<TNumber2>, IBitwiseOperators<TNumber2, TNumber2, TNumber2>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>;
|
||||
@@ -146,19 +128,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2> : IJobParallelFo
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,7 +146,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2> : IJobPara
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -193,7 +166,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2, TNumber3>
|
||||
where TNumber2 : unmanaged, INumber<TNumber2>, IBinaryNumber<TNumber2>, IMinMaxValue<TNumber2>, IBitwiseOperators<TNumber2, TNumber2, TNumber2>
|
||||
where TNumber3 : unmanaged, INumber<TNumber3>, IBinaryNumber<TNumber3>, IMinMaxValue<TNumber3>, IBitwiseOperators<TNumber3, TNumber3, TNumber3>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>
|
||||
@@ -213,19 +186,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3> : IJob
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -241,7 +205,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3>
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -263,7 +227,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2, TNumber3, TNumber4>
|
||||
where TNumber3 : unmanaged, INumber<TNumber3>, IBinaryNumber<TNumber3>, IMinMaxValue<TNumber3>, IBitwiseOperators<TNumber3, TNumber3, TNumber3>
|
||||
where TNumber4 : unmanaged, INumber<TNumber4>, IBinaryNumber<TNumber4>, IMinMaxValue<TNumber4>, IBitwiseOperators<TNumber4, TNumber4, TNumber4>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>
|
||||
@@ -285,19 +249,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3, TNumbe
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,7 +269,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3,
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -338,7 +293,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2, TNumber3, TNumber4, TNum
|
||||
where TNumber4 : unmanaged, INumber<TNumber4>, IBinaryNumber<TNumber4>, IMinMaxValue<TNumber4>, IBitwiseOperators<TNumber4, TNumber4, TNumber4>
|
||||
where TNumber5 : unmanaged, INumber<TNumber5>, IBinaryNumber<TNumber5>, IMinMaxValue<TNumber5>, IBitwiseOperators<TNumber5, TNumber5, TNumber5>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>
|
||||
@@ -362,19 +317,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3, TNumbe
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,7 +338,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3,
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -418,7 +364,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2, TNumber3, TNumber4, TNum
|
||||
where TNumber5 : unmanaged, INumber<TNumber5>, IBinaryNumber<TNumber5>, IMinMaxValue<TNumber5>, IBitwiseOperators<TNumber5, TNumber5, TNumber5>
|
||||
where TNumber6 : unmanaged, INumber<TNumber6>, IBinaryNumber<TNumber6>, IMinMaxValue<TNumber6>, IBitwiseOperators<TNumber6, TNumber6, TNumber6>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5, TLane6>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5, TLane6>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>
|
||||
@@ -444,19 +390,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3, TNumbe
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -475,7 +412,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3,
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -503,7 +440,7 @@ public interface IJobSPMD<TNumber0, TNumber1, TNumber2, TNumber3, TNumber4, TNum
|
||||
where TNumber6 : unmanaged, INumber<TNumber6>, IBinaryNumber<TNumber6>, IMinMaxValue<TNumber6>, IBitwiseOperators<TNumber6, TNumber6, TNumber6>
|
||||
where TNumber7 : unmanaged, INumber<TNumber7>, IBinaryNumber<TNumber7>, IMinMaxValue<TNumber7>, IBitwiseOperators<TNumber7, TNumber7, TNumber7>
|
||||
{
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5, TLane6, TLane7>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<TLane0, TLane1, TLane2, TLane3, TLane4, TLane5, TLane6, TLane7>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
where TLane0 : unmanaged, ISPMDLane<TLane0, TNumber0>
|
||||
where TLane1 : unmanaged, ISPMDLane<TLane1, TNumber1>
|
||||
where TLane2 : unmanaged, ISPMDLane<TLane2, TNumber2>
|
||||
@@ -531,19 +468,10 @@ internal struct SPMDJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3, TNumbe
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>, WideLane<TNumber7>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>, WideLane<TNumber7>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -563,7 +491,7 @@ internal struct SPMDScalerJobWrapper<T, TNumber0, TNumber1, TNumber2, TNumber3,
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(loopIndex, in ctx);
|
||||
innerJob.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -590,26 +518,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -677,26 +596,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -768,26 +678,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -863,26 +764,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -962,26 +854,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1065,26 +948,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1172,26 +1046,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1283,26 +1148,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>, WideLane<TNumber7>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<WideLane<TNumber0>, WideLane<TNumber1>, WideLane<TNumber2>, WideLane<TNumber3>, WideLane<TNumber4>, WideLane<TNumber5>, WideLane<TNumber6>, WideLane<TNumber7>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(loopIndex, in ctx);
|
||||
job.Execute<ScalarLane<TNumber0>, ScalarLane<TNumber1>, ScalarLane<TNumber2>, ScalarLane<TNumber3>, ScalarLane<TNumber4>, ScalarLane<TNumber5>, ScalarLane<TNumber6>, ScalarLane<TNumber7>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ for (var i = 0; i < 8; i++) { #>
|
||||
public interface IJobSPMD<<#= ForEachDimension(i + 1, j => $"TNumber{j}") #>>
|
||||
<#= GetTNumberRestrictions(i + 1) #>
|
||||
{
|
||||
void Execute<<#= ForEachDimension(i + 1, j => $"TLane{j}") #>>(int baseIndex, ref readonly JobExecutionContext ctx)
|
||||
void Execute<<#= ForEachDimension(i + 1, j => $"TLane{j}") #>>(TLane0 indices, TLane0 mask, ref readonly JobExecutionContext ctx)
|
||||
<#= GetTLaneRestrictions(i + 1, " ") #>;
|
||||
}
|
||||
|
||||
@@ -42,19 +42,10 @@ internal struct SPMDJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumber{j}"
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
innerJob.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var j = 0; j < remaining; j++)
|
||||
{
|
||||
innerJob.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(baseIndex + j, in ctx);
|
||||
}
|
||||
}
|
||||
innerJob.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,7 +58,7 @@ internal struct SPMDScalerJobWrapper<T, <#= ForEachDimension(i + 1, j => $"TNumb
|
||||
|
||||
public void Execute(int loopIndex, ref readonly JobExecutionContext ctx)
|
||||
{
|
||||
innerJob.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(loopIndex, in ctx);
|
||||
innerJob.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,26 +87,17 @@ public static class IJobParallelForSPMDExtensions
|
||||
for (var loopIndex = 0; loopIndex < iterations; loopIndex++)
|
||||
{
|
||||
var baseIndex = loopIndex * WideLane<TNumber0>.LaneWidth;
|
||||
var remaining = totalIteration - baseIndex;
|
||||
var indices = WideLane<TNumber0>.Sequence(TNumber0.CreateTruncating(baseIndex), TNumber0.One);
|
||||
var mask = indices < TNumber0.CreateTruncating(totalIteration);
|
||||
|
||||
if (remaining >= WideLane<TNumber0>.LaneWidth)
|
||||
{
|
||||
job.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(baseIndex, in ctx);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var i = 0; i < remaining; i++)
|
||||
{
|
||||
job.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(baseIndex + i, in ctx);
|
||||
}
|
||||
}
|
||||
job.Execute<<#= ForEachDimension(i + 1, j => $"WideLane<TNumber{j}>") #>>(indices, mask, in ctx);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (var loopIndex = 0; loopIndex < totalIteration; loopIndex++)
|
||||
{
|
||||
job.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(loopIndex, in ctx);
|
||||
job.Execute<<#= ForEachDimension(i + 1, j => $"ScalarLane<TNumber{j}>") #>>(TNumber0.CreateTruncating(loopIndex), ScalarLane<TNumber0>.AllBitsSet, in ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,11 +9,13 @@
|
||||
/// Changes to this file may cause incorrect behavior and will be lost if the code is regenerated.
|
||||
/// </auto-generated>
|
||||
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
|
||||
namespace Misaki.HighPerformance.Mathematics.SPMD;
|
||||
|
||||
<#
|
||||
const string TLane = "TLane";
|
||||
const string TNumber = "TNumber";
|
||||
@@ -121,10 +123,94 @@ public static unsafe partial class MathV
|
||||
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(<#= TNumber #>* pData, <#= TLane #> indices, int scale)
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(<#= TNumber #>* pData, <#= TLane #> indices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
if (TLane.LaneWidth == Vector128<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
ref var v = ref Unsafe.As<TLane, Vector128<TNumber>>(ref indices);
|
||||
var vidx = v.AsInt32();
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector128((uint*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
ref var v = ref Unsafe.As<TLane, Vector128<TNumber>>(ref indices);
|
||||
var vidx = v.AsInt64();
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector128((ulong*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
else if (TLane.LaneWidth == Vector256<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
ref var v = ref Unsafe.As<TLane, Vector256<TNumber>>(ref indices);
|
||||
var vidx = v.AsInt32();
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector256((uint*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
ref var v = ref Unsafe.As<TLane, Vector256<TNumber>>(ref indices);
|
||||
var vidx = v.AsInt64();
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector256((ulong*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||
@@ -135,7 +221,7 @@ public static unsafe partial class MathV
|
||||
var scalarIdx = int.CreateTruncating(indices[i]);
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
p<#= components[i] #>[i] = pData[scalarIdx + <#= i #> * scale];
|
||||
p<#= components[i] #>[i] = *(TNumber*)((byte*)pData + ((scalarIdx + <#= i #>) * scale));
|
||||
<# } #>
|
||||
}
|
||||
|
||||
@@ -148,10 +234,90 @@ public static unsafe partial class MathV
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(<#= TNumber #>* pData, int* pIndices, int scale)
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(<#= TNumber #>* pData, int* pIndices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
if (TLane.LaneWidth == Vector128<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector128((uint*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector128((ulong*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
else if (TLane.LaneWidth == Vector256<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
var vidx = Vector256.Load(pIndices);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector256((uint*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherVector256((ulong*)(pData + <#= i #>), vidx, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||
@@ -162,7 +328,7 @@ public static unsafe partial class MathV
|
||||
var scalerIdx = pIndices[i];
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
p<#= components[i] #>[i] = pData[scalerIdx + <#= i #> * scale];
|
||||
p<#= components[i] #>[i] = *(TNumber*)((byte*)pData + ((scalerIdx + <#= i #>) * scale));
|
||||
<# } #>
|
||||
}
|
||||
|
||||
@@ -175,10 +341,110 @@ public static unsafe partial class MathV
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, <#= TLane #> indices, int scale)
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, <#= TLane #> indices, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return GatherVector<#= dimension #><<#= GenericParameters #>>((<#= TNumber #>*)Unsafe.AsPointer(ref baseAddress), indices, scale);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, ref int baseIndex, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return GatherVector<#= dimension #><<#= GenericParameters #>>((<#= TNumber #>*)Unsafe.AsPointer(ref baseAddress), (int*)Unsafe.AsPointer(ref baseIndex), scale);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> MaskGatherVector<#= dimension #><TLane, TNumber>(<#= TNumber #>* pData, <#= TLane #> indices, <#= TLane #> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
where TLane : unmanaged, ISPMDLane<TLane, TNumber>
|
||||
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
|
||||
{
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
if (TLane.LaneWidth == Vector128<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
ref var vidx = ref Unsafe.As<TLane, Vector128<int>>(ref indices);
|
||||
ref var vmask = ref Unsafe.As<TLane, Vector128<uint>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector128(Vector128<uint>.Zero, (uint*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
ref var vidx = ref Unsafe.As<TLane, Vector128<int>>(ref indices);
|
||||
var vmask = Unsafe.As<TLane, Vector128<ulong>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector128(Vector128<ulong>.Zero, (ulong*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
else if (TLane.LaneWidth == Vector256<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
ref var vidx = ref Unsafe.As<TLane, Vector256<int>>(ref indices);
|
||||
var vmask = Unsafe.As<TLane, Vector256<uint>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector256(Vector256<uint>.Zero, (uint*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
ref var vidx = ref Unsafe.As<TLane, Vector128<int>>(ref indices);
|
||||
var vmask = Unsafe.As<TLane, Vector256<ulong>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector256(Vector256<ulong>.Zero, (ulong*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||
@@ -186,10 +452,15 @@ public static unsafe partial class MathV
|
||||
|
||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||
{
|
||||
var scalarIdx = int.CreateTruncating(indices[i]);
|
||||
if (mask[i] == TNumber.Zero)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var scalerIdx = int.CreateTruncating(indices[i]);
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
p<#= components[i] #>[i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
||||
p<#= components[i] #>[i] = *(TNumber*)((byte*)pData + ((scalerIdx + <#= i #>) * scale));
|
||||
<# } #>
|
||||
}
|
||||
|
||||
@@ -202,10 +473,94 @@ public static unsafe partial class MathV
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> GatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, ref int baseIndex, int scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
public static <#= vectorType #> MaskGatherVector<#= dimension #><TLane, TNumber>(<#= TNumber #>* pData, int* pIndices, <#= TLane #> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
where TLane : unmanaged, ISPMDLane<TLane, TNumber>
|
||||
where TNumber : unmanaged, INumber<TNumber>, IBinaryNumber<TNumber>, IMinMaxValue<TNumber>, IBitwiseOperators<TNumber, TNumber, TNumber>
|
||||
{
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
if (TLane.LaneWidth == Vector128<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
ref var vmask = ref Unsafe.As<TLane, Vector128<uint>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector128(Vector128<uint>.Zero, (uint*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
var vmask = Unsafe.As<TLane, Vector128<ulong>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector128(Vector128<ulong>.Zero, (ulong*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector128<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
else if (TLane.LaneWidth == Vector256<TNumber>.Count)
|
||||
{
|
||||
if (sizeof(TNumber) == sizeof(uint))
|
||||
{
|
||||
var vidx = Vector256.Load(pIndices);
|
||||
var vmask = Unsafe.As<TLane, Vector256<uint>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector256(Vector256<uint>.Zero, (uint*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<uint>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
if (sizeof(TNumber) == sizeof(ulong))
|
||||
{
|
||||
var vidx = Vector128.Load(pIndices);
|
||||
var vmask = Unsafe.As<TLane, Vector256<ulong>>(ref mask);
|
||||
|
||||
#pragma warning disable CA1857 // A constant is expected for the parameter
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
var v<#= components[i] #> = Avx2.GatherMaskVector256(Vector256<ulong>.Zero, (ulong*)(pData + <#= i #>), vidx, vmask, scale);
|
||||
<# } #>
|
||||
#pragma warning restore CA1857 // A constant is expected for the parameter
|
||||
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = Unsafe.As<Vector256<ulong>, TLane>(ref v<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
Unsafe.SkipInit(out TLane <#= components[i] #>);
|
||||
var p<#= components[i] #> = (<#= TNumber #>*)&<#= components[i] #>;
|
||||
@@ -213,10 +568,15 @@ public static unsafe partial class MathV
|
||||
|
||||
for (var i = 0; i < TLane.LaneWidth; i++)
|
||||
{
|
||||
var scalarIdx = Unsafe.Add(ref baseIndex, i);
|
||||
if (mask[i] == TNumber.Zero)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var scalerIdx = pIndices[i];
|
||||
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
p<#= components[i] #>[i] = Unsafe.Add(ref baseAddress, scalarIdx + <#= i #> * scale);
|
||||
p<#= components[i] #>[i] = *(TNumber*)((byte*)pData + ((scalerIdx + <#= i #>) * scale));
|
||||
<# } #>
|
||||
}
|
||||
|
||||
@@ -228,6 +588,22 @@ public static unsafe partial class MathV
|
||||
};
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> MaskGatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, <#= TLane #> indices, <#= TLane #> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return MaskGatherVector<#= dimension #><<#= GenericParameters #>>((<#= TNumber #>*)Unsafe.AsPointer(ref baseAddress), indices, mask, scale);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> MaskGatherVector<#= dimension #><<#= GenericParameters #>>(ref <#= TNumber #> baseAddress, ref int baseIndex, <#= TLane #> mask, [ConstantExpected(Min = (byte)(1), Max = (byte)(8))] byte scale)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return MaskGatherVector<#= dimension #><<#= GenericParameters #>>((<#= TNumber #>*)Unsafe.AsPointer(ref baseAddress), (int*)Unsafe.AsPointer(ref baseIndex), mask, scale);
|
||||
}
|
||||
|
||||
|
||||
// Math Functions
|
||||
|
||||
@@ -501,27 +877,27 @@ public static unsafe partial class MathV
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> Select<<#= GenericParameters #>>(<#= TLane #> condition, in <#= vectorType #> a, in <#= vectorType #> b)
|
||||
public static <#= vectorType #> Select<<#= GenericParameters #>>(<#= TLane #> condition, in <#= vectorType #> isTrue, in <#= vectorType #> isFalse)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = <#= TLane #>.Select(condition, b.<#= components[i] #>, a.<#= components[i] #>),
|
||||
<#= components[i] #> = <#= TLane #>.Select(condition, isTrue.<#= components[i] #>, isFalse.<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static <#= vectorType #> Select<<#= GenericParameters #>>(<#= vectorType #> condition, in <#= vectorType #> a, in <#= vectorType #> b)
|
||||
public static <#= vectorType #> Select<<#= GenericParameters #>>(<#= vectorType #> condition, in <#= vectorType #> isTrue, in <#= vectorType #> isFalse)
|
||||
<#= TLaneRestrictions #>
|
||||
<#= TNumberRestrictions #>
|
||||
{
|
||||
return new <#= vectorType #>
|
||||
{
|
||||
<# for (int i = 0; i < dimension; i++) { #>
|
||||
<#= components[i] #> = <#= TLane #>.Select(condition.<#= components[i] #>, b.<#= components[i] #>, a.<#= components[i] #>),
|
||||
<#= components[i] #> = <#= TLane #>.Select(condition.<#= components[i] #>, isTrue.<#= components[i] #>, isFalse.<#= components[i] #>),
|
||||
<# } #>
|
||||
};
|
||||
}
|
||||
|
||||
@@ -83,14 +83,14 @@ public unsafe struct Vector2<TLane, TNumber> : IEquatable<Vector2<TLane, TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(TNumber* px, TNumber* py)
|
||||
public void Store(TNumber* px, TNumber* py)
|
||||
{
|
||||
x.Store(px);
|
||||
y.Store(py);
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(ref TNumber x, ref TNumber y)
|
||||
public void Store(ref TNumber x, ref TNumber y)
|
||||
{
|
||||
this.x.Store(ref x);
|
||||
this.y.Store(ref y);
|
||||
|
||||
@@ -89,7 +89,7 @@ public unsafe struct Vector3<TLane, TNumber> : IEquatable<Vector3<TLane, TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(TNumber* px, TNumber* py, TNumber* pz)
|
||||
public void Store(TNumber* px, TNumber* py, TNumber* pz)
|
||||
{
|
||||
x.Store(px);
|
||||
y.Store(py);
|
||||
@@ -97,7 +97,7 @@ public unsafe struct Vector3<TLane, TNumber> : IEquatable<Vector3<TLane, TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(ref TNumber x, ref TNumber y, ref TNumber z)
|
||||
public void Store(ref TNumber x, ref TNumber y, ref TNumber z)
|
||||
{
|
||||
this.x.Store(ref x);
|
||||
this.y.Store(ref y);
|
||||
|
||||
@@ -95,7 +95,7 @@ public unsafe struct Vector4<TLane, TNumber> : IEquatable<Vector4<TLane, TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(TNumber* px, TNumber* py, TNumber* pz, TNumber* pw)
|
||||
public void Store(TNumber* px, TNumber* py, TNumber* pz, TNumber* pw)
|
||||
{
|
||||
x.Store(px);
|
||||
y.Store(py);
|
||||
@@ -104,7 +104,7 @@ public unsafe struct Vector4<TLane, TNumber> : IEquatable<Vector4<TLane, TNumber
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store(ref TNumber x, ref TNumber y, ref TNumber z, ref TNumber w)
|
||||
public void Store(ref TNumber x, ref TNumber y, ref TNumber z, ref TNumber w)
|
||||
{
|
||||
this.x.Store(ref x);
|
||||
this.y.Store(ref y);
|
||||
|
||||
@@ -114,13 +114,13 @@ public unsafe struct {typeName} : IEquatable<{typeName}>
|
||||
}}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store({ForEachDimension(dimension, 1, ", ", (dim, sb) => sb.Append($"{TNumber}* p{components[dim]}"), false)})
|
||||
public void Store({ForEachDimension(dimension, 1, ",", (dim, sb) => sb.Append($"{TNumber}* p{components[dim]}"), false)})
|
||||
{{
|
||||
{ForEachDimension(dimension, 8, Environment.NewLine, (dim, sb) => sb.Append($"{components[dim]}.Store(p{components[dim]});"))}
|
||||
}}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public void Store({ForEachDimension(dimension, 1, ", ", (dim, sb) => sb.Append($"ref {TNumber} {components[dim]}"), false)})
|
||||
public void Store({ForEachDimension(dimension, 1, ",", (dim, sb) => sb.Append($"ref {TNumber} {components[dim]}"), false)})
|
||||
{{
|
||||
{ForEachDimension(dimension, 8, Environment.NewLine, (dim, sb) => sb.Append($"this.{components[dim]}.Store(ref {components[dim]});"))}
|
||||
}}
|
||||
|
||||
Reference in New Issue
Block a user