feat(render): add meshlet rendering and ECS query ref API

Introduces meshlet-based rendering pipeline with new HLSL structures and push constant layouts. Refactors meshlet upload/cooking, updates RenderGraphContext for global/view/instance data, and enhances ECS QueryBuilder with ref returns and [UnscopedRef] for fluent chaining. Improves resource management and disposal patterns, updates D3D12 interop for compatibility, and refines test/app infrastructure. Includes dependency updates, bug fixes, and code cleanups.
This commit is contained in:
2026-03-25 20:27:46 +09:00
parent b729ca86f5
commit 447a4e6904
28 changed files with 407 additions and 165 deletions

View File

@@ -194,7 +194,7 @@ public struct Mesh : IResourceReleasable
this.ComputeBounds();
}
public readonly void ReleaseCpuResources()
public void ReleaseCpuResources()
{
_vertices.Dispose();
_indices.Dispose();
@@ -238,7 +238,7 @@ public struct Mesh : IResourceReleasable
MeshletUtility.Build(config, clodMesh, Unsafe.AsPointer(ref this), MeshletOutputCallback);
}
private static unsafe int MeshletOutputCallback(void* context, ClodGroup group, ReadOnlyUnsafeCollection<ClodCluster>clusters)
private static unsafe int MeshletOutputCallback(void* context, ClodGroup group, ReadOnlyUnsafeCollection<ClodCluster> clusters)
{
var mesh = (Mesh*)context;
ref var data = ref mesh->_meshletData;
@@ -277,13 +277,13 @@ public struct Mesh : IResourceReleasable
data.meshletVertices.Add(cluster.uniqueVertices[j]);
}
// Add local triangles (packed into uints)
nuint triangleCount = cluster.localIndexCount / 3;
var triangleCount = cluster.localIndexCount / 3;
for (nuint j = 0; j < triangleCount; j++)
{
uint i0 = cluster.localIndices[j * 3 + 0];
uint i1 = cluster.localIndices[j * 3 + 1];
uint i2 = cluster.localIndices[j * 3 + 2];
uint packedTriangle = i0 | (i1 << 8) | (i2 << 16);
var packedTriangle = i0 | (i1 << 8) | (i2 << 16);
data.meshletTriangles.Add(packedTriangle);
}
}
@@ -291,7 +291,7 @@ public struct Mesh : IResourceReleasable
return 0;
}
public readonly void ReleaseResource(IResourceDatabase database)
public void ReleaseResource(IResourceDatabase database)
{
ReleaseCpuResources();

View File

@@ -38,7 +38,7 @@ public struct RenderList : IDisposable
{
while (_listIndex < _length)
{
if (_itemIndex < _pList[_listIndex].Count)
if (_itemIndex < _pList[_listIndex].Count - 1)
{
_itemIndex++;
return true;

View File

@@ -93,7 +93,7 @@ public readonly unsafe ref struct RenderingContext
return mesh;
}
ref readonly var meshData = ref r.Value;
ref var meshData = ref r.Value;
var vertexHandle = meshData.VertexBuffer.AsResource();
var indexHandle = meshData.IndexBuffer.AsResource();
@@ -105,9 +105,12 @@ public readonly unsafe ref struct RenderingContext
if (staticMesh)
{
meshData.CookMeshlets();
meshData.ReleaseCpuResources();
TransitionBarrier(vertexHandle, false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.VertexShading);
TransitionBarrier(indexHandle, false, BarrierLayout.Undefined, BarrierAccess.IndexBuffer, BarrierSync.IndexInput);
UploadMeshlets(mesh);
}
return mesh;
@@ -205,17 +208,7 @@ public readonly unsafe ref struct RenderingContext
_directCmd.UploadBuffer(meshRef.MeshLetBuffer, meshletData.meshlets.AsSpan());
_directCmd.UploadBuffer(meshRef.MeshletVerticesBuffer, meshletData.meshletVertices.AsSpan());
// Padding for triangle data if needed
if (trianglesSize > meshletData.meshletTriangles.Count)
{
var paddedData = new uint[trianglesSize];
meshletData.meshletTriangles.AsSpan().CopyTo(paddedData);
_directCmd.UploadBuffer(meshRef.MeshletTrianglesBuffer, paddedData.AsSpan());
}
else
{
_directCmd.UploadBuffer(meshRef.MeshletTrianglesBuffer, meshletData.meshletTriangles.AsSpan());
}
_directCmd.UploadBuffer(meshRef.MeshletTrianglesBuffer, meshletData.meshletTriangles.AsSpan());
TransitionBarrier(meshRef.MeshLetBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
TransitionBarrier(meshRef.MeshletVerticesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);

View File

@@ -18,7 +18,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Misaki.HighPerformance.Analyzer" Version="1.0.0">
<PackageReference Include="Misaki.HighPerformance.Analyzer" Version="1.1.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>

View File

@@ -1,4 +1,5 @@
using Ghost.Core;
#if false
// Obsolete
using Ghost.Core.Graphics;
using Ghost.DSL.ShaderCompiler;
using Ghost.Graphics.Core;
@@ -335,3 +336,4 @@ internal class MeshRenderPass : IRenderPass
}
}
}
#endif

View File

@@ -22,6 +22,9 @@ public interface IRasterRenderContext : IRenderGraphContext
{
int ActiveMeshIndexCount { get; }
void SetGlobalData(uint globalIndex, uint viewIndex);
void SetInstanceIndex(uint instanceIndex);
void SetActiveMaterial(Handle<Material> material);
void SetActiveMaterial(ref readonly Material material);
void SetActiveMesh(Handle<Mesh> mesh);
@@ -58,6 +61,10 @@ internal sealed class RenderGraphContext : IRasterRenderContext, IComputeRenderC
private Handle<GraphicsBuffer> _activePerMeshData;
private int _activeMeshIndexCount;
private uint _activeGlobalIndex;
private uint _activeViewIndex;
private uint _activeInstanceIndex;
public ResourceManager ResourceManager => _resourceManager;
public IResourceDatabase ResourceDatabase => _resourceDatabase;
@@ -221,12 +228,26 @@ internal sealed class RenderGraphContext : IRasterRenderContext, IComputeRenderC
_activeMeshIndexCount = mesh.IndexCount;
}
public void SetGlobalData(uint globalIndex, uint viewIndex)
{
_activeGlobalIndex = globalIndex;
_activeViewIndex = viewIndex;
}
public void SetInstanceIndex(uint instanceIndex)
{
_activeInstanceIndex = instanceIndex;
}
public unsafe void DispatchMesh(uint3 threadGroupCount)
{
// TODO: Global and view constants
// TODO: Global, view, and instance constants
var data = new PushConstantsData
{
globalIndex = _activeGlobalIndex,
viewIndex = _activeViewIndex,
objectIndex = _resourceDatabase.GetBindlessIndex(_activePerMeshData.AsResource()),
instanceIndex = _activeInstanceIndex,
materialIndex = _resourceDatabase.GetBindlessIndex(_activePerMaterialData.AsResource()),
};

View File

@@ -8,18 +8,6 @@ struct PixelInput
float4 uv : TEXCOORD0;
};
struct Meshlet
{
float4 boundingSphere;
float3 boundingBoxMin;
float3 boundingBoxMax;
uint vertexOffset;
uint triangleOffset;
uint groupIndex;
float parentError;
uint packedCounts; // byte vertexCount, byte triangleCount, byte localMaterialIndex, byte lodLevel
};
[numthreads(128, 1, 1)] // 128 threads to cover max 64 vertices and 124 triangles
[outputtopology("triangle")]
void MSMain(
@@ -50,7 +38,9 @@ void MSMain(
// Basic MVP transform not needed if already in world space, but usually we need localToWorld and ViewProj
PerViewData perViewData = LoadData<PerViewData>(g_PushConstantData.perViewBuffer, 0);
float4 worldPos = mul(perObjectData.localToWorld, float4(v.position.xyz, 1.0f));
PerInstanceData perInstanceData = LoadData<PerInstanceData>(g_PushConstantData.perInstanceBuffer, 0);
float4 worldPos = mul(perInstanceData.localToWorld, float4(v.position.xyz, 1.0f));
outVerts[groupThreadID.x].position = mul(perViewData.viewMatrix, worldPos);
outVerts[groupThreadID.x].position = mul(perViewData.projectionMatrix, outVerts[groupThreadID.x].position);
@@ -62,7 +52,7 @@ void MSMain(
if (groupThreadID.x < triangleCount)
{
uint triangleIndex = groupThreadID.x;
// Load the packed 32-bit integer containing the 3 local indices
uint packedIndices = meshletTrianglesBuffer.Load((m.triangleOffset + triangleIndex) * 4);
@@ -76,13 +66,13 @@ void MSMain(
float4 PSMain(PixelInput input) : SV_TARGET
{
PerMaterialData perMaterialData = LoadData<PerMaterialData>(g_PushConstantData.perMaterialBuffer, 0);
float4 color1 = SAMPLE_TEXTURE2D(perMaterialData.texture1, perMaterialData.tex_sampler, input.uv.xy);
float4 color2 = SAMPLE_TEXTURE2D(perMaterialData.texture2, perMaterialData.tex_sampler, input.uv.xy);
float4 color3 = SAMPLE_TEXTURE2D(perMaterialData.texture3, perMaterialData.tex_sampler, input.uv.xy);
float4 color4 = SAMPLE_TEXTURE2D(perMaterialData.texture4, perMaterialData.tex_sampler, input.uv.xy);
float4 blendedColor = (color1 + color2 + color3 + color4) * 0.25f;
return perMaterialData.color * blendedColor + input.color;
// PerMaterialData perMaterialData = LoadData<PerMaterialData>(g_PushConstantData.perMaterialBuffer, 0);
//
// float4 color1 = SAMPLE_TEXTURE2D(perMaterialData.texture1, perMaterialData.tex_sampler, input.uv.xy);
// float4 color2 = SAMPLE_TEXTURE2D(perMaterialData.texture2, perMaterialData.tex_sampler, input.uv.xy);
// float4 color3 = SAMPLE_TEXTURE2D(perMaterialData.texture3, perMaterialData.tex_sampler, input.uv.xy);
// float4 color4 = SAMPLE_TEXTURE2D(perMaterialData.texture4, perMaterialData.tex_sampler, input.uv.xy);
//
// float4 blendedColor = (color1 + color2 + color3 + color4) * 0.25f;
// return perMaterialData.color * blendedColor + input.color;
}

View File

@@ -422,6 +422,7 @@ public class RenderSystem : IDisposable
}
_renderPipeline.Dispose();
_resourceManager.Dispose();
_graphicsEngine.Dispose();
_shutdownEvent.Dispose();

View File

@@ -10,6 +10,18 @@ struct Vertex
float4 color;
};
struct Meshlet
{
float4 boundingSphere;
float3 boundingBoxMin;
float3 boundingBoxMax;
uint vertexOffset;
uint triangleOffset;
uint groupIndex;
float parentError;
uint packedCounts; // byte vertexCount, byte triangleCount, byte localMaterialIndex, byte lodLevel
};
// Resource descriptor heap definitions
#define GLOBAL_TEXTURE2D_HEAP ResourceDescriptorHeap

View File

@@ -5,11 +5,20 @@
struct PushConstantData
{
BYTE_ADDRESS_BUFFER globalBuffer;
BYTE_ADDRESS_BUFFER perViewBuffer;
BYTE_ADDRESS_BUFFER perObjectBuffer;
BYTE_ADDRESS_BUFFER perInstanceBuffer;
BYTE_ADDRESS_BUFFER perMaterialBuffer;
uint globalIndex;
uint viewIndex;
uint objectIndex;
uint instanceIndex;
uint materialIndex;
};
struct GlobalFrameData
{
uint viewBufferIndex;
uint instanceBufferIndex;
uint viewBufferCount;
uint instanceBufferCount;
uint userBufferIndex;
};
struct PerViewData
@@ -23,6 +32,11 @@ struct PerViewData
float4 screenSize; // xy: size, zw: 1/size
};
struct PerInstanceData
{
float4x4 localToWorld;
};
struct PerObjectData
{
float3 worldBoundsMin;

View File

@@ -2,12 +2,12 @@ shader "MyShader/Standard"
{
properties
{
float4 color = { 1, 1, 1, 1 };
tex2d texture1 = { black };
tex2d texture2 = { white };
tex2d texture3 = { grey };
tex2d texture4 = { normal };
sampler tex_sampler;
//float4 color = { 1, 1, 1, 1 };
//tex2d texture1 = { black };
//tex2d texture2 = { white };
//tex2d texture3 = { grey };
//tex2d texture4 = { normal };
//sampler tex_sampler;
}
pass "Forward"
@@ -21,7 +21,96 @@ shader "MyShader/Standard"
color_mask = all;
}
mesh "F:/csharp/GhostEngine/src/Runtime//Ghost.Graphics/RenderPipeline/ShaderCode.hlsl" : "MSMain";
pixel "F:/csharp/GhostEngine/src/Runtime//Ghost.Graphics/RenderPipeline/ShaderCode.hlsl" : "PSMain";
includes
{
"F:/csharp/GhostEngine/src/Runtime/Ghost.Graphics/Shaders/Includes/Properties.hlsl";
}
hlsl
{
struct PixelInput
{
float4 position : SV_POSITION;
float4 color : COLOR;
float4 uv : TEXCOORD0;
};
[numthreads(128, 1, 1)] // 128 threads to cover max 64 vertices and 124 triangles
[outputtopology("triangle")]
void MSMain(
uint3 groupThreadID : SV_GroupThreadID,
uint3 groupID : SV_GroupID,
out vertices PixelInput outVerts[64],
out indices uint3 outTris[124])
{
PerObjectData perObjectData = LoadData<PerObjectData>(g_PushConstantData.objectIndex, 0);
ByteAddressBuffer meshletBuffer = GET_BUFFER(perObjectData.meshletBuffer);
Meshlet m = meshletBuffer.Load<Meshlet>(groupID.x * sizeof(Meshlet));
uint vertexCount = m.packedCounts & 0xFF;
uint triangleCount = (m.packedCounts >> 8) & 0xFF;
SetMeshOutputCounts(vertexCount, triangleCount);
ByteAddressBuffer meshletVerticesBuffer = GET_BUFFER(perObjectData.meshletVerticesBuffer);
ByteAddressBuffer meshletTrianglesBuffer = GET_BUFFER(perObjectData.meshletTrianglesBuffer);
// Write vertex output
if (groupThreadID.x < vertexCount)
{
uint vertexIndex = meshletVerticesBuffer.Load((m.vertexOffset + groupThreadID.x) * 4);
ByteAddressBuffer vertices = GET_BUFFER(perObjectData.vertexBuffer);
Vertex v = vertices.Load<Vertex>(vertexIndex * sizeof(Vertex));
GlobalFrameData globalFrameData = LoadData<GlobalFrameData>(g_PushConstantData.globalIndex, 0);
PerViewData perViewData = LoadData<PerViewData>(g_PushConstantData.viewIndex, 0);
PerInstanceData perInstanceData = LoadData<PerInstanceData>(globalFrameData.instanceBufferIndex, g_PushConstantData.instanceIndex);
float4 worldPos = mul(perInstanceData.localToWorld, float4(v.position.xyz, 1.0f));
float4 viewPos = mul(perViewData.viewMatrix, worldPos);
outVerts[groupThreadID.x].position = mul(perViewData.projectionMatrix, viewPos);
outVerts[groupThreadID.x].color = v.color;
outVerts[groupThreadID.x].uv = v.uv;
}
// Write triangle output (1 thread processes 1 triangle)
if (groupThreadID.x < triangleCount)
{
uint triangleIndex = groupThreadID.x;
// Load the packed 32-bit integer containing the 3 local indices
uint packedIndices = meshletTrianglesBuffer.Load((m.triangleOffset + triangleIndex) * 4);
uint i0 = packedIndices & 0xFF;
uint i1 = (packedIndices >> 8) & 0xFF;
uint i2 = (packedIndices >> 16) & 0xFF;
outTris[triangleIndex] = uint3(i0, i1, i2);
}
}
float4 PSMain(PixelInput input) : SV_TARGET
{
// PerMaterialData perMaterialData = LoadData<PerMaterialData>(g_PushConstantData.materialIndex, 0);
//
// float4 color1 = SAMPLE_TEXTURE2D(perMaterialData.texture1, perMaterialData.tex_sampler, input.uv.xy);
// float4 color2 = SAMPLE_TEXTURE2D(perMaterialData.texture2, perMaterialData.tex_sampler, input.uv.xy);
// float4 color3 = SAMPLE_TEXTURE2D(perMaterialData.texture3, perMaterialData.tex_sampler, input.uv.xy);
// float4 color4 = SAMPLE_TEXTURE2D(perMaterialData.texture4, perMaterialData.tex_sampler, input.uv.xy);
//
// float4 blendedColor = (color1 + color2 + color3 + color4) * 0.25f;
// return perMaterialData.color * blendedColor + input.color;
// TODO: Randome color on meshlet.
return float4(1, 0, 0, 1);
}
}
mesh "hlsl_block" : "MSMain";
pixel "hlsl_block" : "PSMain";
}
}