feat: implement CPU meshlet baking and update pipeline shaders

This commit is contained in:
2026-03-20 07:53:23 +00:00
parent db0be367ef
commit a35321df89
8 changed files with 257 additions and 17 deletions

View File

@@ -20,8 +20,8 @@
<ItemGroup>
<PackageReference Include="Misaki.HighPerformance" Version="1.0.4" />
<PackageReference Include="Misaki.HighPerformance.Jobs" Version="1.5.2" />
<PackageReference Include="Misaki.HighPerformance.LowLevel" Version="1.5.1">
<PackageReference Include="Misaki.HighPerformance.Jobs" Version="1.5.3" />
<PackageReference Include="Misaki.HighPerformance.LowLevel" Version="1.5.2">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>

View File

@@ -72,4 +72,7 @@ public struct PerObjectData
public uint vertexBuffer;
public float3 worldBoundsMax;
public uint indexBuffer;
public uint meshletBuffer;
public uint meshletVerticesBuffer;
public uint meshletTrianglesBuffer;
};

View File

@@ -6,6 +6,8 @@ using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.Geometry;
using System.Runtime.InteropServices;
using System.Runtime.CompilerServices;
namespace Ghost.Graphics.Core;
@@ -68,6 +70,8 @@ public struct Mesh : IResourceReleasable
private UnsafeList<uint> _indices;
private MeshletMeshData _meshletData;
public MeshletMeshData MeshletData => _meshletData;
internal bool IsMeshDataDirty
{
get; private set;
@@ -149,6 +153,22 @@ public struct Mesh : IResourceReleasable
get; internal set;
}
/// <summary>
/// Gets the handle to the meshlet vertices buffer on the GPU.
/// </summary>
public Handle<GraphicsBuffer> MeshletVerticesBuffer
{
get; internal set;
}
/// <summary>
/// Gets the handle to the meshlet triangles buffer on the GPU.
/// </summary>
public Handle<GraphicsBuffer> MeshletTrianglesBuffer
{
get; internal set;
}
/// <summary>
/// Gets the handle to the mesh data buffer on the GPU.
/// </summary>
@@ -176,6 +196,92 @@ public struct Mesh : IResourceReleasable
_meshletData.Dispose();
}
public unsafe void CookMeshlets()
{
// 1. Prepare Configuration
var config = new ClodConfig
{
maxVertices = 64,
minTriangles = 32,
maxTriangles = 124,
partitionSize = 128,
clusterSpatial = true,
clusterFillWeight = 1.0f,
clusterSplitFactor = 1.0f,
simplifyRatio = 0.5f,
simplifyThreshold = 0.5f,
simplifyErrorMergePrevious = 0.5f,
simplifyErrorMergeAdditive = 0.5f,
simplifyErrorFactorSloppy = 1.0f,
simplifyErrorEdgeLimit = 1.0f,
optimizeBounds = true,
optimizeClusters = true
};
// 2. Map Mesh to ClodMesh
ClodMesh clodMesh = new ClodMesh
{
vertexPositions = (float*)_vertices.GetUnsafePtr(),
vertexCount = (nuint)_vertices.Count,
vertexPositionsStride = (nuint)sizeof(Vertex),
indices = (uint*)_indices.GetUnsafePtr(),
indexCount = (nuint)_indices.Count,
attributeProtectMask = 0
};
// 3. Build
MeshletUtility.Build(config, clodMesh, Unsafe.AsPointer(ref this), MeshletOutputCallback);
}
private static unsafe int MeshletOutputCallback(void* context, ClodGroup group, ClodCluster* clusters, nuint clusterCount)
{
Mesh* mesh = (Mesh*)context;
ref var data = ref mesh->_meshletData;
// Ensure lists are initialized
if (!data.groups.IsCreated) data.groups = new UnsafeList<MeshletGroup>(16, Allocator.Persistent);
if (!data.meshlets.IsCreated) data.meshlets = new UnsafeList<Meshlet>(64, Allocator.Persistent);
if (!data.meshletVertices.IsCreated) data.meshletVertices = new UnsafeList<uint>(128, Allocator.Persistent);
if (!data.meshletTriangles.IsCreated) data.meshletTriangles = new UnsafeList<byte>(128, Allocator.Persistent);
var meshletGroup = new MeshletGroup
{
meshletStartIndex = (uint)data.meshlets.Count,
meshletCount = (uint)clusterCount,
lodLevel = (uint)group.depth
};
data.groups.Add(meshletGroup);
for (nuint i = 0; i < clusterCount; i++)
{
var cluster = clusters[i];
var meshlet = new Meshlet
{
vertexCount = (byte)cluster.vertexCount,
triangleCount = (byte)(cluster.indexCount / 3),
vertexOffset = (uint)data.meshletVertices.Count,
triangleOffset = (uint)data.meshletTriangles.Count,
groupIndex = (uint)data.groups.Count - 1
};
data.meshlets.Add(meshlet);
// Add indices
for (nuint j = 0; j < cluster.indexCount; j++)
{
data.meshletVertices.Add(cluster.indices[j]);
}
// Add triangles (packed indices or byte offsets)
// Assuming 8-bit local indices for meshlets as per standard convention
for (nuint j = 0; j < cluster.indexCount; j++)
{
data.meshletTriangles.Add((byte)j);
}
}
return 0;
}
public readonly void ReleaseResource(IResourceDatabase database)
{
ReleaseCpuResources();
@@ -183,6 +289,8 @@ public struct Mesh : IResourceReleasable
database.ReleaseResource(VertexBuffer.AsResource());
database.ReleaseResource(IndexBuffer.AsResource());
database.ReleaseResource(MeshLetBuffer.AsResource());
database.ReleaseResource(MeshletVerticesBuffer.AsResource());
database.ReleaseResource(MeshletTrianglesBuffer.AsResource());
database.ReleaseResource(ObjectDataBuffer.AsResource());
}
}

View File

@@ -157,6 +157,68 @@ public readonly unsafe ref struct RenderingContext
}
}
public void UploadMeshlets(Handle<Mesh> mesh)
{
var r = _resourceManager.GetMeshReference(mesh);
if (r.IsFailure) return;
ref var meshRef = ref r.Value;
var meshletData = meshRef.MeshletData;
if (!meshletData.meshlets.IsCreated || meshletData.meshlets.Count == 0) return;
var meshletDesc = new BufferDesc
{
Size = (uint)(meshletData.meshlets.Count * sizeof(Meshlet)),
Stride = (uint)sizeof(Meshlet),
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Default,
};
var verticesDesc = new BufferDesc
{
Size = (uint)(meshletData.meshletVertices.Count * sizeof(uint)),
Stride = sizeof(uint),
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Default,
};
// Ensure size is multiple of 4 for Raw buffer
var trianglesSize = (uint)meshletData.meshletTriangles.Count;
trianglesSize = (trianglesSize + 3u) & ~3u;
var trianglesDesc = new BufferDesc
{
Size = trianglesSize,
Stride = sizeof(byte),
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Default,
};
meshRef.MeshLetBuffer = _engine.ResourceAllocator.CreateBuffer(in meshletDesc, "Meshlets");
meshRef.MeshletVerticesBuffer = _engine.ResourceAllocator.CreateBuffer(in verticesDesc, "MeshletVertices");
meshRef.MeshletTrianglesBuffer = _engine.ResourceAllocator.CreateBuffer(in trianglesDesc, "MeshletTriangles");
TransitionBarrier(meshRef.MeshLetBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);
TransitionBarrier(meshRef.MeshletVerticesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);
TransitionBarrier(meshRef.MeshletTrianglesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);
_directCmd.UploadBuffer(meshRef.MeshLetBuffer, meshletData.meshlets.AsSpan());
_directCmd.UploadBuffer(meshRef.MeshletVerticesBuffer, meshletData.meshletVertices.AsSpan());
// Padding for triangle data if needed
if (trianglesSize > meshletData.meshletTriangles.Count)
{
var paddedData = new byte[trianglesSize];
meshletData.meshletTriangles.AsSpan().CopyTo(paddedData);
_directCmd.UploadBuffer(meshRef.MeshletTrianglesBuffer, paddedData.AsSpan());
}
else
{
_directCmd.UploadBuffer(meshRef.MeshletTrianglesBuffer, meshletData.meshletTriangles.AsSpan());
}
TransitionBarrier(meshRef.MeshLetBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
TransitionBarrier(meshRef.MeshletVerticesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
TransitionBarrier(meshRef.MeshletTrianglesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
}
public void UpdateObjectData(Handle<Mesh> mesh, float4x4 localToWorld)
{
var r = _resourceManager.GetMeshReference(mesh);
@@ -173,6 +235,9 @@ public readonly unsafe ref struct RenderingContext
worldBoundsMax = meshData.BoundingBox.Max,
vertexBuffer = _engine.ResourceDatabase.GetBindlessIndex(meshData.VertexBuffer.AsResource()),
indexBuffer = _engine.ResourceDatabase.GetBindlessIndex(meshData.IndexBuffer.AsResource()),
meshletBuffer = meshData.MeshLetBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshLetBuffer.AsResource()),
meshletVerticesBuffer = meshData.MeshletVerticesBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshletVerticesBuffer.AsResource()),
meshletTrianglesBuffer = meshData.MeshletTrianglesBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshletTrianglesBuffer.AsResource()),
};
var bufferHandle = meshData.ObjectDataBuffer.AsResource();

View File

@@ -194,6 +194,16 @@ internal class MeshRenderPass : IRenderPass
MeshBuilder.CreateCube(0.75f, default, Misaki.HighPerformance.LowLevel.Buffer.Allocator.Persistent, out var vertices, out var indices);
_mesh = ctx.CreateMesh(vertices, indices, true);
// Cook meshlets for the mesh
var meshRef = ctx.ResourceManager.GetMeshReference(_mesh);
if (meshRef.IsSuccess)
{
meshRef.Value.CookMeshlets();
}
ctx.UploadMeshlets(_mesh);
ctx.UpdateObjectData(_mesh, float4x4.identity);
_textures = new Handle<Texture>[_textureFiles.Length];

View File

@@ -8,30 +8,80 @@ struct PixelInput
float4 uv : TEXCOORD0;
};
[numthreads(3, 1, 1)] // 3 threads per triangle
struct Meshlet
{
float4 boundingSphere;
float3 boundingBoxMin;
float3 boundingBoxMax;
uint vertexOffset;
uint triangleOffset;
uint groupIndex;
float parentError;
uint packedCounts; // byte vertexCount, byte triangleCount, byte localMaterialIndex, byte lodLevel
};
[numthreads(64, 1, 1)] // 64 threads for max 64 vertices and up to 124 triangles
[OUTPUT_TRIANGLE_TOPOLOGY]
void MSMain(
uint3 groupThreadID : SV_GroupThreadID,
uint groupID : SV_GroupID,
out vertices PixelInput outVerts[3],
out indices uint3 outTris[1])
out vertices PixelInput outVerts[64],
out indices uint3 outTris[124])
{
uint vertexId = groupThreadID.x;
PerObjectData perObjectData = LoadData<PerObjectData>(g_PushConstantData.perObjectBuffer, 0);
Vertex v = LoadVertexData(vertexId, groupID.x, perObjectData.vertexBuffer, perObjectData.indexBuffer);
SetMeshOutputCounts(3, 1);
ByteAddressBuffer meshletBuffer = GET_BUFFER(perObjectData.meshletBuffer);
Meshlet m = meshletBuffer.Load<Meshlet>(groupID.x * sizeof(Meshlet));
uint vertexCount = m.packedCounts & 0xFF;
uint triangleCount = (m.packedCounts >> 8) & 0xFF;
SetMeshOutputCounts(vertexCount, triangleCount);
ByteAddressBuffer meshletVerticesBuffer = GET_BUFFER(perObjectData.meshletVerticesBuffer);
ByteAddressBuffer meshletTrianglesBuffer = GET_BUFFER(perObjectData.meshletTrianglesBuffer);
// Write vertex output
outVerts[vertexId].position = v.position;
outVerts[vertexId].color = v.color;
outVerts[vertexId].uv = v.uv;
// Thread 0 defines topology
if (vertexId == 0)
if (groupThreadID.x < vertexCount)
{
outTris[0] = uint3(0, 1, 2);
uint vertexIndex = meshletVerticesBuffer.Load((m.vertexOffset + groupThreadID.x) * 4);
ByteAddressBuffer vertices = GET_BUFFER(perObjectData.vertexBuffer);
Vertex v = vertices.Load<Vertex>(vertexIndex * sizeof(Vertex));
// Basic MVP transform not needed if already in world space, but usually we need localToWorld and ViewProj
PerViewData perViewData = LoadData<PerViewData>(g_PushConstantData.perViewBuffer, 0);
float4 worldPos = mul(perObjectData.localToWorld, float4(v.position.xyz, 1.0f));
outVerts[groupThreadID.x].position = mul(perViewData.viewMatrix, worldPos);
outVerts[groupThreadID.x].position = mul(perViewData.projectionMatrix, outVerts[groupThreadID.x].position);
outVerts[groupThreadID.x].color = v.color;
outVerts[groupThreadID.x].uv = v.uv;
}
// Write triangle output (1 thread processes 1 triangle)
// We could pack 3 indices in a uint or just use byte offset
// In our CPU code, we packed it as individual bytes, so 3 bytes per triangle.
// For 124 triangles, we have 372 bytes.
if (groupThreadID.x < triangleCount)
{
uint triangleIndex = groupThreadID.x;
uint baseOffset = m.triangleOffset + triangleIndex * 3;
// Load 4 bytes to get the 3 index bytes
// Needs byte-aligned loading
uint wordOffset = baseOffset & ~3;
uint shift = (baseOffset & 3) * 8;
uint packedIndices1 = meshletTrianglesBuffer.Load(wordOffset);
uint packedIndices2 = meshletTrianglesBuffer.Load(wordOffset + 4);
uint64_t combined = ((uint64_t)packedIndices2 << 32) | packedIndices1;
uint packedIndices = (uint)(combined >> shift);
uint i0 = packedIndices & 0xFF;
uint i1 = (packedIndices >> 8) & 0xFF;
uint i2 = (packedIndices >> 16) & 0xFF;
outTris[triangleIndex] = uint3(i0, i1, i2);
}
}

View File

@@ -29,6 +29,9 @@ struct PerObjectData
BYTE_ADDRESS_BUFFER vertexBuffer;
float3 worldBoundsMax;
BYTE_ADDRESS_BUFFER indexBuffer;
BYTE_ADDRESS_BUFFER meshletBuffer;
BYTE_ADDRESS_BUFFER meshletVerticesBuffer;
BYTE_ADDRESS_BUFFER meshletTrianglesBuffer;
};
PushConstantData g_PushConstantData : register(b0);