feat: implement CPU meshlet baking and update pipeline shaders
This commit is contained in:
@@ -20,8 +20,8 @@
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Misaki.HighPerformance" Version="1.0.4" />
|
||||
<PackageReference Include="Misaki.HighPerformance.Jobs" Version="1.5.2" />
|
||||
<PackageReference Include="Misaki.HighPerformance.LowLevel" Version="1.5.1">
|
||||
<PackageReference Include="Misaki.HighPerformance.Jobs" Version="1.5.3" />
|
||||
<PackageReference Include="Misaki.HighPerformance.LowLevel" Version="1.5.2">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
|
||||
@@ -72,4 +72,7 @@ public struct PerObjectData
|
||||
public uint vertexBuffer;
|
||||
public float3 worldBoundsMax;
|
||||
public uint indexBuffer;
|
||||
public uint meshletBuffer;
|
||||
public uint meshletVerticesBuffer;
|
||||
public uint meshletTrianglesBuffer;
|
||||
};
|
||||
|
||||
@@ -6,6 +6,8 @@ using Misaki.HighPerformance.LowLevel.Collections;
|
||||
using Misaki.HighPerformance.LowLevel.Utilities;
|
||||
using Misaki.HighPerformance.Mathematics;
|
||||
using Misaki.HighPerformance.Mathematics.Geometry;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ghost.Graphics.Core;
|
||||
|
||||
@@ -68,6 +70,8 @@ public struct Mesh : IResourceReleasable
|
||||
private UnsafeList<uint> _indices;
|
||||
private MeshletMeshData _meshletData;
|
||||
|
||||
public MeshletMeshData MeshletData => _meshletData;
|
||||
|
||||
internal bool IsMeshDataDirty
|
||||
{
|
||||
get; private set;
|
||||
@@ -149,6 +153,22 @@ public struct Mesh : IResourceReleasable
|
||||
get; internal set;
|
||||
}
|
||||
|
||||
/// <summary>
/// GPU buffer handle for the meshlet vertex table (the per-meshlet list of
/// indices into the mesh vertex buffer). Readable publicly; assignable only
/// from within this assembly.
/// </summary>
public Handle<GraphicsBuffer> MeshletVerticesBuffer { get; internal set; }
|
||||
|
||||
/// <summary>
/// GPU buffer handle for the meshlet triangle data (byte-sized local
/// indices, three per triangle). Readable publicly; assignable only from
/// within this assembly.
/// </summary>
public Handle<GraphicsBuffer> MeshletTrianglesBuffer { get; internal set; }
|
||||
|
||||
/// <summary>
|
||||
/// Gets the handle to the mesh data buffer on the GPU.
|
||||
/// </summary>
|
||||
@@ -176,6 +196,92 @@ public struct Mesh : IResourceReleasable
|
||||
_meshletData.Dispose();
|
||||
}
|
||||
|
||||
/// <summary>
/// Builds the CPU-side meshlet data for this mesh by feeding the current
/// vertex and index lists to <c>MeshletUtility.Build</c>. Every group the
/// builder emits is appended to <c>_meshletData</c> through
/// <see cref="MeshletOutputCallback"/>.
/// </summary>
/// <remarks>
/// Does nothing when the mesh has no vertices or no indices.
/// NOTE(review): results are appended to any meshlet data that already
/// exists; confirm whether callers are expected to cook only once.
/// </remarks>
public unsafe void CookMeshlets()
{
    // Nothing to cluster without geometry.
    if (_vertices.Count == 0 || _indices.Count == 0)
    {
        return;
    }

    // 1. Builder configuration (cluster size limits and simplification tuning).
    var config = new ClodConfig
    {
        maxVertices = 64,
        minTriangles = 32,
        maxTriangles = 124,
        partitionSize = 128,
        clusterSpatial = true,
        clusterFillWeight = 1.0f,
        clusterSplitFactor = 1.0f,
        simplifyRatio = 0.5f,
        simplifyThreshold = 0.5f,
        simplifyErrorMergePrevious = 0.5f,
        simplifyErrorMergeAdditive = 0.5f,
        simplifyErrorFactorSloppy = 1.0f,
        simplifyErrorEdgeLimit = 1.0f,
        optimizeBounds = true,
        optimizeClusters = true
    };

    // 2. Describe the mesh to the builder.
    // The vertex list is reinterpreted as a float stream with a stride of one
    // Vertex, which assumes the position is the first field of Vertex
    // (TODO confirm against the Vertex layout).
    var clodMesh = new ClodMesh
    {
        vertexPositions = (float*)_vertices.GetUnsafePtr(),
        vertexCount = (nuint)_vertices.Count,
        vertexPositionsStride = (nuint)sizeof(Vertex),
        indices = (uint*)_indices.GetUnsafePtr(),
        indexCount = (nuint)_indices.Count,
        attributeProtectMask = 0
    };

    // 3. Build.
    // The callback receives a raw pointer back to this struct and writes into
    // its fields. Unsafe.AsPointer does not pin, so the address could go stale
    // if this instance lives in GC-movable storage; `fixed` keeps it valid for
    // the whole duration of the build.
    fixed (Mesh* self = &this)
    {
        MeshletUtility.Build(config, clodMesh, self, MeshletOutputCallback);
    }
}
|
||||
|
||||
/// <summary>
/// Receives one group of clusters from <c>MeshletUtility.Build</c> and appends
/// it to the meshlet data of the <see cref="Mesh"/> passed as
/// <paramref name="context"/>.
/// </summary>
/// <param name="context">Pointer to the <see cref="Mesh"/> being cooked.</param>
/// <param name="group">Group description; its <c>depth</c> is stored as the LOD level.</param>
/// <param name="clusters">Pointer to the first cluster of the group.</param>
/// <param name="clusterCount">Number of clusters reachable through <paramref name="clusters"/>.</param>
/// <returns>Always 0.</returns>
/// <exception cref="System.InvalidOperationException">
/// A cluster references more unique vertices than a byte-sized local index can address.
/// </exception>
/// <remarks>
/// For every cluster the indices are de-duplicated into a per-meshlet vertex
/// table (appended to <c>meshletVertices</c>), and each triangle corner is
/// stored in <c>meshletTriangles</c> as a byte index into that table. This is
/// the layout the mesh shader reads: <c>vertexCount</c> entries starting at
/// <c>vertexOffset</c>, and three bytes per triangle starting at
/// <c>triangleOffset</c>.
/// NOTE(review): the return value is presumably consumed by the builder as a
/// group identifier; confirm whether the group index should be returned instead of 0.
/// </remarks>
private static unsafe int MeshletOutputCallback(void* context, ClodGroup group, ClodCluster* clusters, nuint clusterCount)
{
    // Local indices are bytes, so one meshlet can address at most 256 vertices.
    const int MaxLocalVertices = 256;

    Mesh* mesh = (Mesh*)context;
    ref var data = ref mesh->_meshletData;

    // Lazily create the output lists on first use.
    if (!data.groups.IsCreated)
    {
        data.groups = new UnsafeList<MeshletGroup>(16, Allocator.Persistent);
    }

    if (!data.meshlets.IsCreated)
    {
        data.meshlets = new UnsafeList<Meshlet>(64, Allocator.Persistent);
    }

    if (!data.meshletVertices.IsCreated)
    {
        data.meshletVertices = new UnsafeList<uint>(128, Allocator.Persistent);
    }

    if (!data.meshletTriangles.IsCreated)
    {
        data.meshletTriangles = new UnsafeList<byte>(128, Allocator.Persistent);
    }

    // Index this group will occupy once it has been added.
    uint groupIndex = (uint)data.groups.Count;

    data.groups.Add(new MeshletGroup
    {
        meshletStartIndex = (uint)data.meshlets.Count,
        meshletCount = (uint)clusterCount,
        lodLevel = (uint)group.depth
    });

    // Scratch table of the unique mesh-vertex indices seen in the current
    // cluster; allocated once, outside the loop, and reused per cluster.
    uint* localVertices = stackalloc uint[MaxLocalVertices];

    for (nuint i = 0; i < clusterCount; i++)
    {
        var cluster = clusters[i];

        uint vertexOffset = (uint)data.meshletVertices.Count;
        uint triangleOffset = (uint)data.meshletTriangles.Count;
        int localCount = 0;

        for (nuint j = 0; j < cluster.indexCount; j++)
        {
            uint meshVertex = cluster.indices[j];

            // Linear search is sufficient: the table is bounded by MaxLocalVertices.
            int local = 0;
            while (local < localCount && localVertices[local] != meshVertex)
            {
                local++;
            }

            if (local == localCount)
            {
                if (localCount == MaxLocalVertices)
                {
                    // Fail loudly instead of overrunning the scratch table or
                    // silently wrapping the byte-sized local index.
                    throw new System.InvalidOperationException(
                        "Meshlet cluster references more than 256 unique vertices; byte-sized local indices cannot address it.");
                }

                localVertices[localCount++] = meshVertex;
                data.meshletVertices.Add(meshVertex);
            }

            data.meshletTriangles.Add((byte)local);
        }

        data.meshlets.Add(new Meshlet
        {
            // Use the count actually written so the meshlet always matches its vertex table.
            vertexCount = (byte)localCount,
            triangleCount = (byte)(cluster.indexCount / 3),
            vertexOffset = vertexOffset,
            triangleOffset = triangleOffset,
            groupIndex = groupIndex
        });
    }

    return 0;
}
|
||||
|
||||
public readonly void ReleaseResource(IResourceDatabase database)
|
||||
{
|
||||
ReleaseCpuResources();
|
||||
@@ -183,6 +289,8 @@ public struct Mesh : IResourceReleasable
|
||||
database.ReleaseResource(VertexBuffer.AsResource());
|
||||
database.ReleaseResource(IndexBuffer.AsResource());
|
||||
database.ReleaseResource(MeshLetBuffer.AsResource());
|
||||
database.ReleaseResource(MeshletVerticesBuffer.AsResource());
|
||||
database.ReleaseResource(MeshletTrianglesBuffer.AsResource());
|
||||
database.ReleaseResource(ObjectDataBuffer.AsResource());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,6 +157,68 @@ public readonly unsafe ref struct RenderingContext
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Creates the three GPU buffers that hold a mesh's cooked meshlet data
/// (meshlets, meshlet vertices, meshlet triangles), records the uploads on
/// the direct command list, and transitions the buffers for shader reads.
/// </summary>
/// <param name="mesh">Handle of the mesh whose meshlet data is uploaded.</param>
/// <remarks>
/// Returns without doing anything when the mesh cannot be resolved or when
/// it has no cooked meshlets.
/// </remarks>
public void UploadMeshlets(Handle<Mesh> mesh)
{
    var lookup = _resourceManager.GetMeshReference(mesh);
    if (lookup.IsFailure)
    {
        return;
    }

    ref var target = ref lookup.Value;
    var cooked = target.MeshletData;

    var hasMeshlets = cooked.meshlets.IsCreated && cooked.meshlets.Count != 0;
    if (!hasMeshlets)
    {
        return;
    }

    // Raw buffers need a size that is a multiple of 4, so round the triangle
    // byte count up to the next 4-byte boundary.
    var rawTriangleBytes = (uint)cooked.meshletTriangles.Count;
    var alignedTriangleBytes = (rawTriangleBytes + 3u) & ~3u;

    var meshletBufferDesc = new BufferDesc
    {
        Size = (uint)(cooked.meshlets.Count * sizeof(Meshlet)),
        Stride = (uint)sizeof(Meshlet),
        Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
        MemoryType = ResourceMemoryType.Default,
    };
    var vertexTableDesc = new BufferDesc
    {
        Size = (uint)(cooked.meshletVertices.Count * sizeof(uint)),
        Stride = sizeof(uint),
        Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
        MemoryType = ResourceMemoryType.Default,
    };
    var triangleDataDesc = new BufferDesc
    {
        Size = alignedTriangleBytes,
        Stride = sizeof(byte),
        Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
        MemoryType = ResourceMemoryType.Default,
    };

    // Allocate the destination buffers and store their handles on the mesh.
    target.MeshLetBuffer = _engine.ResourceAllocator.CreateBuffer(in meshletBufferDesc, "Meshlets");
    target.MeshletVerticesBuffer = _engine.ResourceAllocator.CreateBuffer(in vertexTableDesc, "MeshletVertices");
    target.MeshletTrianglesBuffer = _engine.ResourceAllocator.CreateBuffer(in triangleDataDesc, "MeshletTriangles");

    // Make all three buffers copy destinations before recording the uploads.
    TransitionBarrier(target.MeshLetBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);
    TransitionBarrier(target.MeshletVerticesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);
    TransitionBarrier(target.MeshletTrianglesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.CopyDest, BarrierSync.Copy);

    _directCmd.UploadBuffer(target.MeshLetBuffer, cooked.meshlets.AsSpan());
    _directCmd.UploadBuffer(target.MeshletVerticesBuffer, cooked.meshletVertices.AsSpan());

    if (alignedTriangleBytes == rawTriangleBytes)
    {
        // Already 4-byte aligned: upload the triangle bytes as they are.
        _directCmd.UploadBuffer(target.MeshletTrianglesBuffer, cooked.meshletTriangles.AsSpan());
    }
    else
    {
        // Stage through a zero-initialised array of the aligned size so the
        // upload covers the whole buffer, padding included.
        var staging = new byte[alignedTriangleBytes];
        cooked.meshletTriangles.AsSpan().CopyTo(staging);
        _directCmd.UploadBuffer(target.MeshletTrianglesBuffer, staging.AsSpan());
    }

    // Hand the buffers over to the shader stages that read them.
    TransitionBarrier(target.MeshLetBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
    TransitionBarrier(target.MeshletVerticesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
    TransitionBarrier(target.MeshletTrianglesBuffer.AsResource(), false, BarrierLayout.Undefined, BarrierAccess.ShaderResource, BarrierSync.NonPixelShading | BarrierSync.PixelShading);
}
|
||||
|
||||
public void UpdateObjectData(Handle<Mesh> mesh, float4x4 localToWorld)
|
||||
{
|
||||
var r = _resourceManager.GetMeshReference(mesh);
|
||||
@@ -173,6 +235,9 @@ public readonly unsafe ref struct RenderingContext
|
||||
worldBoundsMax = meshData.BoundingBox.Max,
|
||||
vertexBuffer = _engine.ResourceDatabase.GetBindlessIndex(meshData.VertexBuffer.AsResource()),
|
||||
indexBuffer = _engine.ResourceDatabase.GetBindlessIndex(meshData.IndexBuffer.AsResource()),
|
||||
meshletBuffer = meshData.MeshLetBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshLetBuffer.AsResource()),
|
||||
meshletVerticesBuffer = meshData.MeshletVerticesBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshletVerticesBuffer.AsResource()),
|
||||
meshletTrianglesBuffer = meshData.MeshletTrianglesBuffer.IsInvalid ? 0 : _engine.ResourceDatabase.GetBindlessIndex(meshData.MeshletTrianglesBuffer.AsResource()),
|
||||
};
|
||||
|
||||
var bufferHandle = meshData.ObjectDataBuffer.AsResource();
|
||||
|
||||
@@ -194,6 +194,16 @@ internal class MeshRenderPass : IRenderPass
|
||||
MeshBuilder.CreateCube(0.75f, default, Misaki.HighPerformance.LowLevel.Buffer.Allocator.Persistent, out var vertices, out var indices);
|
||||
|
||||
_mesh = ctx.CreateMesh(vertices, indices, true);
|
||||
|
||||
// Cook meshlets for the mesh
|
||||
var meshRef = ctx.ResourceManager.GetMeshReference(_mesh);
|
||||
if (meshRef.IsSuccess)
|
||||
{
|
||||
meshRef.Value.CookMeshlets();
|
||||
}
|
||||
|
||||
ctx.UploadMeshlets(_mesh);
|
||||
|
||||
ctx.UpdateObjectData(_mesh, float4x4.identity);
|
||||
|
||||
_textures = new Handle<Texture>[_textureFiles.Length];
|
||||
|
||||
@@ -8,30 +8,80 @@ struct PixelInput
|
||||
float4 uv : TEXCOORD0;
|
||||
};
|
||||
|
||||
[numthreads(3, 1, 1)] // 3 threads per triangle
|
||||
// GPU-side view of one meshlet record as loaded from the meshlet buffer.
// NOTE(review): field order and sizes must match the CPU-side Meshlet struct
// byte for byte - confirm against the C# definition.
struct Meshlet
{
    // Culling bounds. Presumably xyz = center, w = radius - TODO confirm.
    float4 boundingSphere;
    float3 boundingBoxMin;
    float3 boundingBoxMax;

    // Element index of this meshlet's first entry in the meshlet-vertices
    // buffer (the shader multiplies by 4 to form the byte address).
    uint vertexOffset;

    // Byte offset of this meshlet's first triangle in the meshlet-triangles
    // buffer; each triangle occupies three consecutive bytes.
    uint triangleOffset;

    // Index of the meshlet group this meshlet belongs to.
    uint groupIndex;

    float parentError;

    // Four packed bytes, lowest byte first:
    // vertexCount, triangleCount, localMaterialIndex, lodLevel.
    uint packedCounts;
};
|
||||
|
||||
[numthreads(64, 1, 1)] // 64 threads for max 64 vertices and up to 124 triangles
|
||||
[OUTPUT_TRIANGLE_TOPOLOGY]
|
||||
void MSMain(
|
||||
uint3 groupThreadID : SV_GroupThreadID,
|
||||
uint groupID : SV_GroupID,
|
||||
out vertices PixelInput outVerts[3],
|
||||
out indices uint3 outTris[1])
|
||||
out vertices PixelInput outVerts[64],
|
||||
out indices uint3 outTris[124])
|
||||
{
|
||||
uint vertexId = groupThreadID.x;
|
||||
|
||||
PerObjectData perObjectData = LoadData<PerObjectData>(g_PushConstantData.perObjectBuffer, 0);
|
||||
Vertex v = LoadVertexData(vertexId, groupID.x, perObjectData.vertexBuffer, perObjectData.indexBuffer);
|
||||
|
||||
SetMeshOutputCounts(3, 1);
|
||||
ByteAddressBuffer meshletBuffer = GET_BUFFER(perObjectData.meshletBuffer);
|
||||
Meshlet m = meshletBuffer.Load<Meshlet>(groupID.x * sizeof(Meshlet));
|
||||
|
||||
uint vertexCount = m.packedCounts & 0xFF;
|
||||
uint triangleCount = (m.packedCounts >> 8) & 0xFF;
|
||||
|
||||
SetMeshOutputCounts(vertexCount, triangleCount);
|
||||
|
||||
ByteAddressBuffer meshletVerticesBuffer = GET_BUFFER(perObjectData.meshletVerticesBuffer);
|
||||
ByteAddressBuffer meshletTrianglesBuffer = GET_BUFFER(perObjectData.meshletTrianglesBuffer);
|
||||
|
||||
// Write vertex output
|
||||
outVerts[vertexId].position = v.position;
|
||||
outVerts[vertexId].color = v.color;
|
||||
outVerts[vertexId].uv = v.uv;
|
||||
|
||||
// Thread 0 defines topology
|
||||
if (vertexId == 0)
|
||||
if (groupThreadID.x < vertexCount)
|
||||
{
|
||||
outTris[0] = uint3(0, 1, 2);
|
||||
uint vertexIndex = meshletVerticesBuffer.Load((m.vertexOffset + groupThreadID.x) * 4);
|
||||
ByteAddressBuffer vertices = GET_BUFFER(perObjectData.vertexBuffer);
|
||||
Vertex v = vertices.Load<Vertex>(vertexIndex * sizeof(Vertex));
|
||||
|
||||
// Basic MVP transform not needed if already in world space, but usually we need localToWorld and ViewProj
|
||||
PerViewData perViewData = LoadData<PerViewData>(g_PushConstantData.perViewBuffer, 0);
|
||||
float4 worldPos = mul(perObjectData.localToWorld, float4(v.position.xyz, 1.0f));
|
||||
outVerts[groupThreadID.x].position = mul(perViewData.viewMatrix, worldPos);
|
||||
outVerts[groupThreadID.x].position = mul(perViewData.projectionMatrix, outVerts[groupThreadID.x].position);
|
||||
|
||||
outVerts[groupThreadID.x].color = v.color;
|
||||
outVerts[groupThreadID.x].uv = v.uv;
|
||||
}
|
||||
|
||||
// Write triangle output (1 thread processes 1 triangle)
|
||||
// We could pack 3 indices in a uint or just use byte offset
|
||||
// In our CPU code, we packed it as individual bytes, so 3 bytes per triangle.
|
||||
// For 124 triangles, we have 372 bytes.
|
||||
if (groupThreadID.x < triangleCount)
|
||||
{
|
||||
uint triangleIndex = groupThreadID.x;
|
||||
uint baseOffset = m.triangleOffset + triangleIndex * 3;
|
||||
|
||||
// Load 4 bytes to get the 3 index bytes
|
||||
// Needs byte-aligned loading
|
||||
uint wordOffset = baseOffset & ~3;
|
||||
uint shift = (baseOffset & 3) * 8;
|
||||
uint packedIndices1 = meshletTrianglesBuffer.Load(wordOffset);
|
||||
uint packedIndices2 = meshletTrianglesBuffer.Load(wordOffset + 4);
|
||||
|
||||
uint64_t combined = ((uint64_t)packedIndices2 << 32) | packedIndices1;
|
||||
uint packedIndices = (uint)(combined >> shift);
|
||||
|
||||
uint i0 = packedIndices & 0xFF;
|
||||
uint i1 = (packedIndices >> 8) & 0xFF;
|
||||
uint i2 = (packedIndices >> 16) & 0xFF;
|
||||
|
||||
outTris[triangleIndex] = uint3(i0, i1, i2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,9 @@ struct PerObjectData
|
||||
BYTE_ADDRESS_BUFFER vertexBuffer;
|
||||
float3 worldBoundsMax;
|
||||
BYTE_ADDRESS_BUFFER indexBuffer;
|
||||
BYTE_ADDRESS_BUFFER meshletBuffer;
|
||||
BYTE_ADDRESS_BUFFER meshletVerticesBuffer;
|
||||
BYTE_ADDRESS_BUFFER meshletTrianglesBuffer;
|
||||
};
|
||||
|
||||
PushConstantData g_PushConstantData : register(b0);
|
||||
|
||||
Reference in New Issue
Block a user