feat(meshlet): refactor meshlet pipeline & add benchmark

Refactor meshlet build pipeline for robustness and performance.
Rename DxcShaderCompiler to DXCShaderCompiler. Enhance meshlet
data structures with bounds and LOD info. Add fallback mesh
simplification. Remove obsolete MeshRenderPass. Add
MeshoptBenchmark for meshlet build performance. Update mesh
import utilities for correct handedness. Minor bug fixes and
code cleanups.
This commit is contained in:
2026-04-02 17:50:44 +09:00
parent e32a24739d
commit d03eb659fa
12 changed files with 465 additions and 408 deletions

View File

@@ -66,7 +66,7 @@ internal class D3D12GraphicsEngine : IGraphicsEngine
private readonly D3D12DebugLayer _debugLayer;
#endif
private readonly D3D12RenderDevice _device;
private readonly DxcShaderCompiler _shaderCompiler;
private readonly DXCShaderCompiler _shaderCompiler;
private readonly D3D12DescriptorAllocator _descriptorAllocator;
private readonly D3D12ResourceDatabase _resourceDatabase;
private readonly D3D12PipelineLibrary _pipelineLibrary;
@@ -92,7 +92,7 @@ internal class D3D12GraphicsEngine : IGraphicsEngine
_debugLayer = new D3D12DebugLayer();
#endif
_device = new D3D12RenderDevice();
_shaderCompiler = new DxcShaderCompiler();
_shaderCompiler = new DXCShaderCompiler();
_descriptorAllocator = new D3D12DescriptorAllocator(_device);
_resourceDatabase = new D3D12ResourceDatabase(_descriptorAllocator);

View File

@@ -14,7 +14,7 @@ using static TerraFX.Interop.DirectX.DXC;
namespace Ghost.Graphics.Core;
internal sealed partial class DxcShaderCompiler
internal sealed partial class DXCShaderCompiler
{
private static string GetProfileString(ShaderStage stage, CompilerTier version)
{
@@ -149,7 +149,7 @@ internal sealed partial class DxcShaderCompiler
}
}
internal sealed unsafe partial class DxcShaderCompiler : IShaderCompiler
internal sealed unsafe partial class DXCShaderCompiler : IShaderCompiler
{
private UniquePtr<IDxcCompiler3> _compiler;
private UniquePtr<IDxcUtils> _utils;
@@ -159,7 +159,7 @@ internal sealed unsafe partial class DxcShaderCompiler : IShaderCompiler
private bool _disposed;
public DxcShaderCompiler()
public DXCShaderCompiler()
{
// Initialize DXC _compiler.Get() and _utils.Get()
var dxccID = CLSID.CLSID_DxcCompiler;
@@ -176,7 +176,7 @@ internal sealed unsafe partial class DxcShaderCompiler : IShaderCompiler
_compiledResults = new Dictionary<Key64<ShaderVariant>, GraphicsCompiledResult>();
}
~DxcShaderCompiler()
~DXCShaderCompiler()
{
Dispose();
}

View File

@@ -204,17 +204,51 @@ public struct Mesh : IResourceReleasable
public unsafe void CookMeshlets()
{
if (_meshletData.meshlets.IsCreated)
{
_meshletData.meshlets.Dispose();
}
if (_meshletData.groups.IsCreated)
{
_meshletData.groups.Dispose();
}
if (_meshletData.hierarchyNodes.IsCreated)
{
_meshletData.hierarchyNodes.Dispose();
}
if (_meshletData.meshletVertices.IsCreated)
{
_meshletData.meshletVertices.Dispose();
}
if (_meshletData.meshletTriangles.IsCreated)
{
_meshletData.meshletTriangles.Dispose();
}
_meshletData.meshletCount = 0;
_meshletData.lodLevelCount = 0;
_meshletData.materialSlotCount = 0;
// 1. Prepare Configuration
var config = new ClodConfig
{
maxVertices = 64,
minTriangles = 32,
maxTriangles = 124,
partitionSpatial = true,
partitionSize = 16,
clusterSpatial = false,
clusterSplitFactor = 2.0f,
clusterFillWeight = 1.0f,
optimizeClusters = true,
optimizeClustersLevel = 1,
simplifyRatio = 0.5f,
simplifyThreshold = 0.85f,
simplifyErrorMergePrevious = 1.0f,
@@ -222,25 +256,38 @@ public struct Mesh : IResourceReleasable
simplifyPermissive = true,
simplifyFallbackPermissive = false,
simplifyFallbackSloppy = true,
//optimizeBounds = true,
//optimizeClusters = true
};
// 2. Map Mesh to ClodMesh
var clodMesh = new ClodMesh
{
vertexPositions = (float*)_vertices.GetUnsafePtr(),
vertexPositions = (float*)Unsafe.AsPointer(ref _vertices[0].position),
vertexCount = (nuint)_vertices.Count,
vertexPositionsStride = (nuint)sizeof(Vertex),
vertexAttributes = (float*)Unsafe.AsPointer(ref _vertices[0].normal),
vertexAttributesStride = (nuint)sizeof(Vertex),
indices = (uint*)_indices.GetUnsafePtr(),
indexCount = (nuint)_indices.Count,
attributeProtectMask = 0
attributeProtectMask = 0,
};
// 3. Build
MeshletUtility.Build(config, clodMesh, Unsafe.AsPointer(ref this), MeshletOutputCallback);
MeshletUtility.Build(in config, in clodMesh, Unsafe.AsPointer(ref this), MeshletOutputCallback);
_meshletData.meshletCount = _meshletData.meshlets.Count;
_meshletData.meshletCount = _meshletData.meshlets.IsCreated ? _meshletData.meshlets.Count : 0;
if (_meshletData.groups.IsCreated && _meshletData.groups.Count > 0)
{
var maxLodLevel = 0u;
for (var i = 0; i < _meshletData.groups.Count; i++)
{
maxLodLevel = Math.Max(maxLodLevel, _meshletData.groups[i].lodLevel);
}
_meshletData.lodLevelCount = (int)maxLodLevel + 1;
}
_meshletData.materialSlotCount = 1;
}
private static unsafe int MeshletOutputCallback(void* context, ClodGroup group, ReadOnlyUnsafeCollection<ClodCluster> clusters)
@@ -257,6 +304,9 @@ public struct Mesh : IResourceReleasable
var meshletGroup = new MeshletGroup
{
boundingSphere = new SphereBounds(group.simplified.center, group.simplified.radius),
boundingBox = new AABB(group.simplified.center - group.simplified.radius, group.simplified.center + group.simplified.radius),
parentError = group.simplified.error,
meshletStartIndex = (uint)data.meshlets.Count,
meshletCount = (uint)clusters.Count,
lodLevel = (uint)group.depth
@@ -269,11 +319,16 @@ public struct Mesh : IResourceReleasable
var meshlet = new Meshlet
{
boundingSphere = new SphereBounds(cluster.bounds.center, cluster.bounds.radius),
boundingBox = new AABB(cluster.bounds.center - cluster.bounds.radius, cluster.bounds.center + cluster.bounds.radius),
vertexCount = (byte)cluster.vertexCount,
triangleCount = (byte)(cluster.localIndexCount / 3),
vertexOffset = (uint)data.meshletVertices.Count,
triangleOffset = (uint)data.meshletTriangles.Count,
groupIndex = (uint)data.groups.Count - 1
groupIndex = (uint)data.groups.Count - 1,
parentError = cluster.bounds.error,
localMaterialIndex = 0, // TODO: support multiple materials
lodLevel = (byte)group.depth,
};
data.meshlets.Add(meshlet);

View File

@@ -1,339 +0,0 @@
#if false
// Obsolete
using Ghost.Core.Graphics;
using Ghost.DSL.ShaderCompiler;
using Ghost.Graphics.Core;
using Ghost.Graphics.Core.Contracts;
using Ghost.Graphics.RenderGraphModule;
using Ghost.Graphics.RHI;
using Ghost.Graphics.Utilities;
using Misaki.HighPerformance.Image;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Utilities;
using System.Runtime.InteropServices;
namespace Ghost.Graphics;
/// <summary>
/// Pass data for the "Mesh Render Pass" raster pass that MeshRenderPass.Build registers on the render graph.
/// </summary>
internal class MeshRenderPassData
{
// Mesh made active (SetActiveMesh) by the pass's render function.
public Handle<Mesh> mesh;
// Material made active (SetActiveMaterial) by the pass's render function.
public Handle<Material> material;
// Render-graph texture created by the pass and bound as color attachment 0.
public Identifier<RGTexture> renderTarget;
}
/// <summary>
/// Pass data for the "Blit Pass" unsafe pass that MeshRenderPass.Build registers on the render graph.
/// </summary>
internal class BlitPassData
{
// Texture the pass declares as read; its bindless index is written into the blit material's mainTex.
public Identifier<RGTexture> source;
// Texture the pass declares as fully written and binds as the render target.
public Identifier<RGTexture> destination;
// Material used to draw the blit.
public Handle<Material> blitMaterial;
// Sampler whose identifier value is written into the blit material's sampler_mainTex.
public Identifier<Sampler> sampler;
}
/// <summary>
/// Simplified bindless mesh render pass using high-level bindless APIs with fully bindless vertex/index buffer access
/// </summary>
internal class MeshRenderPass : IRenderPass
{
/// <summary>
/// Property block that Initialize fills and hands to the mesh material through SetPropertyCache.
/// </summary>
// NOTE(review): field order presumably has to mirror the shader-side property layout — confirm against the shader.
[StructLayout(LayoutKind.Sequential)]
private struct ShaderProperties_MyShader_Standard
{
// Tint color; Initialize sets it to (1, 1, 1, 1).
public float4 color;
// Bindless indices of the four demo textures, obtained from ctx.ResourceDatabase.GetBindlessIndex.
public uint texture1;
public uint texture2;
public uint texture3;
public uint texture4;
// Sampler identifier value cast to uint.
public uint tex_sampler;
// Never written by this file; presumably pads the struct to a 16-byte multiple — TODO confirm.
private readonly uint _padding1;
private readonly uint _padding2;
private readonly uint _padding3;
}
/// <summary>
/// Property block that the blit pass fills each time it runs and hands to the blit material through SetPropertyCache.
/// </summary>
// NOTE(review): field order presumably has to mirror the blit shader's property layout — confirm against Blit.gshdr.
[StructLayout(LayoutKind.Sequential)]
private struct ShaderProperties_Hidden_Blit
{
// Bindless index of the source texture being blitted.
public uint mainTex;
// Sampler identifier value cast to uint.
public uint sampler_mainTex;
// Never written by this file; presumably pads the struct to a 16-byte multiple — TODO confirm.
private readonly uint _padding1;
private readonly uint _padding2;
}
private Handle<Mesh> _mesh;
private Identifier<Shader> _shader;
private Handle<Material> _material;
private Handle<Texture>[]? _textures;
private Identifier<Sampler> _sampler;
private Identifier<Shader> _blitShader;
private Handle<Material> _blitMaterial;
// Texture file paths for this demo
private readonly string[] _textureFiles = [
"C:/Users/Misaki/Downloads/Im/Icon.png",
"C:/Users/Misaki/Downloads/Im/Backdrop.jpg",
"C:/Users/Misaki/Downloads/Im/101167591_p0.png",
"C:/Users/Misaki/Downloads/Im/yande.re 1134666 blue_archive nakamasa_ichika sugarhigh.jpg"
];
/// <summary>
/// Enumerates every keyword combination that picks at most one keyword from each group.
/// </summary>
/// <param name="keywordsGroups">Keyword groups; within a group, "no keyword" is also a valid choice.</param>
/// <returns>
/// A lazy sequence of combinations. The combinations that skip the first group come first,
/// followed by those that start with each keyword of the first group, in declaration order.
/// For an empty input the sequence contains exactly one empty combination.
/// </returns>
private static IEnumerable<ReadOnlyMemory<string>> GetAllVariantCombination(KeywordsGroup[] keywordsGroups)
{
    if (keywordsGroups.Length == 0)
    {
        yield return ReadOnlyMemory<string>.Empty;
        yield break;
    }

    var firstGroup = keywordsGroups[0];

    // Materialize the combinations of the remaining groups once. The recursive iterator used to be
    // re-run for the pass-through and again for every keyword of the first group, which repeats the
    // whole sub-enumeration (including its array allocations) at every recursion level.
    var tailCombinations = new List<ReadOnlyMemory<string>>(GetAllVariantCombination(keywordsGroups[1..]));

    // Combinations that use no keyword from the first group.
    foreach (var combination in tailCombinations)
    {
        yield return combination;
    }

    // Combinations that start with one keyword from the first group.
    foreach (var keyword in firstGroup.keywords)
    {
        foreach (var combination in tailCombinations)
        {
            var array = new string[combination.Length + 1];
            array[0] = keyword;
            combination.Span.CopyTo(array.AsSpan(1));
            yield return array;
        }
    }
}
/// <summary>
/// Compiles the blit shader description, creates the blit shader and material from it,
/// and compiles the keyword-less variant of its first pass.
/// </summary>
/// <param name="ctx">Rendering context providing the resource manager and the shader compiler.</param>
private void CompileBlitShader(ref readonly RenderingContext ctx)
{
    var blitDescriptor = DSLShaderCompiler
        .CompileShader("F:/csharp/GhostEngine/src/Runtime/Ghost.Graphics/Shaders/Blit.gshdr", "C:/Users/Misaki/Downloads/Archive")
        .GetValueOrThrow();

    var resources = ctx.ResourceManager;
    _blitShader = resources.CreateGraphicsShader(blitDescriptor);
    _blitMaterial = resources.CreateMaterial(_blitShader);

    var compileConfig = new ShaderCompilationConfig
    {
        optimizeLevel = CompilerOptimizeLevel.O3,
        options = CompilerOption.KeepReflections,
        tier = CompilerTier.Tier2
    };

    // Only the first pass is compiled, with no keywords enabled.
    var firstPass = blitDescriptor.passes[0];
    var noKeywords = new LocalKeywordSet();
    var passKey = RHIUtility.CreateShaderPassKey(firstPass.identifier);
    var variantKey = RHIUtility.CreateShaderVariantKey(passKey, in noKeywords);

    ctx.ShaderCompiler.CompilePass(in firstPass, in compileConfig, variantKey).GetValueOrThrow();
}
/// <summary>
/// Creates every resource the demo pass needs: the blit shader/material, the mesh shader/material
/// with all of its compiled variants, a cube mesh with cooked meshlets, the demo textures and a sampler.
/// Finally fills the mesh material's property block and uploads it.
/// </summary>
/// <param name="ctx">Rendering context used to create resources, compile shaders and record uploads.</param>
/// <exception cref="InvalidOperationException">The shader or material reference could not be resolved.</exception>
public void Initialize(ref readonly RenderingContext ctx)
{
CompileBlitShader(in ctx);
// NOTE(review): shader source and archive paths are hard-coded to a developer machine.
var shaderDescriptor = DSLShaderCompiler.CompileShader("F:/csharp/GhostEngine/src/Runtime/Ghost.Graphics/test.gshdr", "C:/Users/Misaki/Downloads/Archive").GetValueOrThrow();
_shader = ctx.ResourceManager.CreateGraphicsShader(shaderDescriptor);
_material = ctx.ResourceManager.CreateMaterial(_shader);
// Compile every pass of the shader, and for passes with keywords every keyword combination.
for (var i = 0; i < shaderDescriptor.passes.Length; i++)
{
ref var pass = ref shaderDescriptor.passes[i];
var config = new ShaderCompilationConfig
{
optimizeLevel = CompilerOptimizeLevel.O3,
options = CompilerOption.KeepReflections,
tier = CompilerTier.Tier2
};
// TODO: Ideally, in editor mode, we compile a single variant when it's needed during rendering. Before the compilation is done, we fall back to a special "compilation in progress" shader.
// During the build process, we can precompile all the variants and store them in the cache for fast loading at runtime.
// After the compilation, we should store the compiled result in the disk cache even in editor mode. This allows us to avoid recompiling the same variant (same code hash and same version) multiple times.
if (pass.keywords.Length == 0)
{
// No keywords: a single variant keyed by an empty keyword set.
var emptyKeywords = new LocalKeywordSet();
var variantKey = RHIUtility.CreateShaderVariantKey(
RHIUtility.CreateShaderPassKey(pass.identifier),
in emptyKeywords);
ctx.ShaderCompiler.CompilePass(in pass, in config, variantKey).GetValueOrThrow();
}
else
{
var shaderResult = ctx.ResourceManager.GetShaderReference(_shader);
if (shaderResult.IsFailure)
{
throw new InvalidOperationException("Failed to get shader reference.");
}
ref readonly var shaderRef = ref shaderResult.Value;
foreach (var keyGroup in GetAllVariantCombination(pass.keywords))
{
// The keyword combination doubles as the preprocessor defines for this variant.
config.defines = keyGroup.Span;
var keywordsSet = new LocalKeywordSet();
foreach (var key in keyGroup.Span)
{
var localIndex = shaderRef.GetLocalKeywordIndex(Shader.GetKeywordID(key));
// Keywords without a local index on this shader are left out of the variant key.
if (localIndex == -1)
{
continue;
}
keywordsSet.SetKeyword(localIndex, true);
}
var variantKey = RHIUtility.CreateShaderVariantKey(
RHIUtility.CreateShaderPassKey(pass.identifier),
in keywordsSet);
ctx.ShaderCompiler.CompilePass(in pass, in config, variantKey).GetValueOrThrow();
}
}
}
MeshBuilder.CreateCube(0.75f, default, Misaki.HighPerformance.LowLevel.Buffer.Allocator.Persistent, out var vertices, out var indices);
_mesh = ctx.CreateMesh(vertices, indices, true);
// Cook meshlets for the mesh
var meshRef = ctx.ResourceManager.GetMeshReference(_mesh);
if (meshRef.IsSuccess)
{
meshRef.Value.CookMeshlets();
}
// NOTE(review): the upload below runs even when the mesh reference lookup failed — confirm that is intended.
ctx.UploadMeshlets(_mesh);
ctx.UpdateObjectData(_mesh);
// Load each demo image from disk as RGBA and create a single-mip 2D texture from it.
_textures = new Handle<Texture>[_textureFiles.Length];
for (var i = 0; i < _textureFiles.Length; i++)
{
using var stream = File.OpenRead(_textureFiles[i]);
using var imageData = ImageResult.FromStream(stream, ColorComponents.RGBA);
var desc = new TextureDesc
{
Width = imageData.Width,
Height = imageData.Height,
Dimension = TextureDimension.Texture2D,
Format = TextureFormat.R8G8B8A8_UNorm,
MipLevels = 1,
Slice = 1,
Usage = TextureUsage.ShaderResource,
};
_textures[i] = ctx.CreateTexture<byte>(in desc, imageData.AsSpan(), $"Texture_{i}");
}
var samplerDesc = new SamplerDesc
{
AddressU = TextureAddressMode.Repeat,
AddressV = TextureAddressMode.Repeat,
AddressW = TextureAddressMode.Repeat,
FilterMode = TextureFilterMode.Bilinear,
MaxAnisotropy = 16,
};
_sampler = ctx.ResourceAllocator.CreateSampler(in samplerDesc);
// Despite its name, meshResult holds the material reference lookup result.
var meshResult = ctx.ResourceManager.GetMaterialReference(_material);
if (meshResult.IsFailure)
{
throw new InvalidOperationException("Failed to get material reference.");
}
ref var matRef = ref meshResult.Value;
// Indexing _textures[0..3] relies on _textureFiles having exactly four entries.
var matProps = new ShaderProperties_MyShader_Standard
{
color = new float4(1.0f, 1.0f, 1.0f, 1.0f),
texture1 = ctx.ResourceDatabase.GetBindlessIndex(_textures[0].AsResource()),
texture2 = ctx.ResourceDatabase.GetBindlessIndex(_textures[1].AsResource()),
texture3 = ctx.ResourceDatabase.GetBindlessIndex(_textures[2].AsResource()),
texture4 = ctx.ResourceDatabase.GetBindlessIndex(_textures[3].AsResource()),
tex_sampler = (uint)_sampler.Value,
};
matRef.SetPropertyCache(in matProps).ThrowIfFailed();
matRef.UploadData(ctx.DirectCommandBuffer, ctx.ResourceDatabase);
}
/// <summary>
/// Registers two passes on the render graph: a raster pass that draws the mesh into a newly created
/// render target, and an unsafe pass that blits that render target into <paramref name="backbuffer"/>.
/// </summary>
/// <param name="graph">Render graph the passes are added to.</param>
/// <param name="backbuffer">Texture the blit pass writes to.</param>
public void Build(RenderGraph graph, Identifier<RGTexture> backbuffer)
{
Identifier<RGTexture> renderTarget;
using (var builder = graph.AddRasterRenderPass<MeshRenderPassData>("Mesh Render Pass", out var passData))
{
passData.mesh = _mesh;
passData.material = _material;
passData.renderTarget = builder.CreateTexture(RGTextureDesc.Relative(1.0f, TextureFormat.R8G8B8A8_UNorm), "Render Target");
builder.SetColorAttachment(passData.renderTarget, 0);
// Keep the identifier so the blit pass below can read from it.
renderTarget = passData.renderTarget;
builder.SetRenderFunc<MeshRenderPassData>(static (data, ctx) =>
{
ctx.SetActiveMaterial(data.material);
ctx.SetActiveMesh(data.mesh);
// Index count divided by 3, rounded up: one thread group per triangle.
var threadGroupCountX = ((uint)ctx.ActiveMeshIndexCount + 2u) / 3u;
ctx.DispatchMesh(new uint3(threadGroupCountX, 1u, 1u));
});
}
using (var builder = graph.AddUnsafeRenderPass<BlitPassData>("Blit Pass", out var passData))
{
passData.source = renderTarget;
passData.destination = backbuffer;
passData.blitMaterial = _blitMaterial;
passData.sampler = _sampler;
builder.UseTexture(passData.source, AccessFlags.Read);
builder.UseTexture(passData.destination, AccessFlags.WriteAll);
builder.SetRenderFunc<BlitPassData>(static (data, ctx) =>
{
var r = ctx.ResourceManager.GetMaterialReference(data.blitMaterial);
// Without the blit material the pass records nothing.
if (r.IsFailure)
{
return;
}
ref var matRef = ref r.Value;
// Refresh the blit material's properties every time the pass runs, using the actual resource behind the source identifier.
var blitProps = new ShaderProperties_Hidden_Blit
{
mainTex = ctx.ResourceDatabase.GetBindlessIndex(ctx.GetActualResource(data.source.AsResource())),
sampler_mainTex = (uint)data.sampler.Value,
};
matRef.SetPropertyCache(in blitProps).ThrowIfFailed();
matRef.UploadData(ctx.CommandBuffer, ctx.ResourceDatabase);
// Bind the destination as the only color target; the second argument is an invalid texture handle.
ctx.CommandBuffer.SetRenderTargets([ctx.GetActualTexture(data.destination)], Handle<Texture>.Invalid);
ctx.SetActiveMaterial(data.blitMaterial);
ctx.SetActiveMesh(Handle<Mesh>.Invalid); // Generate a full-screen triangle dynamically in mesh shader.
ctx.DispatchMesh(new uint3(1, 1, 1));
});
}
}
/// <summary>
/// Releases every resource this pass created: both materials, both shaders, the mesh,
/// the sampler and the demo textures.
/// </summary>
/// <param name="resourceManager">Owner of the materials, shaders and mesh.</param>
/// <param name="resourceDatabase">Owner of the sampler and the texture resources.</param>
public void Cleanup(ResourceManager resourceManager, IResourceDatabase resourceDatabase)
{
    // Materials are released before the shaders they were created from.
    resourceManager.ReleaseMaterial(_blitMaterial);
    resourceManager.ReleaseMaterial(_material);

    resourceManager.ReleaseShader(_shader);
    // The blit shader is created in CompileBlitShader and was previously never released.
    resourceManager.ReleaseShader(_blitShader);

    resourceManager.ReleaseMesh(_mesh);
    resourceDatabase.ReleaseSampler(_sampler);

    // _textures stays null when Initialize never reached the texture loading step.
    if (_textures != null)
    {
        foreach (var texture in _textures)
        {
            resourceDatabase.ReleaseResource(texture.AsResource());
        }
    }
}
}
#endif

View File

@@ -370,8 +370,8 @@ internal sealed class ResourceAliasingManager
// ===== PASS 2: Create a single heap of the peak size and do the real allocation =====
_heap.Reset();
_heap.size = peakMemoryUsage;
_heap.Reset();
// Allocate each logical resource in the heap
foreach (var (logicalIndex, logicalResource) in logicalResources)

View File

@@ -1,3 +1,4 @@
using Ghost.Graphics.Core;
using Ghost.MeshOptimizer;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;
@@ -84,6 +85,8 @@ public struct ClodConfig
public bool optimizeBounds;
/// <summary> Whether to optimize clusters post-build. </summary>
public bool optimizeClusters;
/// <summary> Level of cluster optimization. </summary>
public int optimizeClustersLevel;
}
/// <summary>
@@ -158,7 +161,7 @@ public unsafe delegate int ClodOutputDelegate(void* context, ClodGroup group, Re
public static unsafe class MeshletUtility
{
private static ClodBounds ComputeBounds(ClodMesh mesh, UnsafeList<uint> indices, float error)
private static ClodBounds ComputeBounds(ref readonly ClodMesh mesh, UnsafeList<uint> indices, float error)
{
var bounds = MeshOptApi.ComputeClusterBounds((uint*)indices.GetUnsafePtr(), (nuint)indices.Count, mesh.vertexPositions, mesh.vertexCount, mesh.vertexPositionsStride);
return new ClodBounds
@@ -199,7 +202,7 @@ public static unsafe class MeshletUtility
};
}
private static UnsafeList<Cluster> Clusterize(ClodConfig config, ClodMesh mesh, uint* indices, nuint indexCount, Allocator allocator)
private static UnsafeList<Cluster> Clusterize(ref readonly ClodConfig config, ref readonly ClodMesh mesh, uint* indices, nuint indexCount, Allocator allocator)
{
var maxMeshlets = MeshOptApi.BuildMeshletsBound(indexCount, config.maxVertices, config.minTriangles);
@@ -252,9 +255,9 @@ public static unsafe class MeshletUtility
var cluster = new Cluster
{
vertices = meshlet.vertex_count,
indices = new UnsafeList<uint>((int)(meshlet.triangle_count * 3), Allocator.Persistent),
uniqueVertices = new UnsafeList<uint>((int)meshlet.vertex_count, Allocator.Persistent),
localIndices = new UnsafeList<byte>((int)(meshlet.triangle_count * 3), Allocator.Persistent),
indices = new UnsafeList<uint>((int)(meshlet.triangle_count * 3), Allocator.FreeList),
uniqueVertices = new UnsafeList<uint>((int)meshlet.vertex_count, Allocator.FreeList),
localIndices = new UnsafeList<byte>((int)(meshlet.triangle_count * 3), Allocator.FreeList),
group = -1,
refined = -1
};
@@ -321,12 +324,16 @@ public static unsafe class MeshletUtility
}
}
private static UnsafeList<UnsafeList<int>> Partition(ClodConfig config, ClodMesh mesh, UnsafeList<Cluster> clusters, UnsafeList<int> pending, UnsafeArray<uint> remap, Allocator allocator)
private static UnsafeList<UnsafeList<int>> Partition(ref readonly ClodConfig config, ref readonly ClodMesh mesh, UnsafeList<Cluster> clusters, UnsafeList<int> pending, UnsafeArray<uint> remap, Allocator allocator)
{
if (pending.Count <= (int)config.partitionSize)
{
var single = new UnsafeList<UnsafeList<int>>(1, allocator);
single.Add(pending);
var pendingcpy = new UnsafeList<int>(pending.Count, Allocator.FreeList);
pendingcpy.AddRange(pending.AsSpan());
single.Add(pendingcpy);
return single;
}
@@ -374,13 +381,13 @@ public static unsafe class MeshletUtility
for (var i = 0; i < pending.Count; i++)
{
partitions[(int)((uint*)clusterPart.GetUnsafePtr())[i]].Add(pending[i]);
partitions[(int)clusterPart[i]].Add(pending[i]);
}
return partitions;
}
private static int OutputGroup(ClodConfig config, ClodMesh mesh, UnsafeList<Cluster> clusters, UnsafeList<int> group, ClodBounds simplified, int depth, void* outputContext, ClodOutputDelegate? outputCallback)
private static int OutputGroup(ref readonly ClodConfig config, ref readonly ClodMesh mesh, UnsafeList<Cluster> clusters, UnsafeList<int> group, ClodBounds simplified, int depth, void* outputContext, ClodOutputDelegate? outputCallback)
{
using var groupClusters = new UnsafeList<ClodCluster>(group.Count, Allocator.FreeList);
@@ -391,7 +398,7 @@ public static unsafe class MeshletUtility
{
refined = srcCluster.refined,
bounds = (config.optimizeBounds && srcCluster.refined != -1)
? ComputeBounds(mesh, srcCluster.indices, srcCluster.bounds.error)
? ComputeBounds(in mesh, srcCluster.indices, srcCluster.bounds.error)
: srcCluster.bounds,
indices = (uint*)srcCluster.indices.GetUnsafePtr(),
indexCount = (nuint)srcCluster.indices.Count,
@@ -410,7 +417,50 @@ public static unsafe class MeshletUtility
return result;
}
public static UnsafeArray<uint> Simplify(ClodConfig config, ClodMesh mesh, ReadOnlyUnsafeCollection<uint> indices, ReadOnlyUnsafeCollection<byte> locks, nuint targetCount, float* error, Allocator allocator)
/// <summary>
/// De-indexed vertex used by SimplifyFallback: a position followed by the index of the
/// original vertex it was copied from.
/// </summary>
// The struct is handed to MeshOptApi as a raw float pointer with sizeof(SloppyVertex) as the stride,
// so x, y, z must stay the first three fields.
private struct SloppyVertex
{
public float x, y, z;
// Index into the source mesh's vertex buffer; used to map simplified indices back.
public uint id;
}
/// <summary>
/// Simplifies <paramref name="indices"/> with MeshOptApi.SimplifySloppy on a de-indexed copy of the
/// referenced vertices and writes the resulting index list into <paramref name="lod"/>.
/// </summary>
/// <param name="lod">Receives the simplified indices, expressed as indices into the original vertex buffer.</param>
/// <param name="mesh">Source mesh providing vertex positions and their stride.</param>
/// <param name="indices">Indices of the subset to simplify.</param>
/// <param name="locks">Per-vertex lock flags, looked up by original vertex index.</param>
/// <param name="target_count">Target index count passed to the simplifier.</param>
/// <param name="error">Receives the simplification error, rescaled by MeshOptApi.SimplifyScale of the subset.</param>
private static void SimplifyFallback(ref UnsafeArray<uint> lod, ref readonly ClodMesh mesh, ReadOnlyUnsafeCollection<uint> indices, ReadOnlyUnsafeCollection<byte> locks, nuint target_count, float* error)
{
    using var corners = new UnsafeArray<SloppyVertex>(indices.Count, Allocator.FreeList);
    using var cornerLocks = new UnsafeArray<byte>(indices.Count, Allocator.FreeList);
    lod.Resize(indices.Count);

    var floatsPerPosition = mesh.vertexPositionsStride / sizeof(float);

    // deindex the mesh subset to avoid calling simplifySloppy on the entire vertex buffer (which is prohibitively expensive without sparsity)
    for (var corner = 0; corner < indices.Count; ++corner)
    {
        var vertex = indices[corner];
        Debug.Assert(vertex < mesh.vertexCount);

        var firstFloat = vertex * floatsPerPosition;
        corners[corner].x = mesh.vertexPositions[firstFloat];
        corners[corner].y = mesh.vertexPositions[firstFloat + 1];
        corners[corner].z = mesh.vertexPositions[firstFloat + 2];
        corners[corner].id = vertex;

        cornerLocks[corner] = locks[vertex];
        // Identity index buffer: every corner is its own vertex in the subset.
        lod[corner] = (uint)corner;
    }

    // Simplify in place; the simplifier reports how many indices remain.
    var simplifiedCount = MeshOptApi.SimplifySloppy(
        (uint*)lod.GetUnsafePtr(),
        (uint*)lod.GetUnsafePtr(),
        (nuint)lod.Count,
        (float*)corners.GetUnsafePtr(),
        (nuint)corners.Count,
        (nuint)sizeof(SloppyVertex),
        (byte*)cornerLocks.GetUnsafePtr(),
        target_count,
        float.MaxValue,
        error);
    lod.Resize((int)simplifiedCount);

    // convert error to absolute
    *error *= MeshOptApi.SimplifyScale((float*)corners.GetUnsafePtr(), (nuint)corners.Count, (nuint)sizeof(SloppyVertex));

    // restore original vertex indices
    for (var slot = 0; slot < lod.Count; ++slot)
    {
        lod[slot] = corners[lod[slot]].id;
    }
}
public static UnsafeArray<uint> Simplify(ref readonly ClodConfig config, ref readonly ClodMesh mesh, ReadOnlyUnsafeCollection<uint> indices, ReadOnlyUnsafeCollection<byte> locks, nuint targetCount, float* error, Allocator allocator)
{
var lod = new UnsafeArray<uint>(indices.Count, allocator);
@@ -477,6 +527,7 @@ public static unsafe class MeshletUtility
if ((nuint)lod.Length > targetCount && config.simplifyFallbackSloppy)
{
SimplifyFallback(ref lod, in mesh, indices, locks, targetCount, error);
*error *= config.simplifyErrorFactorSloppy;
}
@@ -520,7 +571,7 @@ public static unsafe class MeshletUtility
/// <param name="outputContext">Optional context pointer passed to the output callback.</param>
/// <param name="outputCallback">Delegate invoked for each generated LOD group.</param>
/// <returns>The total count of generated clusters.</returns>
public static nuint Build(ClodConfig config, ClodMesh mesh, void* outputContext, ClodOutputDelegate? outputCallback)
public static nuint Build(ref readonly ClodConfig config, ref readonly ClodMesh mesh, void* outputContext, ClodOutputDelegate? outputCallback)
{
Debug.Assert(mesh.vertexAttributesStride % sizeof(float) == 0, "vertexAttributesStride must be a multiple of sizeof(float)");
@@ -548,11 +599,11 @@ public static unsafe class MeshletUtility
}
}
using var clusters = Clusterize(config, mesh, mesh.indices, mesh.indexCount, Allocator.FreeList);
using var clusters = Clusterize(in config, in mesh, mesh.indices, mesh.indexCount, Allocator.FreeList);
for (var i = 0; i < clusters.Count; i++)
{
clusters[i].bounds = ComputeBounds(mesh, clusters[i].indices, 0.0f);
clusters[i].bounds = ComputeBounds(in mesh, clusters[i].indices, 0.0f);
}
using var pending = new UnsafeList<int>(clusters.Count, Allocator.FreeList);
@@ -565,7 +616,7 @@ public static unsafe class MeshletUtility
while (pending.Count > 1)
{
using var groups = Partition(config, mesh, clusters, pending, remap, Allocator.FreeList);
using var groups = Partition(in config, in mesh, clusters, pending, remap, Allocator.FreeList);
pending.Clear();
LockBoundary(locks, groups, clusters, remap, mesh.vertexLock);
@@ -576,35 +627,32 @@ public static unsafe class MeshletUtility
for (var j = 0; j < groups[i].Count; j++)
{
var clusterIndices = clusters[groups[i][j]].indices;
for (var k = 0; k < clusterIndices.Count; k++)
{
merged.Add(clusterIndices[k]);
}
merged.AddRange(clusterIndices.AsSpan());
}
var targetSize = (nuint)(merged.Count / 3 * config.simplifyRatio * 3.0f);
var bounds = MergeBounds(clusters, groups[i]);
var error = 0.0f;
using var simplified = Simplify(config, mesh, merged.AsReadOnly(), locks.AsReadOnly(), targetSize, &error, Allocator.FreeList);
using var simplified = Simplify(in config, in mesh, merged.AsReadOnly(), locks.AsReadOnly(), targetSize, &error, Allocator.FreeList);
if ((nuint)simplified.Length > (nuint)(merged.Count * config.simplifyThreshold))
{
bounds.error = float.MaxValue;
OutputGroup(config, mesh, clusters, groups[i], bounds, depth, outputContext, outputCallback);
OutputGroup(in config, in mesh, clusters, groups[i], bounds, depth, outputContext, outputCallback);
continue;
}
bounds.error = Math.Max(bounds.error * config.simplifyErrorMergePrevious, error) + error * config.simplifyErrorMergeAdditive;
var refined = OutputGroup(config, mesh, clusters, groups[i], bounds, depth, outputContext, outputCallback);
var refined = OutputGroup(in config, in mesh, clusters, groups[i], bounds, depth, outputContext, outputCallback);
for (var j = 0; j < groups[i].Count; j++)
{
clusters[groups[i][j]].Dispose();
}
using var split = Clusterize(config, mesh, (uint*)simplified.GetUnsafePtr(), (nuint)simplified.Length, Allocator.FreeList);
using var split = Clusterize(in config, in mesh, (uint*)simplified.GetUnsafePtr(), (nuint)simplified.Length, Allocator.FreeList);
for (var j = 0; j < split.Count; j++)
{
split[j].refined = refined;
@@ -626,7 +674,7 @@ public static unsafe class MeshletUtility
{
var bounds = clusters[pending[0]].bounds;
bounds.error = float.MaxValue;
OutputGroup(config, mesh, clusters, pending, bounds, depth, outputContext, outputCallback);
OutputGroup(in config, in mesh, clusters, pending, bounds, depth, outputContext, outputCallback);
}
var finalClusterCount = (nuint)clusters.Count;

View File

@@ -29,7 +29,7 @@ shader "MyShader/Standard"
hlsl
{
#line 31 "MyShader_Standard_Forward_hlsl_block"
#line 32 "MyShader_Standard_Forward_hlsl_block"
struct PixelInput
{
float4 position : SV_POSITION;
@@ -39,13 +39,15 @@ shader "MyShader/Standard"
nointerpolation uint meshletID : MESHLET_ID;
};
[numthreads(128, 1, 1)] // 128 threads to cover max 64 vertices and 124 triangles
[outputtopology("triangle")]
void MSMain(
uint3 groupThreadID : SV_GroupThreadID,
uint3 groupID : SV_GroupID,
out vertices PixelInput outVerts[64],
out indices uint3 outTris[124])
// Payload that ASMain passes to MSMain through DispatchMesh.
struct ASPayload
{
// Index of the meshlet to load in MSMain; ASMain sets it to its SV_GroupID.x.
uint meshletIndex;
};
groupshared ASPayload s_Payload;
[numthreads(1, 1, 1)]
void ASMain(uint3 groupID : SV_GroupID)
{
InstanceData instanceData = LoadData<InstanceData>(g_PushConstantData.instanceBuffer, g_PushConstantData.instanceIndex);
MeshData meshData = LoadData<MeshData>(instanceData.meshBuffer, 0);
@@ -53,9 +55,29 @@ shader "MyShader/Standard"
ByteAddressBuffer meshletBuffer = GET_BUFFER(meshData.meshletBuffer);
Meshlet m = meshletBuffer.Load<Meshlet>(groupID.x * sizeof(Meshlet));
uint vertexCount = m.packedCounts & 0xFF;
uint triangleCount = (m.packedCounts >> 8) & 0xFF;
s_Payload.meshletIndex = groupID.x;
uint lodLevel = (m.packedCounts >> 24) & 0xFFu;
uint emitMeshlet = lodLevel == 0u ? 1u : 0u;
DispatchMesh(emitMeshlet, 1u, 1u, s_Payload);
}
[numthreads(128, 1, 1)] // 128 threads to cover max 64 vertices and 124 triangles
[outputtopology("triangle")]
void MSMain(
in payload ASPayload meshPayload,
uint3 groupThreadID : SV_GroupThreadID,
out vertices PixelInput outVerts[64],
out indices uint3 outTris[124])
{
InstanceData instanceData = LoadData<InstanceData>(g_PushConstantData.instanceBuffer, g_PushConstantData.instanceIndex);
MeshData meshData = LoadData<MeshData>(instanceData.meshBuffer, 0);
ByteAddressBuffer meshletBuffer = GET_BUFFER(meshData.meshletBuffer);
Meshlet m = meshletBuffer.Load<Meshlet>(meshPayload.meshletIndex * sizeof(Meshlet));
uint vertexCount = m.packedCounts & 0xFFu;
uint triangleCount = (m.packedCounts >> 8) & 0xFFu;
SetMeshOutputCounts(vertexCount, triangleCount);
ByteAddressBuffer meshletVerticesBuffer = GET_BUFFER(meshData.meshletVerticesBuffer);
@@ -81,7 +103,7 @@ shader "MyShader/Standard"
outVerts[groupThreadID.x].color = v.color;
outVerts[groupThreadID.x].normal = normalize(mul((float3x3)instanceData.localToWorld, v.normal));
outVerts[groupThreadID.x].uv = v.uv;
outVerts[groupThreadID.x].meshletID = groupID.x;
outVerts[groupThreadID.x].meshletID = meshPayload.meshletIndex;
}
// Write triangle output (1 thread processes 1 triangle)
@@ -123,6 +145,7 @@ shader "MyShader/Standard"
}
}
task "hlsl_block" : "ASMain";
mesh "hlsl_block" : "MSMain";
pixel "hlsl_block" : "PSMain";
}

View File

@@ -23,12 +23,6 @@ internal static class MeshUtility
return new float4(t.xyz, w);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static float3 RightHandToLeft(float3 p)
{
return new float3(p.x, p.y, -p.z);
}
public static unsafe Result LoadMesh(string filePath, Allocator allocator, out UnsafeList<Vertex> vertices, out UnsafeList<uint> indices)
{
vertices = default;
@@ -47,18 +41,8 @@ internal static class MeshUtility
var load_Opts = new ufbx_load_opts
{
target_axes = new ufbx_coordinate_axes
{
right = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_X,
up = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Y,
front = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Z
},
obj_axes = new ufbx_coordinate_axes
{
right = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_X,
up = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Y,
front = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_NEGATIVE_Z
},
target_axes = ufbx_coordinate_axes.left_handed_y_up,
obj_axes = ufbx_coordinate_axes.right_handed_y_up,
// Force X-axis mirroring to correctly convert handedness to Left-Handed,
// while preserving correct left/right orientation when viewed from the front.
handedness_conversion_axis = ufbx_mirror_axis.UFBX_MIRROR_AXIS_X,

View File

@@ -14,6 +14,7 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\Runtime\Ghost.Graphics\Ghost.Graphics.csproj" />
<ProjectReference Include="..\..\Test\Ghost.Test.Core\Ghost.Test.Core.csproj" />
<ProjectReference Include="..\..\ThridParty\Ghost.Nvtt\Ghost.Nvtt.csproj" />
<ProjectReference Include="..\..\ThridParty\Ghost.Ufbx\Ghost.Ufbx.csproj" />

View File

@@ -0,0 +1,256 @@
using Ghost.Core;
using Ghost.Graphics.RHI;
using Ghost.Graphics.Utilities;
using Ghost.MeshOptimizer;
using Ghost.Test.Core;
using Ghost.Ufbx;
using Misaki.HighPerformance.LowLevel;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;
using Misaki.HighPerformance.LowLevel.Utilities;
using Misaki.HighPerformance.Mathematics;
using System.Runtime.CompilerServices;
using System.Text;
namespace Ghost.MicroTest;
internal class MeshoptBenchmark : ITest
{
/// <summary>
/// Builds a four-component tangent: xyz is <paramref name="t"/> made perpendicular to
/// <paramref name="n"/> and normalized, w is a sign of -1 or +1.
/// </summary>
/// <param name="t">Input tangent.</param>
/// <param name="n">Normal (presumably unit length — confirm against callers).</param>
/// <param name="b">Bitangent used to pick the sign.</param>
/// <returns>The adjusted tangent in xyz and the sign in w.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static float4 ComputeTangent(float3 t, float3 n, float3 b)
{
    // Remove the part of t that points along n, then renormalize.
    var tangent = math.normalize(t - n * math.dot(n, t));

    // w is -1 when cross(n, tangent) points against b, otherwise +1.
    var alignment = math.dot(math.cross(n.xyz, tangent.xyz), b.xyz);
    var sign = alignment < 0.0f ? -1.0f : 1.0f;

    return new float4(tangent.xyz, sign);
}
/// <summary>
/// Loads an <c>.obj</c> or <c>.fbx</c> file through ufbx, flattens every mesh node into a single
/// triangle list, welds duplicate vertices and reorders the result with meshoptimizer.
/// </summary>
/// <param name="filePath">Path of the model file to load.</param>
/// <param name="allocator">Allocator used for the returned <paramref name="vertices"/> and <paramref name="indices"/>.</param>
/// <param name="vertices">Receives the unique vertices on success; <c>default</c> on failure.</param>
/// <param name="indices">Receives the triangle indices on success; <c>default</c> on failure.</param>
/// <returns>
/// A success result, or a failure describing why the file could not be loaded
/// (missing file, unsupported extension, ufbx load/weld error, or no geometry in the file).
/// </returns>
public static unsafe Result LoadMesh(string filePath, Allocator allocator, out UnsafeList<Vertex> vertices, out UnsafeList<uint> indices)
{
    vertices = default;
    indices = default;

    if (!File.Exists(filePath))
    {
        return Result.Failure("Invalid file path.");
    }

    var extension = Path.GetExtension(filePath);
    if (!extension.Equals(".obj", StringComparison.OrdinalIgnoreCase)
        && !extension.Equals(".fbx", StringComparison.OrdinalIgnoreCase))
    {
        return Result.Failure("Unsupported file format. Only .obj and .fbx are supported.");
    }

    var load_Opts = new ufbx_load_opts
    {
        target_axes = ufbx_coordinate_axes.left_handed_y_up,
        obj_axes = ufbx_coordinate_axes.right_handed_y_up,

        // Force X-axis mirroring to correctly convert handedness to Left-Handed,
        // while preserving correct left/right orientation when viewed from the front.
        handedness_conversion_axis = ufbx_mirror_axis.UFBX_MIRROR_AXIS_X,
        space_conversion = ufbx_space_conversion.UFBX_SPACE_CONVERSION_MODIFY_GEOMETRY,
    };

    var error = new ufbx_error();

    using var pool = new MemoryPool<VirtualStack, VirtualStack.CreationOpts>(new VirtualStack.CreationOpts
    {
        // 256 MB should be enough for most models; adjust as needed. Note that this uses virtual
        // memory and does not actually consume physical memory until allocations are made.
        reserveCapacity = 256 * 1024 * 1024
    });

    using var scope0 = pool.Allocator.CreateScope(pool.AllocationHandle);

    // ufbx expects a null-terminated UTF-8 path.
    using var str = new UnsafeArray<byte>(Encoding.UTF8.GetByteCount(filePath) + 1, scope0.AllocationHandle);
    var count = Encoding.UTF8.GetBytes(filePath, str.AsSpan());
    str[count] = 0;

    using var scene = new DisposablePtr<ufbx_scene>(ufbx_scene.LoadFile((sbyte*)str.GetUnsafePtr(), &load_Opts, &error));
    if (scene.Get() == null)
    {
        return Result.Failure(error.description.ToString());
    }

    // One entry per triangle corner; duplicates are welded after all nodes are visited.
    using var flatVertices = new UnsafeList<Vertex>(1024, scope0.AllocationHandle);
    //using var flatIndices = new UnsafeList<uint>(1024, scope0.AllocationHandle);

    var needComputeNormals = false;

    for (var i = 0u; i < scene.Get()->nodes.count; i++)
    {
        var node = scene.Get()->nodes.data[i];
        if (node->is_root)
        {
            continue;
        }

        var pMesh = node->mesh;
        if (pMesh == null || pMesh->num_faces == 0)
        {
            continue;
        }

        // Per-node scratch memory for triangulation.
        using var scope1 = pool.Allocator.CreateScope(pool.AllocationHandle);

        var maxScratchIndices = (int)(pMesh->max_face_triangles * 3u);
        using var triIndicesArray = new UnsafeArray<uint>(maxScratchIndices, scope1.AllocationHandle);

        for (var j = 0u; j < pMesh->num_faces; j++)
        {
            var face = pMesh->faces.data[j];
            var numTris = UfbxApi.TriangulateFace(triIndicesArray.AsSpan(0, maxScratchIndices), pMesh, face);
            var totalIndices = numTris * 3;

            for (var k = 0; k < totalIndices; k++)
            {
                var ufbxTopologyIndex = triIndicesArray[k];

                // uint.MaxValue marks an attribute that the mesh does not provide.
                var posIdx = pMesh->vertex_position.indices.data[ufbxTopologyIndex];
                var normIdx = pMesh->vertex_normal.exists ? pMesh->vertex_normal.indices.data[ufbxTopologyIndex] : uint.MaxValue;
                var tanIdx = pMesh->vertex_tangent.exists ? pMesh->vertex_tangent.indices.data[ufbxTopologyIndex] : uint.MaxValue;
                var uvIdx = pMesh->vertex_uv.exists ? pMesh->vertex_uv.indices.data[ufbxTopologyIndex] : uint.MaxValue;
                var colIdx = pMesh->vertex_color.exists ? pMesh->vertex_color.indices.data[ufbxTopologyIndex] : uint.MaxValue;
                var btanIdx = pMesh->vertex_bitangent.exists ? pMesh->vertex_bitangent.indices.data[ufbxTopologyIndex] : uint.MaxValue;

                var vertex = new Vertex
                {
                    position = pMesh->vertex_position.values.data[posIdx],
                    normal = normIdx != uint.MaxValue ? pMesh->vertex_normal.values.data[normIdx] : default,
                    uv = uvIdx != uint.MaxValue ? pMesh->vertex_uv.values.data[uvIdx] : default,
                    color = colIdx != uint.MaxValue ? new Color128(pMesh->vertex_color.values.data[colIdx]) : default,
                };

                if (tanIdx != uint.MaxValue)
                {
                    var t = pMesh->vertex_tangent.values.data[tanIdx];
                    var n = vertex.normal;

                    // Fall back to a derived bitangent when the file does not store one.
                    var b = btanIdx != uint.MaxValue ? pMesh->vertex_bitangent.values.data[btanIdx] : math.cross(n, t);
                    vertex.tangent = ComputeTangent(t, n, b);
                }

                flatVertices.Add(vertex);

                // Remember whether any corner lacked a normal or tangent.
                needComputeNormals |= normIdx == uint.MaxValue || tanIdx == uint.MaxValue;
            }
        }
    }

    // Nothing to weld or optimize: report it instead of handing back empty buffers.
    if (flatVertices.Count == 0)
    {
        return Result.Failure("The file does not contain any mesh geometry.");
    }

    var numIndices = (uint)flatVertices.Count;

    using var weldedIndices = new UnsafeArray<uint>((int)numIndices, scope0.AllocationHandle);
    using var cachedIndices = new UnsafeArray<uint>((int)numIndices, scope0.AllocationHandle);

    var stream = new ufbx_vertex_stream
    {
        data = flatVertices.GetUnsafePtr(),
        vertex_count = numIndices,
        vertex_size = (nuint)sizeof(Vertex)
    };

    // Per the ufbx documentation, index generation deduplicates the stream in place, so after this
    // call only the first numUniqueVertices entries of flatVertices are referenced by weldedIndices.
    var numUniqueVertices = UfbxApi.GenerateIndices([stream], weldedIndices, null, &error);
    if (numUniqueVertices == 0 && error.type != ufbx_error_type.UFBX_ERROR_NONE)
    {
        return Result.Failure($"Welding failed: {error.description}");
    }

    MeshOptApi.OptimizeVertexCache((uint*)cachedIndices.GetUnsafePtr(), (uint*)weldedIndices.GetUnsafePtr(), numIndices, numUniqueVertices);

    vertices = new UnsafeList<Vertex>((int)numUniqueVertices, allocator);
    indices = new UnsafeList<uint>((int)numIndices, allocator);

    // The vertex count passed here must be the welded count: it matches both the compacted source
    // stream and the capacity of the destination buffer.
    var finalVertexCount = MeshOptApi.OptimizeVertexFetch(vertices.GetUnsafePtr(), (uint*)cachedIndices.GetUnsafePtr(), numIndices, flatVertices.GetUnsafePtr(), numUniqueVertices, (nuint)sizeof(Vertex));
    vertices.UnsafeSetCount((int)finalVertexCount);

    // OptimizeVertexFetch rewrote cachedIndices in place to match the reordered vertices.
    MemoryUtility.MemCpy(indices.GetUnsafePtr(), cachedIndices.GetUnsafePtr(), numIndices * sizeof(uint));
    indices.UnsafeSetCount((int)numIndices);

    // Normal/tangent regeneration is currently disabled; needComputeNormals is kept for it.
    //if (needComputeNormals)
    //{
    //    MeshBuilder.ComputeNormal(vertices, indices);
    //    MeshBuilder.ComputeTangents(vertices, indices);
    //}

    return Result.Success();
}
// Mesh data filled by LoadMesh in Setup and released in Cleanup.
private UnsafeList<Vertex> _vertices;
private UnsafeList<uint> _indices;
// Build settings and mesh description created in Setup and passed to MeshletUtility.Build in Run.
private ClodConfig _config;
private ClodMesh _clodMesh;
/// <summary>
/// Initializes the allocation manager, loads the benchmark mesh and prepares the
/// <see cref="ClodConfig"/> and <see cref="ClodMesh"/> consumed by <see cref="Run"/>.
/// </summary>
/// <exception cref="InvalidOperationException">The loaded mesh contains no vertices or no indices.</exception>
public unsafe void Setup()
{
    var opts = new AllocationManagerInitOpts
    {
        ArenaCapacity = 1024 * 1024 * 1024, // 1GB
        StackCapacity = 1024 * 1024 * 32, // 32MB
        FreeListConcurrencyLevel = Environment.ProcessorCount,
    };

    AllocationManager.Initialize(opts);

    // NOTE(review): machine-specific absolute path; the benchmark only runs where this asset exists.
    LoadMesh("F:/c/SimpleRayTracer/native/assets/bunny.obj", Allocator.Persistent, out _vertices, out _indices).ThrowIfFailed();

    // The mesh description below takes the address of element 0, so an empty mesh must be rejected first.
    if (_vertices.Count == 0 || _indices.Count == 0)
    {
        throw new InvalidOperationException("The benchmark mesh does not contain any geometry.");
    }

    // 1. Build configuration
    _config = new ClodConfig
    {
        maxVertices = 64,
        minTriangles = 32,
        maxTriangles = 124,

        partitionSpatial = true,
        partitionSize = 16,

        clusterSpatial = false,
        clusterSplitFactor = 2.0f,

        optimizeClusters = true,
        optimizeClustersLevel = 1,

        simplifyRatio = 0.5f,
        simplifyThreshold = 0.85f,
        simplifyErrorMergePrevious = 1.0f,
        simplifyErrorFactorSloppy = 2.0f,
        simplifyPermissive = true,
        simplifyFallbackPermissive = false,
        simplifyFallbackSloppy = true,
    };

    // 2. Map Mesh to ClodMesh
    // Positions and attributes both point into the interleaved vertex buffer, hence the shared stride.
    _clodMesh = new ClodMesh
    {
        vertexPositions = (float*)Unsafe.AsPointer(ref _vertices[0].position),
        vertexCount = (nuint)_vertices.Count,
        vertexPositionsStride = (nuint)sizeof(Vertex),
        vertexAttributes = (float*)Unsafe.AsPointer(ref _vertices[0].normal),
        vertexAttributesStride = (nuint)sizeof(Vertex),
        indices = (uint*)_indices.GetUnsafePtr(),
        indexCount = (nuint)_indices.Count,
        attributeProtectMask = 0,
    };
}
/// <summary>
/// Runs a single meshlet build over the mesh prepared in <see cref="Setup"/> and
/// prints the elapsed wall-clock time to the console.
/// </summary>
public unsafe void Run()
{
    // 3. Build
    var timer = new System.Diagnostics.Stopwatch();
    timer.Start();

    MeshletUtility.Build(in _config, in _clodMesh, null, null);

    var elapsedSeconds = timer.Elapsed.TotalSeconds;
    Console.WriteLine($"Meshlet build time: {elapsedSeconds:F3} seconds");
}
/// <summary>
/// Releases the mesh buffers loaded in <see cref="Setup"/> and then disposes the allocation manager.
/// </summary>
public void Cleanup()
{
// Free the buffers before the allocation manager that was initialized in Setup.
_vertices.Dispose();
_indices.Dispose();
AllocationManager.Dispose();
}
}

View File

@@ -1,4 +1,4 @@
using Ghost.MicroTest;
using Ghost.Test.Core;
// Execute the micro tests through the shared test runner.
TestRunner.Run<UfbxBindingTest>();
TestRunner.Run<MeshoptBenchmark>();

View File

@@ -0,0 +1,29 @@
namespace Ghost.Ufbx;
/// <summary>
/// Ready-made coordinate axis presets for <see cref="ufbx_coordinate_axes"/>.
/// Every preset uses +X as the right axis; they differ in the up and front axes.
/// </summary>
public partial struct ufbx_coordinate_axes
{
    /// <summary>Right-handed preset: up = +Y, front = +Z.</summary>
    public static ufbx_coordinate_axes right_handed_y_up
    {
        get
        {
            return WithPositiveXRight(
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Y,
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Z);
        }
    }

    /// <summary>Right-handed preset: up = +Z, front = -Y.</summary>
    public static ufbx_coordinate_axes right_handed_z_up
    {
        get
        {
            return WithPositiveXRight(
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Z,
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_NEGATIVE_Y);
        }
    }

    /// <summary>Left-handed preset: up = +Y, front = -Z.</summary>
    public static ufbx_coordinate_axes left_handed_y_up
    {
        get
        {
            return WithPositiveXRight(
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Y,
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_NEGATIVE_Z);
        }
    }

    /// <summary>Left-handed preset: up = +Z, front = +Y.</summary>
    public static ufbx_coordinate_axes left_handed_z_up
    {
        get
        {
            return WithPositiveXRight(
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Z,
                ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_Y);
        }
    }

    // Creates a fresh value whose right axis is +X and whose up/front axes are the given ones.
    private static ufbx_coordinate_axes WithPositiveXRight(ufbx_coordinate_axis upAxis, ufbx_coordinate_axis frontAxis)
    {
        return new ufbx_coordinate_axes
        {
            right = ufbx_coordinate_axis.UFBX_COORDINATE_AXIS_POSITIVE_X,
            up = upAxis,
            front = frontAxis,
        };
    }
}