feat(d3d12): add indirect command execution support

Added ICommandSignature and D3D12CommandSignature for indirect command execution in the D3D12 backend, with supporting types. Updated ICommandBuffer and IGraphicsEngine interfaces to support indirect execution and command signature creation. Refactored command buffer pooling in D3D12GraphicsEngine for more flexible reuse. Changed BeginFrame and EndFrame to void and clarified parameter names. Updated resource and frame data structures to use direct buffer indices. Added RenderingUtility for buffer and texture uploads. Removed IRenderOutput interface. Updated RenderSystem render loop and HLSL/C# code to match new buffer usage patterns.

BREAKING CHANGE: ICommandBuffer, IGraphicsEngine, and related APIs have changed signatures and behaviors. Indirect command execution is now supported and required for some advanced features.
This commit is contained in:
2026-04-05 23:11:08 +09:00
parent effd33b285
commit c6bdbe0710
21 changed files with 488 additions and 220 deletions

View File

@@ -72,6 +72,7 @@ internal sealed class RenderGraphContext : IUnsafeRenderContext
public int ActiveMeshIndexCount => _activeMeshIndexCount;
// TODO: Upload the relative scale to the GPU.
public float2 RelativeScale
{
get; set;

View File

@@ -210,10 +210,9 @@ public class RenderSystem : IDisposable
{
void StopRenderLoop(Result result)
{
Debug.Assert(result.IsFailure, "StopRenderLoop should only be called with a failure result.");
_isRunning = false;
_shutdownEvent.Set();
#if DEBUG
Debugger.Break();
#endif
@@ -224,113 +223,107 @@ public class RenderSystem : IDisposable
while (_isRunning)
{
_frameIndex = (uint)(_submittedFenceValue % _config.FrameBufferCount);
ref var frameResource = ref _frameResources[_frameIndex];
// Wait for either CPU ready signal or shutdown signal
waitHandles[0] = frameResource.CpuReadyEvent;
var waitResult = WaitHandle.WaitAny(waitHandles);
// If shutdown was signaled or timeout occurred, exit the loop
if (!_isRunning || waitResult == 1 || waitResult == WaitHandle.WaitTimeout)
{
break;
}
// Only proceed if CPU ready event was signaled
if (waitResult != 0)
{
continue;
}
_graphicsEngine.Device.GraphicsQueue.WaitForValue(frameResource.FenceValue);
if (!_resizeRequest.IsEmpty)
{
WaitIdle();
var keys = _resizeRequest.Keys.ToArray();
foreach (var swapChain in keys)
{
if (_resizeRequest.TryRemove(swapChain, out var newSize))
{
swapChain.Resize(newSize.x, newSize.y);
}
}
}
var completedFenceValue = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
if (_submittedFenceValue < completedFenceValue)
{
_submittedFenceValue = completedFenceValue;
}
// Begin rendering for this frame
frameResource.CommandAllocator.Reset();
_resourceManager.BeginFrame(_submittedFenceValue);
var r = _graphicsEngine.BeginFrame(_submittedFenceValue);
if (r.IsFailure)
{
StopRenderLoop(r);
break;
}
// Start recording commands
// TODO: How can we support async compute and async copy?
var cmd = _graphicsEngine.GetPooledCommandBuffer(CommandBufferType.Graphics);
ref var renderRequests = ref frameResource.RenderRequests;
try
{
cmd.Begin(frameResource.CommandAllocator);
_frameIndex = (uint)(_submittedFenceValue % _config.FrameBufferCount);
ref var frameResource = ref _frameResources[_frameIndex];
var renderCtx = new RenderContext(_graphicsEngine, _resourceManager, cmd);
// Wait for either CPU ready signal or shutdown signal
waitHandles[0] = frameResource.CpuReadyEvent;
var waitResult = WaitHandle.WaitAny(waitHandles);
_renderPipeline.Render(renderCtx, renderRequests.AsSpan());
_swapChainManager.TransitionToPresent(cmd);
// End recording commands and submit
r = cmd.End();
if (r.IsFailure)
// If shutdown was signaled or timeout occurred, exit the loop
if (!_isRunning || waitResult == 1 || waitResult == WaitHandle.WaitTimeout)
{
StopRenderLoop(r);
break;
}
_graphicsEngine.Device.GraphicsQueue.Submit(cmd);
_swapChainManager.PresentAll(cmd);
}
finally
{
_graphicsEngine.ReturnPooledCommandBuffer(cmd);
for (var i = 0; i < renderRequests.Count; i++)
// Only proceed if CPU ready event was signaled
if (waitResult != 0)
{
renderRequests[i].Dispose();
continue;
}
renderRequests.Clear();
_graphicsEngine.Device.GraphicsQueue.WaitForValue(frameResource.FenceValue);
if (!_resizeRequest.IsEmpty)
{
WaitIdle();
var keys = _resizeRequest.Keys.ToArray();
foreach (var swapChain in keys)
{
if (_resizeRequest.TryRemove(swapChain, out var newSize))
{
swapChain.Resize(newSize.x, newSize.y);
}
}
}
var completedFrame = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
if (_submittedFenceValue < completedFrame)
{
_submittedFenceValue = completedFrame;
}
// Begin rendering for this frame
frameResource.CommandAllocator.Reset();
_resourceManager.BeginFrame(_submittedFenceValue);
_graphicsEngine.BeginFrame(_submittedFenceValue);
// Start recording commands
// TODO: How can we support async compute and async copy?
var cmd = _graphicsEngine.GetPooledCommandBuffer(CommandBufferType.Graphics);
ref var renderRequests = ref frameResource.RenderRequests;
try
{
cmd.Begin(frameResource.CommandAllocator);
var renderCtx = new RenderContext(_graphicsEngine, _resourceManager, cmd);
_renderPipeline.Render(renderCtx, renderRequests.AsSpan());
_swapChainManager.TransitionToPresent(cmd);
// End recording commands and submit
var r = cmd.End();
if (r.IsFailure)
{
StopRenderLoop(r);
break;
}
_graphicsEngine.Device.GraphicsQueue.Submit(cmd);
_swapChainManager.PresentAll(cmd);
}
finally
{
_graphicsEngine.ReturnPooledCommandBuffer(cmd);
for (var i = 0; i < renderRequests.Count; i++)
{
renderRequests[i].Dispose();
}
renderRequests.Clear();
}
_submittedFenceValue++;
frameResource.FenceValue = _graphicsEngine.Device.GraphicsQueue.Signal(_submittedFenceValue);
frameResource.GpuReadyEvent.Set();
completedFrame = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
// End the frame and retire resources based on the freshest observed GPU progress.
_resourceManager.EndFrame(completedFrame);
_graphicsEngine.EndFrame(completedFrame);
}
_submittedFenceValue++;
frameResource.FenceValue = _graphicsEngine.Device.GraphicsQueue.Signal(_submittedFenceValue);
frameResource.GpuReadyEvent.Set();
completedFenceValue = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
// End the frame and retire resources based on the freshest observed GPU progress.
_resourceManager.EndFrame(completedFenceValue);
r = _graphicsEngine.EndFrame(completedFenceValue);
if (r.IsFailure)
catch (Exception ex)
{
StopRenderLoop(r);
break;
StopRenderLoop(Result.Failure($"An exception occurred during rendering: {ex.Message}"));
}
}
}

View File

@@ -54,10 +54,10 @@ public sealed partial class ResourceManager : IDisposable
Dispose();
}
internal void BeginFrame(ulong cpuFrame)
internal void BeginFrame(ulong submittedFrame)
{
Debug.Assert(!_disposed);
_submittedFrame = cpuFrame;
_submittedFrame = submittedFrame;
}
internal void EndFrame(ulong completedFrame)

View File

@@ -15,8 +15,6 @@ struct PushConstantData
struct FrameData
{
BYTE_ADDRESS_BUFFER viewBuffer;
BYTE_ADDRESS_BUFFER instanceBuffer;
BYTE_ADDRESS_BUFFER userBuffer;
};

View File

@@ -1,6 +1,8 @@
// Source: https://github.com/zeux/meshoptimizer/blob/master/demo/clusterlod.h
// Translated from C++ to C#.
// TODO: Move this file to the editor project; there is no reason to build meshlets and LODs at runtime.
using Ghost.MeshOptimizer;
using Misaki.HighPerformance.LowLevel.Buffer;
using Misaki.HighPerformance.LowLevel.Collections;

View File

@@ -0,0 +1,88 @@
using Ghost.Core;
using Ghost.Graphics.RHI;
using System.Diagnostics;
namespace Ghost.Graphics.Utilities;
/// <summary>
/// Helpers that record the work needed to move CPU-side data into GPU buffers and textures.
/// </summary>
public static unsafe class RenderingUtility
{
    /// <summary>
    /// Uploads <paramref name="data"/> into <paramref name="buffer"/>.
    /// </summary>
    /// <remarks>
    /// When the buffer description reports <c>HeapType.Upload</c>, the data is handed straight to
    /// <c>IResourceDatabase.MapResource</c>. For any other heap type a transient upload buffer is
    /// created, filled the same way, and a <c>CopyBuffer</c> command (both offsets zero) is recorded
    /// on <paramref name="cmd"/>.
    /// If the resource description cannot be retrieved, nothing is uploaded; debug builds raise an
    /// assertion failure so the dropped upload does not go unnoticed.
    /// An empty <paramref name="data"/> span is a no-op.
    /// </remarks>
    /// <typeparam name="T">Unmanaged element type of the source data.</typeparam>
    /// <param name="resourceManager">Used to create the transient upload buffer.</param>
    /// <param name="resourceDatabase">Used to query the resource description and to map resources.</param>
    /// <param name="cmd">Command buffer that receives the copy command when staging is needed.</param>
    /// <param name="buffer">Destination buffer handle.</param>
    /// <param name="data">Source elements to upload.</param>
    /// <exception cref="OutOfMemoryException">The transient upload buffer could not be created.</exception>
    /// <exception cref="OverflowException">The byte size of <paramref name="data"/> does not fit in <c>nuint</c>.</exception>
    public static void UploadBuffer<T>(ResourceManager resourceManager, IResourceDatabase resourceDatabase, ICommandBuffer cmd, Handle<GPUBuffer> buffer, params ReadOnlySpan<T> data)
        where T : unmanaged
    {
        var r = resourceDatabase.GetResourceDescription(buffer.AsResource());
        if (r.IsFailure)
        {
            // Release behavior is unchanged (skip the upload); debug builds surface the problem.
            Debug.Fail("UploadBuffer: failed to get the resource description of the destination buffer.");
            return;
        }

        Debug.Assert(r.Value.Type == ResourceType.Buffer);

        if (data.IsEmpty)
        {
            // Nothing to copy. Also avoids requesting a zero-sized transient buffer, whose failure
            // would otherwise be reported as an out-of-memory condition.
            return;
        }

        // Widen before multiplying: `data.Length * sizeof(T)` is evaluated in 32-bit int and can
        // silently wrap for large spans.
        var sizeInBytes = checked((nuint)data.Length * (nuint)sizeof(T));
        var memoryType = r.Value.BufferDescription.HeapType;

        if (memoryType == HeapType.Upload)
        {
            fixed (T* pData = data)
            {
                resourceDatabase.MapResource(buffer.AsResource(), 0, null, null, pData, sizeInBytes);
            }

            return;
        }

        var uploadDesc = new BufferDesc
        {
            Size = sizeInBytes,
            Usage = BufferUsage.Upload,
            HeapType = HeapType.Upload,
        };

        var uploadHandle = resourceManager.CreateTransientBuffer(in uploadDesc);
        if (uploadHandle.IsInvalid)
        {
            throw new OutOfMemoryException("Failed to create upload buffer for buffer data.");
        }

        fixed (T* pData = data)
        {
            resourceDatabase.MapResource(uploadHandle.AsResource(), 0, null, null, pData, sizeInBytes);
        }

        cmd.CopyBuffer(buffer, uploadHandle, 0, 0, sizeInBytes);
    }

    /// <summary>
    /// Uploads <paramref name="data"/> into <paramref name="texture"/> through a transient upload buffer.
    /// </summary>
    /// <remarks>
    /// Row and slice pitch are derived from the texture's format, width and height. The upload buffer
    /// is sized with <c>GetIntermediateResourceSize(texture, 0, 1)</c> (presumably first subresource,
    /// count of one; confirm against the RHI). A barrier moving the texture to the copy-destination
    /// layout is recorded before <c>UpdateSubResources</c>.
    /// NOTE(review): <paramref name="data"/> is not validated against the computed slice pitch;
    /// callers must supply at least one full slice.
    /// </remarks>
    /// <typeparam name="T">Unmanaged element type of the source data.</typeparam>
    /// <param name="resourceManager">Used to create the transient upload buffer.</param>
    /// <param name="resourceDatabase">Used to query the texture description and intermediate size.</param>
    /// <param name="cmd">Command buffer that receives the barrier and the update command.</param>
    /// <param name="texture">Destination texture handle.</param>
    /// <param name="data">Source texel data.</param>
    /// <exception cref="OutOfMemoryException">The transient upload buffer could not be created.</exception>
    public static void UploadTexture<T>(ResourceManager resourceManager, IResourceDatabase resourceDatabase, ICommandBuffer cmd, Handle<GPUTexture> texture, ReadOnlySpan<T> data)
        where T : unmanaged
    {
        // Throws if the description cannot be retrieved.
        var desc = resourceDatabase.GetResourceDescription(texture.AsResource()).GetValueOrThrow();

        // Mirror the sanity check done in UploadBuffer, and catch empty input early: pinning an
        // empty span yields a null pointer that would be passed on as the source data.
        Debug.Assert(desc.Type != ResourceType.Buffer);
        Debug.Assert(!data.IsEmpty, "UploadTexture requires non-empty source data.");

        desc.TextureDescription.Format.GetSurfaceInfo(desc.TextureDescription.Width, desc.TextureDescription.Height, out var rowPitch, out var slicePitch, out _);

        var requiredSize = resourceDatabase.GetIntermediateResourceSize(texture.AsResource(), 0, 1);

        var uploadDesc = new BufferDesc
        {
            Size = requiredSize,
            Usage = BufferUsage.Upload,
            HeapType = HeapType.Upload,
        };

        var uploadHandle = resourceManager.CreateTransientBuffer(in uploadDesc);
        if (uploadHandle.IsInvalid)
        {
            throw new OutOfMemoryException("Failed to create upload buffer for texture data.");
        }

        // The texture must be in the copy-destination layout before the update is recorded.
        cmd.Barrier(BarrierDesc.Texture(texture.AsResource(), BarrierSync.Copy, BarrierAccess.CopyDest, BarrierLayout.CopyDest));

        fixed (T* pData = data)
        {
            var subresourceData = new SubResourceData
            {
                pData = pData,
                rowPitch = rowPitch,
                slicePitch = slicePitch
            };

            cmd.UpdateSubResources(texture.AsResource(), uploadHandle.AsResource(), subresourceData);
        }
    }
}

View File

@@ -90,7 +90,6 @@ shader "MyShader/Standard"
ByteAddressBuffer vertices = GET_BUFFER(meshData.vertexBuffer);
Vertex v = vertices.Load<Vertex>(vertexIndex * sizeof(Vertex));
FrameData globalFrameData = LoadData<FrameData>(g_PushConstantData.frameBuffer, 0);
ViewData viewData = LoadData<ViewData>(g_PushConstantData.viewBuffer, 0);
float4 worldPos = mul(instanceData.localToWorld, float4(v.position.xyz, 1.0f));