feat(d3d12): add indirect command execution support
Added ICommandSignature and D3D12CommandSignature for indirect command execution in the D3D12 backend, with supporting types. Updated ICommandBuffer and IGraphicsEngine interfaces to support indirect execution and command signature creation. Refactored command buffer pooling in D3D12GraphicsEngine for more flexible reuse. Changed BeginFrame and EndFrame to void and clarified parameter names. Updated resource and frame data structures to use direct buffer indices. Added RenderingUtility for buffer and texture uploads. Removed IRenderOutput interface. Updated RenderSystem render loop and HLSL/C# code to match new buffer usage patterns. BREAKING CHANGE: ICommandBuffer, IGraphicsEngine, and related APIs have changed signatures and behaviors. Indirect command execution is now supported and required for some advanced features.
This commit is contained in:
@@ -72,6 +72,7 @@ internal sealed class RenderGraphContext : IUnsafeRenderContext
|
||||
|
||||
public int ActiveMeshIndexCount => _activeMeshIndexCount;
|
||||
|
||||
// TODO: Upload relative scale to the GPU.
|
||||
public float2 RelativeScale
|
||||
{
|
||||
get; set;
|
||||
|
||||
@@ -210,10 +210,9 @@ public class RenderSystem : IDisposable
|
||||
{
|
||||
void StopRenderLoop(Result result)
|
||||
{
|
||||
Debug.Assert(result.IsFailure, "StopRenderLoop should only be called with a failure result.");
|
||||
|
||||
_isRunning = false;
|
||||
_shutdownEvent.Set();
|
||||
|
||||
#if DEBUG
|
||||
Debugger.Break();
|
||||
#endif
|
||||
@@ -224,113 +223,107 @@ public class RenderSystem : IDisposable
|
||||
|
||||
while (_isRunning)
|
||||
{
|
||||
_frameIndex = (uint)(_submittedFenceValue % _config.FrameBufferCount);
|
||||
ref var frameResource = ref _frameResources[_frameIndex];
|
||||
|
||||
// Wait for either CPU ready signal or shutdown signal
|
||||
waitHandles[0] = frameResource.CpuReadyEvent;
|
||||
var waitResult = WaitHandle.WaitAny(waitHandles);
|
||||
|
||||
// If shutdown was signaled or timeout occurred, exit the loop
|
||||
if (!_isRunning || waitResult == 1 || waitResult == WaitHandle.WaitTimeout)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Only proceed if CPU ready event was signaled
|
||||
if (waitResult != 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
_graphicsEngine.Device.GraphicsQueue.WaitForValue(frameResource.FenceValue);
|
||||
|
||||
if (!_resizeRequest.IsEmpty)
|
||||
{
|
||||
WaitIdle();
|
||||
|
||||
var keys = _resizeRequest.Keys.ToArray();
|
||||
foreach (var swapChain in keys)
|
||||
{
|
||||
if (_resizeRequest.TryRemove(swapChain, out var newSize))
|
||||
{
|
||||
swapChain.Resize(newSize.x, newSize.y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var completedFenceValue = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
|
||||
if (_submittedFenceValue < completedFenceValue)
|
||||
{
|
||||
_submittedFenceValue = completedFenceValue;
|
||||
}
|
||||
|
||||
// Begin rendering for this frame
|
||||
frameResource.CommandAllocator.Reset();
|
||||
|
||||
_resourceManager.BeginFrame(_submittedFenceValue);
|
||||
var r = _graphicsEngine.BeginFrame(_submittedFenceValue);
|
||||
|
||||
if (r.IsFailure)
|
||||
{
|
||||
StopRenderLoop(r);
|
||||
break;
|
||||
}
|
||||
|
||||
// Start recording commands
|
||||
|
||||
// TODO: How can we support async compute and async copy?
|
||||
var cmd = _graphicsEngine.GetPooledCommandBuffer(CommandBufferType.Graphics);
|
||||
ref var renderRequests = ref frameResource.RenderRequests;
|
||||
|
||||
try
|
||||
{
|
||||
cmd.Begin(frameResource.CommandAllocator);
|
||||
_frameIndex = (uint)(_submittedFenceValue % _config.FrameBufferCount);
|
||||
ref var frameResource = ref _frameResources[_frameIndex];
|
||||
|
||||
var renderCtx = new RenderContext(_graphicsEngine, _resourceManager, cmd);
|
||||
// Wait for either CPU ready signal or shutdown signal
|
||||
waitHandles[0] = frameResource.CpuReadyEvent;
|
||||
var waitResult = WaitHandle.WaitAny(waitHandles);
|
||||
|
||||
_renderPipeline.Render(renderCtx, renderRequests.AsSpan());
|
||||
_swapChainManager.TransitionToPresent(cmd);
|
||||
|
||||
// End recording commands and submit
|
||||
r = cmd.End();
|
||||
if (r.IsFailure)
|
||||
// If shutdown was signaled or timeout occurred, exit the loop
|
||||
if (!_isRunning || waitResult == 1 || waitResult == WaitHandle.WaitTimeout)
|
||||
{
|
||||
StopRenderLoop(r);
|
||||
break;
|
||||
}
|
||||
|
||||
_graphicsEngine.Device.GraphicsQueue.Submit(cmd);
|
||||
_swapChainManager.PresentAll(cmd);
|
||||
}
|
||||
finally
|
||||
{
|
||||
_graphicsEngine.ReturnPooledCommandBuffer(cmd);
|
||||
|
||||
for (var i = 0; i < renderRequests.Count; i++)
|
||||
// Only proceed if CPU ready event was signaled
|
||||
if (waitResult != 0)
|
||||
{
|
||||
renderRequests[i].Dispose();
|
||||
continue;
|
||||
}
|
||||
|
||||
renderRequests.Clear();
|
||||
_graphicsEngine.Device.GraphicsQueue.WaitForValue(frameResource.FenceValue);
|
||||
|
||||
if (!_resizeRequest.IsEmpty)
|
||||
{
|
||||
WaitIdle();
|
||||
|
||||
var keys = _resizeRequest.Keys.ToArray();
|
||||
foreach (var swapChain in keys)
|
||||
{
|
||||
if (_resizeRequest.TryRemove(swapChain, out var newSize))
|
||||
{
|
||||
swapChain.Resize(newSize.x, newSize.y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var completedFrame = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
|
||||
if (_submittedFenceValue < completedFrame)
|
||||
{
|
||||
_submittedFenceValue = completedFrame;
|
||||
}
|
||||
|
||||
// Begin rendering for this frame
|
||||
frameResource.CommandAllocator.Reset();
|
||||
|
||||
_resourceManager.BeginFrame(_submittedFenceValue);
|
||||
_graphicsEngine.BeginFrame(_submittedFenceValue);
|
||||
|
||||
// Start recording commands
|
||||
|
||||
// TODO: How can we support async compute and async copy?
|
||||
var cmd = _graphicsEngine.GetPooledCommandBuffer(CommandBufferType.Graphics);
|
||||
ref var renderRequests = ref frameResource.RenderRequests;
|
||||
|
||||
try
|
||||
{
|
||||
cmd.Begin(frameResource.CommandAllocator);
|
||||
|
||||
var renderCtx = new RenderContext(_graphicsEngine, _resourceManager, cmd);
|
||||
|
||||
_renderPipeline.Render(renderCtx, renderRequests.AsSpan());
|
||||
_swapChainManager.TransitionToPresent(cmd);
|
||||
|
||||
// End recording commands and submit
|
||||
var r = cmd.End();
|
||||
if (r.IsFailure)
|
||||
{
|
||||
StopRenderLoop(r);
|
||||
break;
|
||||
}
|
||||
|
||||
_graphicsEngine.Device.GraphicsQueue.Submit(cmd);
|
||||
_swapChainManager.PresentAll(cmd);
|
||||
}
|
||||
finally
|
||||
{
|
||||
_graphicsEngine.ReturnPooledCommandBuffer(cmd);
|
||||
|
||||
for (var i = 0; i < renderRequests.Count; i++)
|
||||
{
|
||||
renderRequests[i].Dispose();
|
||||
}
|
||||
|
||||
renderRequests.Clear();
|
||||
}
|
||||
|
||||
_submittedFenceValue++;
|
||||
frameResource.FenceValue = _graphicsEngine.Device.GraphicsQueue.Signal(_submittedFenceValue);
|
||||
frameResource.GpuReadyEvent.Set();
|
||||
|
||||
completedFrame = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
|
||||
|
||||
// End the frame and retire resources based on the freshest observed GPU progress.
|
||||
_resourceManager.EndFrame(completedFrame);
|
||||
_graphicsEngine.EndFrame(completedFrame);
|
||||
}
|
||||
|
||||
_submittedFenceValue++;
|
||||
frameResource.FenceValue = _graphicsEngine.Device.GraphicsQueue.Signal(_submittedFenceValue);
|
||||
frameResource.GpuReadyEvent.Set();
|
||||
|
||||
completedFenceValue = _graphicsEngine.Device.GraphicsQueue.GetCompletedValue();
|
||||
|
||||
// End the frame and retire resources based on the freshest observed GPU progress.
|
||||
_resourceManager.EndFrame(completedFenceValue);
|
||||
r = _graphicsEngine.EndFrame(completedFenceValue);
|
||||
|
||||
if (r.IsFailure)
|
||||
catch (Exception ex)
|
||||
{
|
||||
StopRenderLoop(r);
|
||||
break;
|
||||
StopRenderLoop(Result.Failure($"An exception occurred during rendering: {ex.Message}"));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,10 +54,10 @@ public sealed partial class ResourceManager : IDisposable
|
||||
Dispose();
|
||||
}
|
||||
|
||||
internal void BeginFrame(ulong cpuFrame)
|
||||
internal void BeginFrame(ulong submittedFrame)
|
||||
{
|
||||
Debug.Assert(!_disposed);
|
||||
_submittedFrame = cpuFrame;
|
||||
_submittedFrame = submittedFrame;
|
||||
}
|
||||
|
||||
internal void EndFrame(ulong completedFrame)
|
||||
|
||||
@@ -15,8 +15,6 @@ struct PushConstantData
|
||||
|
||||
struct FrameData
|
||||
{
|
||||
BYTE_ADDRESS_BUFFER viewBuffer;
|
||||
BYTE_ADDRESS_BUFFER instanceBuffer;
|
||||
BYTE_ADDRESS_BUFFER userBuffer;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
// Source: https://github.com/zeux/meshoptimizer/blob/master/demo/clusterlod.h
|
||||
// Translated from C++ to C#.
|
||||
|
||||
// TODO: Move this file to the editor project; there is no reason to build meshlets and LODs at runtime.
|
||||
|
||||
using Ghost.MeshOptimizer;
|
||||
using Misaki.HighPerformance.LowLevel.Buffer;
|
||||
using Misaki.HighPerformance.LowLevel.Collections;
|
||||
|
||||
88
src/Runtime/Ghost.Graphics/Utilities/RenderingUtility.cs
Normal file
88
src/Runtime/Ghost.Graphics/Utilities/RenderingUtility.cs
Normal file
@@ -0,0 +1,88 @@
|
||||
using Ghost.Core;
|
||||
using Ghost.Graphics.RHI;
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace Ghost.Graphics.Utilities;
|
||||
|
||||
/// <summary>
/// Static helpers that copy CPU-side data into GPU buffers and textures.
/// </summary>
public static unsafe class RenderingUtility
{
    /// <summary>
    /// Uploads <paramref name="data"/> into <paramref name="buffer"/>.
    /// </summary>
    /// <remarks>
    /// When the buffer's description reports <see cref="HeapType.Upload"/>, the data is handed straight to
    /// <c>MapResource</c> on the buffer itself. Otherwise a transient upload buffer of the same byte size is
    /// created, filled through <c>MapResource</c>, and a <c>CopyBuffer</c> command is recorded on
    /// <paramref name="cmd"/>.
    /// The method returns without uploading anything when <paramref name="data"/> is empty or when the
    /// resource description lookup reports failure.
    /// </remarks>
    /// <typeparam name="T">Unmanaged element type of the source data.</typeparam>
    /// <param name="resourceManager">Used to create the transient upload buffer.</param>
    /// <param name="resourceDatabase">Used to query the buffer description and to map resources.</param>
    /// <param name="cmd">Command buffer that receives the copy command on the non-upload-heap path.</param>
    /// <param name="buffer">Destination buffer handle.</param>
    /// <param name="data">Elements to upload.</param>
    /// <exception cref="OutOfMemoryException">The transient upload buffer handle is invalid.</exception>
    public static void UploadBuffer<T>(ResourceManager resourceManager, IResourceDatabase resourceDatabase, ICommandBuffer cmd, Handle<GPUBuffer> buffer, params ReadOnlySpan<T> data)
        where T : unmanaged
    {
        // Nothing to copy. Bail out before requesting a zero-byte upload buffer or pinning an
        // empty span (which yields a null pointer).
        if (data.IsEmpty)
        {
            return;
        }

        var description = resourceDatabase.GetResourceDescription(buffer.AsResource());
        if (description.IsFailure)
        {
            // NOTE(review): the failure is dropped silently here, whereas UploadTexture uses
            // GetValueOrThrow for the same lookup. Confirm this best-effort behavior is intended.
            return;
        }

        Debug.Assert(description.Value.Type == ResourceType.Buffer);

        // Widen both operands before multiplying so the byte count cannot wrap in 32-bit int arithmetic.
        var sizeInBytes = (nuint)data.Length * (nuint)sizeof(T);

        if (description.Value.BufferDescription.HeapType == HeapType.Upload)
        {
            fixed (T* pData = data)
            {
                resourceDatabase.MapResource(buffer.AsResource(), 0, null, null, pData, sizeInBytes);
            }

            return;
        }

        var uploadDesc = new BufferDesc
        {
            Size = sizeInBytes,
            Usage = BufferUsage.Upload,
            HeapType = HeapType.Upload,
        };

        var uploadHandle = resourceManager.CreateTransientBuffer(in uploadDesc);
        if (uploadHandle.IsInvalid)
        {
            throw new OutOfMemoryException("Failed to create upload buffer for buffer data.");
        }

        fixed (T* pData = data)
        {
            resourceDatabase.MapResource(uploadHandle.AsResource(), 0, null, null, pData, sizeInBytes);
        }

        cmd.CopyBuffer(buffer, uploadHandle, 0, 0, sizeInBytes);
    }

    /// <summary>
    /// Uploads <paramref name="data"/> into <paramref name="texture"/> through a transient upload buffer.
    /// </summary>
    /// <remarks>
    /// The row and slice pitch are obtained from the texture format's <c>GetSurfaceInfo</c> using the
    /// description's width and height. The upload buffer is sized by
    /// <c>GetIntermediateResourceSize(texture, 0, 1)</c>. A texture barrier to
    /// <see cref="BarrierLayout.CopyDest"/> is recorded on <paramref name="cmd"/> before
    /// <c>UpdateSubResources</c> is called with a single <see cref="SubResourceData"/> entry.
    /// NOTE(review): the length of <paramref name="data"/> is not checked against the slice pitch;
    /// callers presumably must supply at least that many bytes. Confirm against UpdateSubResources.
    /// </remarks>
    /// <typeparam name="T">Unmanaged element type of the source data.</typeparam>
    /// <param name="resourceManager">Used to create the transient upload buffer.</param>
    /// <param name="resourceDatabase">Used to query the texture description and intermediate size.</param>
    /// <param name="cmd">Command buffer that receives the barrier and the update command.</param>
    /// <param name="texture">Destination texture handle.</param>
    /// <param name="data">Source texel data.</param>
    /// <exception cref="ArgumentException"><paramref name="data"/> is empty.</exception>
    /// <exception cref="OutOfMemoryException">The transient upload buffer handle is invalid.</exception>
    public static void UploadTexture<T>(ResourceManager resourceManager, IResourceDatabase resourceDatabase, ICommandBuffer cmd, Handle<GPUTexture> texture, ReadOnlySpan<T> data)
        where T : unmanaged
    {
        // Pinning an empty span yields a null pointer; reject it before any command is recorded
        // rather than passing null source data to UpdateSubResources.
        if (data.IsEmpty)
        {
            throw new ArgumentException("Texture data must not be empty.", nameof(data));
        }

        var desc = resourceDatabase.GetResourceDescription(texture.AsResource()).GetValueOrThrow();
        desc.TextureDescription.Format.GetSurfaceInfo(desc.TextureDescription.Width, desc.TextureDescription.Height, out var rowPitch, out var slicePitch, out _);

        var requiredSize = resourceDatabase.GetIntermediateResourceSize(texture.AsResource(), 0, 1);
        var uploadDesc = new BufferDesc
        {
            Size = requiredSize,
            Usage = BufferUsage.Upload,
            HeapType = HeapType.Upload,
        };

        var uploadHandle = resourceManager.CreateTransientBuffer(in uploadDesc);
        if (uploadHandle.IsInvalid)
        {
            throw new OutOfMemoryException("Failed to create upload buffer for texture data.");
        }

        cmd.Barrier(BarrierDesc.Texture(texture.AsResource(), BarrierSync.Copy, BarrierAccess.CopyDest, BarrierLayout.CopyDest));

        fixed (T* pData = data)
        {
            var subresourceData = new SubResourceData
            {
                pData = pData,
                rowPitch = rowPitch,
                slicePitch = slicePitch
            };

            cmd.UpdateSubResources(texture.AsResource(), uploadHandle.AsResource(), subresourceData);
        }
    }
}
|
||||
@@ -90,7 +90,6 @@ shader "MyShader/Standard"
|
||||
ByteAddressBuffer vertices = GET_BUFFER(meshData.vertexBuffer);
|
||||
Vertex v = vertices.Load<Vertex>(vertexIndex * sizeof(Vertex));
|
||||
|
||||
FrameData globalFrameData = LoadData<FrameData>(g_PushConstantData.frameBuffer, 0);
|
||||
ViewData viewData = LoadData<ViewData>(g_PushConstantData.viewBuffer, 0);
|
||||
|
||||
float4 worldPos = mul(instanceData.localToWorld, float4(v.position.xyz, 1.0f));
|
||||
|
||||
Reference in New Issue
Block a user