feat(graphics): improve rendering pipeline and docs

- Refactor D3D12 backend and RenderGraph module
- Update graphics RHI and core rendering components
- Add Random.hlsl shader include
- Regenerate API documentation and update user guides
This commit is contained in:
2026-03-27 22:23:44 +09:00
parent 0a2eb619eb
commit d8a7b07624
495 changed files with 51961 additions and 892 deletions

View File

@@ -61,6 +61,8 @@ public class QueryBenchmark
}
}
// Results: 620 us, 98.5% cache hits (14731 misses), 13260812 instructions retired, 4x faster than QueryGameObjects
[Benchmark(Baseline = true)]
public void QueryEntities()
{

View File

@@ -1,11 +1,13 @@
using Ghost.Core;
using Ghost.Core.Graphics;
using Ghost.DSL.ShaderCompiler;
using Ghost.Engine.Components;
using Ghost.Graphics.Core;
using Ghost.Graphics.RenderGraphModule;
using Ghost.Graphics.RenderPipeline;
using Ghost.Graphics.RHI;
using Misaki.HighPerformance.Mathematics;
using Misaki.HighPerformance.Mathematics.Geometry;
using Misaki.HighPerformance.Utilities;
namespace Ghost.Graphics.Test.RenderPasses;
@@ -27,6 +29,7 @@ public unsafe partial class TestRenderPipeline : IRenderPipeline
public Handle<Material> material;
public uint globalIndex;
public uint viewIndex;
public uint instanceIndex;
}
private readonly RenderGraph _renderGraph;
@@ -70,6 +73,43 @@ public unsafe partial class TestRenderPipeline : IRenderPipeline
renderSystem.GraphicsEngine.ShaderCompiler.CompilePass(in pass, in config, variantKey).GetValueOrThrow();
}
/// <summary>
/// Computes the single point shared by three planes.
/// </summary>
/// <remarks>
/// Each argument is read as (xyz = normal, w = offset); the arithmetic solves normal . x + w = 0 for all three at once.
/// NOTE(review): that plane-equation sign convention is inferred from the arithmetic below - confirm against the Plane type.
/// No guard is applied when the three normals are linearly dependent: the determinant is then zero and the result is non-finite.
/// </remarks>
/// <param name="p0">First plane.</param>
/// <param name="p1">Second plane.</param>
/// <param name="p2">Third plane.</param>
/// <returns>The intersection point of the three planes.</returns>
private static float3 IntersectFrustumPlanes(float4 p0, float4 p1, float4 p2)
{
    float3 normalA = p0.xyz;
    float3 normalB = p1.xyz;
    float3 normalC = p2.xyz;

    // Scalar triple product of the normals; the denominator of the solution.
    float3 crossAB = math.cross(normalA, normalB);
    float determinant = math.dot(crossAB, normalC);

    // Numerator: each plane's offset weights the cross product of the other two normals.
    float3 numerator = math.cross(normalC, normalB) * p0.w
                     + math.cross(normalA, normalC) * p1.w
                     - crossAB * p2.w;

    return numerator * (1.0f / determinant);
}
/// <summary>
/// Builds a <see cref="Frustum"/> with six planes and eight corner points for a view.
/// </summary>
/// <param name="nearClip">Near clip distance, subtracted from the near plane's distance.</param>
/// <param name="farClip">Far clip distance, added to the far plane's distance.</param>
/// <param name="vp">Matrix handed to <c>Frustum.CalculateFrustumPlanes</c> to produce the initial planes.</param>
/// <param name="viewDir">View direction used as the near-plane normal (negated for the far plane). Passed to a "unit normal" factory, so presumably it must be normalized - confirm with callers.</param>
/// <param name="viewPos">View position; both replacement planes are created through this point before being offset.</param>
/// <returns>The populated frustum.</returns>
private static Frustum CreateFrustum(float nearClip, float farClip, float4x4 vp, float3 viewDir, float3 viewPos)
{
    var result = new Frustum();
    Frustum.CalculateFrustumPlanes(vp, ref result.planes);

    // Planes 4 and 5 are rebuilt from the view position and direction; the ones derived from the matrix
    // do not work for the oblique projection matrices used for reflection.
    var nearCap = Plane.CreateFromUnitNormalAndPointInPlane(viewDir, viewPos);
    nearCap.Distance -= nearClip;
    result.planes[4] = nearCap;

    var farCap = Plane.CreateFromUnitNormalAndPointInPlane(-viewDir, viewPos);
    farCap.Distance += farClip;
    result.planes[5] = farCap;

    // Each corner is the intersection of one plane from {0, 1}, one from {3, 2} and one from {4, 5}.
    // Bit 0 of the corner index picks plane 0 or 1, bit 1 picks plane 3 or 2, and corners 4..7 use plane 5 instead of 4.
    for (var corner = 0; corner < 8; corner++)
    {
        var firstIndex = corner & 1;
        var secondIndex = (corner & 2) != 0 ? 2 : 3;
        var capIndex = corner < 4 ? 4 : 5;

        result.corners[corner] = IntersectFrustumPlanes(
            result.planes[firstIndex],
            result.planes[secondIndex],
            result.planes[capIndex]);
    }

    return result;
}
public void Render(RenderContext ctx, ReadOnlySpan<RenderRequest> requests)
{
var resourceManager = _renderSystem.ResourceManager;
@@ -86,121 +126,278 @@ public unsafe partial class TestRenderPipeline : IRenderPipeline
continue; // Nothing to render
}
var instanceDataSize = (uint)(instanceCount * sizeof(InstanceData));
var instanceBufferDesc = ResourceDesc.Buffer(new BufferDesc
Handle<Texture> rt;
if (request.swapChainIndex < 0)
{
Size = instanceDataSize,
Stride = (uint)sizeof(InstanceData),
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Upload, // Upload directly for simplicity in testing
});
// TODO: Optimize by suballocation.
var instanceBufferHandle = resourceManager.GetPooledResource(instanceBufferDesc);
var instanceBufferResource = instanceBufferHandle.AsGraphicsBuffer();
var instanceDataArray = new InstanceData[instanceCount];
var instanceIdx = 0;
foreach (var record in request.opaqueRenderList)
{
instanceDataArray[instanceIdx++] = new InstanceData
{
localToWorld = record.localToWorld
};
rt = request.colorTarget;
}
ctx.CommandBuffer.UploadBuffer(instanceBufferResource, instanceDataArray.AsSpan());
// 2. Allocate and populate View Data buffer
var viewDataSize = (uint)sizeof(PerViewData);
var viewBufferDesc = ResourceDesc.Buffer(new BufferDesc
else if (_renderSystem.SwapChainManager.TryGetSwapChain(request.swapChainIndex, out var swapChain))
{
Size = viewDataSize,
Stride = viewDataSize,
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Upload,
});
var viewBufferHandle = resourceManager.GetPooledResource(viewBufferDesc);
var viewBufferResource = viewBufferHandle.AsGraphicsBuffer();
var viewData = new PerViewData
{
viewMatrix = request.view.viewMatrix,
projectionMatrix = request.view.projectionMatrix,
cameraPosition = request.view.position,
nearClip = request.view.nearClipPlane,
cameraDirection = request.view.viewMatrix.c2.xyz, // check if that's correct orientation
farClip = request.view.farClipPlane,
screenSize = new float4(request.view.sensorSize.x, request.view.sensorSize.y, 1.0f / request.view.sensorSize.x, 1.0f / request.view.sensorSize.y)
};
ctx.CommandBuffer.UploadBuffer(viewBufferResource, new ReadOnlySpan<PerViewData>(in viewData));
// 3. Allocate and populate Global Frame Data buffer
var frameDataSize = (uint)sizeof(GlobalFrameData);
var frameBufferDesc = ResourceDesc.Buffer(new BufferDesc
{
Size = frameDataSize,
Stride = frameDataSize,
Usage = BufferUsage.Raw | BufferUsage.ShaderResource, // or CBV? Let's use Raw to keep it consistent
MemoryType = ResourceMemoryType.Upload,
});
var frameBufferHandle = resourceManager.GetPooledResource(frameBufferDesc);
var frameBufferResource = frameBufferHandle.AsGraphicsBuffer();
var frameData = new GlobalFrameData
{
viewBufferIndex = resourceDatabase.GetBindlessIndex(viewBufferResource.AsResource()),
instanceBufferIndex = resourceDatabase.GetBindlessIndex(instanceBufferResource.AsResource()),
};
ctx.CommandBuffer.UploadBuffer(frameBufferResource, new ReadOnlySpan<GlobalFrameData>(in frameData));
if (request.renderFunc != null)
{
request.renderFunc(in ctx, in request);
rt = swapChain.GetCurrentBackBuffer();
}
else
{
var backBuffer = _renderGraph.ImportTexture(request.colorTarget, "BackBuffer", clearAtFirstUse: true, discardAtLastUse: false);
MeshletDebugPass(backBuffer, request.opaqueRenderList, resourceDatabase.GetBindlessIndex(frameBufferResource.AsResource()), resourceDatabase.GetBindlessIndex(viewBufferResource.AsResource()));
continue;
}
// We must enqueue a return for the pooled resources so they are freed next frame.
resourceManager.ReturnPooledResource(instanceBufferHandle);
resourceManager.ReturnPooledResource(viewBufferHandle);
resourceManager.ReturnPooledResource(frameBufferHandle);
Handle<GPUResource> instanceBufferHandle = default;
Handle<GPUResource> viewBufferHandle = default;
Handle<GPUResource> frameBufferHandle = default;
try
{
var rtResult = _renderSystem.GraphicsEngine.ResourceDatabase.GetResourceDescription(rt.AsResource());
if (rtResult.IsFailure)
{
continue;
}
var rtSize = new uint2(rtResult.Value.TextureDescription.Width, rtResult.Value.TextureDescription.Height);
var aspectScreen = (float)rtSize.x / rtSize.y;
// NOTE: We assume camera's scale is always (1, 1, 1). Otherwise fastinverse will fail and we need to use regular inverse which is more expensive.
var viewMatrix = math.fastinverse(request.view.localToWorld);
var vfov = 2.0f * math.atan(request.view.sensorSize.y / 2.0f * request.view.focalLength);
var hfov = 2.0f * math.atan(request.view.sensorSize.x / 2.0f * request.view.focalLength);
var aspectSensor = request.view.sensorSize.x / request.view.sensorSize.y;
float vfovF;
switch (request.view.gateFit)
{
case GateFit.Vertical:
vfovF = vfov;
break;
case GateFit.Horizontal:
// Adjust VFOV so that the sensor width fits the screen width
var horizontalAspectBuffer = math.tan(hfov * 0.5f);
vfovF = 2.0f * math.atan(horizontalAspectBuffer / aspectScreen);
break;
case GateFit.Fill:
if (aspectSensor > aspectScreen)
{
goto case GateFit.Vertical;
}
else
{
goto case GateFit.Horizontal;
}
case GateFit.Overscan:
if (aspectSensor > aspectScreen)
{
goto case GateFit.Horizontal;
}
else
{
goto case GateFit.Vertical;
}
default:
vfovF = vfov;
break;
}
var m_00 = 1.0f / aspectScreen * math.tan(vfovF * 0.5f);
var m_11 = 1.0f / math.tan(vfovF * 0.5f);
var m_22 = -(request.view.farClipPlane + request.view.nearClipPlane) / (request.view.farClipPlane - request.view.nearClipPlane);
var m_23 = -(2.0f * request.view.farClipPlane * request.view.nearClipPlane) / (request.view.farClipPlane - request.view.nearClipPlane);
var projectionMatrix = new float4x4
(
m_00, 0, 0, 0,
0, m_11, 0, 0,
0, 0, m_22, m_23,
0, 0, -1, 0
);
//var vp = math.mul(projectionMatrix, viewMatrix);
//var viewDir = math.normalize(request.view.localToWorld.c2.xyz);
//var viewPos = request.view.localToWorld.c3.xyz;
//var frustum = CreateFrustum(request.view.nearClipPlane, request.view.farClipPlane, vp, viewDir, viewPos);
var instanceDataSize = (uint)(instanceCount * sizeof(InstanceData));
var instanceBufferDesc = ResourceDesc.Buffer(new BufferDesc
{
Size = instanceDataSize,
Stride = (uint)sizeof(InstanceData),
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Upload, // Upload directly for simplicity in testing
});
// TODO: Optimize by suballocation.
instanceBufferHandle = resourceManager.GetPooledResource(instanceBufferDesc);
var instanceBufferResource = instanceBufferHandle.AsGraphicsBuffer();
var instanceDataArray = new InstanceData[instanceCount];
var instanceIdx = 0;
foreach (var record in request.opaqueRenderList)
{
var (mesh, error) = resourceManager.GetMeshReference(record.mesh);
if (error.IsFailure)
{
continue;
}
(var mat, error) = resourceManager.GetMaterialReference(_meshletMaterial);
if (error.IsFailure)
{
continue;
}
instanceDataArray[instanceIdx++] = new InstanceData
{
localToWorld = record.localToWorld,
meshBufferIndex = resourceDatabase.GetBindlessIndex(mesh.Get().ObjectDataBuffer.AsResource()),
materialBufferIndex = resourceDatabase.GetBindlessIndex(mat.Get()._cBufferCache.GpuResource.AsResource())
};
}
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(instanceBufferHandle, null, BarrierSync.Copy, null, BarrierAccess.CopyDest));
ctx.CommandBuffer.UploadBuffer(instanceBufferResource, instanceDataArray.AsSpan());
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(instanceBufferHandle, BarrierSync.Copy, BarrierSync.AllShading, BarrierAccess.CopyDest, BarrierAccess.ShaderResource));
// 2. Allocate and populate View Data buffer
var viewDataSize = (uint)sizeof(ViewData);
var viewBufferDesc = ResourceDesc.Buffer(new BufferDesc
{
Size = viewDataSize,
Stride = viewDataSize,
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Upload,
});
viewBufferHandle = resourceManager.GetPooledResource(viewBufferDesc);
var viewData = new ViewData
{
viewMatrix = viewMatrix,
projectionMatrix = projectionMatrix,
cameraPosition = request.view.localToWorld.c3.xyz,
nearClip = request.view.nearClipPlane,
cameraDirection = viewMatrix.c2.xyz, // check if that's correct orientation
farClip = request.view.farClipPlane,
screenSize = new float4(request.view.sensorSize.x, request.view.sensorSize.y, 1.0f / request.view.sensorSize.x, 1.0f / request.view.sensorSize.y)
};
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(viewBufferHandle, null, BarrierSync.Copy, null, BarrierAccess.CopyDest));
ctx.CommandBuffer.UploadBuffer(viewBufferHandle.AsGraphicsBuffer(), new ReadOnlySpan<ViewData>(in viewData));
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(viewBufferHandle, BarrierSync.Copy, BarrierSync.AllShading, BarrierAccess.CopyDest, BarrierAccess.ShaderResource));
// 3. Allocate and populate Global Frame Data buffer
var frameDataSize = (uint)sizeof(FrameData);
var frameBufferDesc = ResourceDesc.Buffer(new BufferDesc
{
Size = frameDataSize,
Stride = frameDataSize,
Usage = BufferUsage.Raw | BufferUsage.ShaderResource,
MemoryType = ResourceMemoryType.Upload,
});
frameBufferHandle = resourceManager.GetPooledResource(frameBufferDesc);
var frameData = new FrameData
{
viewBufferIndex = resourceDatabase.GetBindlessIndex(viewBufferHandle),
instanceBufferIndex = resourceDatabase.GetBindlessIndex(instanceBufferResource.AsResource()),
};
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(frameBufferHandle, null, BarrierSync.Copy, null, BarrierAccess.CopyDest));
ctx.CommandBuffer.UploadBuffer(frameBufferHandle.AsGraphicsBuffer(), new ReadOnlySpan<FrameData>(in frameData));
ctx.CommandBuffer.Barrier(BarrierDesc.Buffer(frameBufferHandle, BarrierSync.Copy, BarrierSync.AllShading, BarrierAccess.CopyDest, BarrierAccess.ShaderResource));
if (request.renderFunc != null)
{
request.renderFunc(in ctx, in request);
}
else
{
_renderGraph.Reset();
var backBuffer = _renderGraph.ImportTexture(rt, "BackBuffer");
MeshletDebugPass(backBuffer, request.opaqueRenderList,
resourceDatabase.GetBindlessIndex(frameBufferHandle), resourceDatabase.GetBindlessIndex(viewBufferHandle), resourceDatabase.GetBindlessIndex(instanceBufferHandle));
var viewState = new ViewState(rtSize.x, rtSize.y, rtSize.x, rtSize.y);
_renderGraph.Compile(viewState);
_renderGraph.Execute(ctx.CommandBuffer);
}
}
finally
{
// We must enqueue a return for the pooled resources so they are freed next frame.
resourceManager.ReturnPooledResource(instanceBufferHandle);
resourceManager.ReturnPooledResource(viewBufferHandle);
resourceManager.ReturnPooledResource(frameBufferHandle);
if (request.swapChainIndex >= 0)
{
_renderSystem.SwapChainManager.ReleaseSwapChain(request.swapChainIndex);
}
}
}
}
private void MeshletDebugPass(Identifier<RGTexture> backbuffer, RenderList renderList, uint globalIndex, uint viewIndex)
// FIX: The root signature layout has not been changed yet.
private void MeshletDebugPass(Identifier<RGTexture> backbuffer, RenderList renderList, uint globalIndex, uint viewIndex, uint instanceBuffer)
{
using (var builder = _renderGraph.AddRasterRenderPass<MeshletDebugPassData>("Meshlet Debug Pass", out var passData))
{
passData.backbuffer = backbuffer;
passData.renderList = renderList;
passData.globalIndex = globalIndex;
passData.viewIndex = viewIndex;
passData.instanceIndex = instanceBuffer;
passData.material = _meshletMaterial;
builder.SetColorAttachment(backbuffer, 0);
builder.SetRenderFunc<MeshletDebugPassData>(static (data, ctx)=>
{
//var cmd = ctx.GetCommandBufferUnsafe();
//var (backBufferDesc, err) = ctx.ResourceDatabase.GetResourceDescription(ctx.GetActualResource(data.backbuffer.AsResource()));
//if (err != Error.None)
//{
// return;
//}
//var viewportDesc = new ViewportDesc
//{
// X = 0,
// Y = 0,
// Width = backBufferDesc.TextureDescription.Width,
// Height = backBufferDesc.TextureDescription.Height,
// MinDepth = 0,
// MaxDepth = 1
//};
//var rectDesc = new ScissorRectDesc
//{
// Left = 0,
// Top = 0,
// Right = backBufferDesc.TextureDescription.Width,
// Bottom = backBufferDesc.TextureDescription.Height
//};
//cmd.SetViewport(viewportDesc);
//cmd.SetScissorRect(rectDesc);
ctx.SetGlobalData(data.globalIndex, data.viewIndex);
ctx.SetInstanceData(data.instanceIndex);
ctx.SetActiveMaterial(data.material);
var instanceIndex = 0u;
foreach (var record in data.renderList)
{
ctx.SetActiveMesh(record.mesh);
ctx.SetInstanceIndex(instanceIndex);
var meshRefResult = ctx.ResourceManager.GetMeshReference(record.mesh);
if (meshRefResult.IsSuccess)
{
var meshletCount = (uint)meshRefResult.Value.MeshletData.meshlets.Count;
var meshletCount = (uint)meshRefResult.Value.MeshletData.meshletCount;
ctx.DispatchMesh(new uint3(meshletCount, 1, 1));
}
instanceIndex++;

View File

@@ -49,7 +49,7 @@ public sealed partial class GraphicsTestWindow : Window
InitialRenderPipelineSettings = new RenderPasses.TestRenderPipelineSettings()
});
_swapChain = _renderSystem.GraphicsEngine.CreateSwapChain(new SwapChainDesc
_swapChain = _renderSystem.SwapChainManager.EnsureSwapChain(0, new SwapChainDesc
{
Width = (uint)AppWindow.Size.Width,
Height = (uint)AppWindow.Size.Height,
@@ -80,7 +80,7 @@ public sealed partial class GraphicsTestWindow : Window
_world.EntityManager.SetComponent(cameraEntity, new Camera
{
colorTarget = _swapChain.GetCurrentBackBuffer(), // NOTE: This should be updated every frame to the current back buffer.
swapChainIndex = 0,
depthTarget = Handle<Texture>.Invalid,
nearClipPlane = 0.1f,
farClipPlane = 1000.0f,
@@ -112,7 +112,6 @@ public sealed partial class GraphicsTestWindow : Window
_renderSystem.GraphicsEngine.Device.GraphicsQueue.Submit(directCmd);
_renderSystem.GraphicsEngine.Device.GraphicsQueue.WaitIdle();
var meshSet = new ComponentSet(scope.AllocationHandle, ComponentTypeID<MeshInstance>.Value, ComponentTypeID<LocalToWorld>.Value);
var meshEntity = _world.EntityManager.CreateEntity(meshSet);
_world.EntityManager.SetComponent(meshEntity, new MeshInstance
@@ -183,16 +182,6 @@ public sealed partial class GraphicsTestWindow : Window
if (_renderSystem.CPUFenceValue < _renderSystem.GPUFenceValue + _renderSystem.MaxFrameLatency)
{
var queryID = new QueryBuilder().WithAll<Camera>().Build(_world);
ref var query = ref _world.ComponentManager.GetEntityQueryReference(queryID);
// FIX: Critical bug: resize happens on the render thread, but OnRendering is invoked on the UI thread, so there is a chance that our extraction system has already sent a request to the render thread with an old back buffer handle, which has become invalid after the resize.
// A proper solution is to use a swap chain manager: the camera references only the ID of the swap chain, and the current back buffer handle is extracted on the render thread.
foreach (ref var cam in query.GetComponentIterator<Camera>())
{
cam.colorTarget = _swapChain.GetCurrentBackBuffer();
}
_world.SystemManager.UpdateAll(default);
_renderSystem.SignalCPUReady();
}