Added blas-tlas;
Added Multi-Scattering GGX;
This commit is contained in:
@@ -182,7 +182,7 @@ void create_orthonormal_basis(vec3s direction, vec3s* u, vec3s* v)
|
||||
*v = glms_vec3_normalize(glms_vec3_cross(direction, *u));
|
||||
}
|
||||
|
||||
vec3s random_cosine_direction_angular(vec3s direction, float angular, uint32_t index, uint32_t d1, uint32_t d2, uint32_t scramble)
|
||||
vec3s random_cosine_direction_angular(vec3s direction, float angular, uint32_t index, uint16_t d1, uint16_t d2, uint32_t scramble)
|
||||
{
|
||||
vec3s local_dir = sample_cosine_weighted_hemisphere_z_angular(angular, index, d1, d2, scramble);
|
||||
|
||||
@@ -200,7 +200,7 @@ vec3s random_cosine_direction_angular(vec3s direction, float angular, uint32_t i
|
||||
|
||||
// Samples a direction from the hemisphere oriented along 'normal'
|
||||
// with a cosine-weighted distribution.
|
||||
vec3s random_cosine_direction(vec3s direction, uint32_t index, uint32_t d1, uint32_t d2, uint32_t scramble)
|
||||
vec3s random_cosine_direction(vec3s direction, uint32_t index, uint16_t d1, uint16_t d2, uint32_t scramble)
|
||||
{
|
||||
vec3s local_dir = sample_cosine_weighted_hemisphere_z(index, d1, d2, scramble);
|
||||
|
||||
|
||||
172
source/Algorithm/GGXMultiScatter.c
Normal file
172
source/Algorithm/GGXMultiScatter.c
Normal file
@@ -0,0 +1,172 @@
|
||||
#include "Algorithm/GGXMultiScatter.h"
|
||||
|
||||
#include "Algorithm/MicrofacetGGX.h"
|
||||
|
||||
#define GGX_MS_LUT_NOV_SIZE 32
|
||||
#define GGX_MS_LUT_ROUGH_SIZE 32
|
||||
#define GGX_MS_LUT_SAMPLES 256
|
||||
|
||||
static float g_ggx_E_lut[GGX_MS_LUT_ROUGH_SIZE][GGX_MS_LUT_NOV_SIZE];
|
||||
static float g_ggx_Eavg_lut[GGX_MS_LUT_ROUGH_SIZE];
|
||||
static bool g_ggx_ms_lut_initialized = false;
|
||||
|
||||
static inline float saturatef(float x)
|
||||
{
|
||||
return fminf(fmaxf(x, 0.0f), 1.0f);
|
||||
}
|
||||
|
||||
static inline float radical_inverse_vdc(uint32_t bits)
|
||||
{
|
||||
bits = (bits << 16u) | (bits >> 16u);
|
||||
bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
|
||||
bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
|
||||
bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
|
||||
bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
|
||||
return (float)bits * 2.3283064365386963e-10f; // / 2^32
|
||||
}
|
||||
|
||||
static inline vec3s vec3_div_safe(vec3s a, vec3s b, float eps)
|
||||
{
|
||||
return (vec3s){a.x / fmaxf(b.x, eps), a.y / fmaxf(b.y, eps), a.z / fmaxf(b.z, eps)};
|
||||
}
|
||||
|
||||
static inline vec3s fresnel_schlick_avg(vec3s f0)
|
||||
{
|
||||
// Cosine-weighted hemispherical average for Schlick Fresnel.
|
||||
// Approximation: <(1 - cos)^5>_cos = 1/21.
|
||||
const float t = 1.0f / 21.0f;
|
||||
return glms_vec3_add(f0, glms_vec3_scale(glms_vec3_sub(glms_vec3_one(), f0), t));
|
||||
}
|
||||
|
||||
static inline float ggx_ms_Eavg(float roughness)
|
||||
{
|
||||
ggx_ms_init_lut_once();
|
||||
|
||||
roughness = saturatef(roughness);
|
||||
float y = roughness * (float)(GGX_MS_LUT_ROUGH_SIZE - 1);
|
||||
uint32_t y0 = (uint32_t)floorf(y);
|
||||
uint32_t y1 = (y0 + 1u < GGX_MS_LUT_ROUGH_SIZE) ? (y0 + 1u) : y0;
|
||||
float ty = y - (float)y0;
|
||||
return saturatef(glm_lerp(g_ggx_Eavg_lut[y0], g_ggx_Eavg_lut[y1], ty));
|
||||
}
|
||||
|
||||
float ggx_ms_E(float NoV, float roughness)
|
||||
{
|
||||
ggx_ms_init_lut_once();
|
||||
|
||||
NoV = saturatef(NoV);
|
||||
roughness = saturatef(roughness);
|
||||
|
||||
float x = NoV * (float)(GGX_MS_LUT_NOV_SIZE - 1);
|
||||
float y = roughness * (float)(GGX_MS_LUT_ROUGH_SIZE - 1);
|
||||
|
||||
uint32_t x0 = (uint32_t)floorf(x);
|
||||
uint32_t y0 = (uint32_t)floorf(y);
|
||||
uint32_t x1 = (x0 + 1u < GGX_MS_LUT_NOV_SIZE) ? (x0 + 1u) : x0;
|
||||
uint32_t y1 = (y0 + 1u < GGX_MS_LUT_ROUGH_SIZE) ? (y0 + 1u) : y0;
|
||||
|
||||
float tx = x - (float)x0;
|
||||
float ty = y - (float)y0;
|
||||
|
||||
float e00 = g_ggx_E_lut[y0][x0];
|
||||
float e10 = g_ggx_E_lut[y0][x1];
|
||||
float e01 = g_ggx_E_lut[y1][x0];
|
||||
float e11 = g_ggx_E_lut[y1][x1];
|
||||
|
||||
float e0 = glm_lerp(e00, e10, tx);
|
||||
float e1 = glm_lerp(e01, e11, tx);
|
||||
return saturatef(glm_lerp(e0, e1, ty));
|
||||
}
|
||||
|
||||
void ggx_ms_init_lut_once(void)
|
||||
{
|
||||
if (g_ggx_ms_lut_initialized)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp critical(ggx_ms_lut_init)
|
||||
#endif
|
||||
{
|
||||
if (!g_ggx_ms_lut_initialized)
|
||||
{
|
||||
vec3s n = (vec3s){0.0f, 0.0f, 1.0f};
|
||||
|
||||
for (uint32_t ry = 0; ry < GGX_MS_LUT_ROUGH_SIZE; ry++)
|
||||
{
|
||||
float roughness = ((float)ry + 0.5f) / (float)GGX_MS_LUT_ROUGH_SIZE;
|
||||
roughness = fmaxf(roughness, 0.001f);
|
||||
|
||||
float Eavg = 0.0f;
|
||||
|
||||
for (uint32_t ix = 0; ix < GGX_MS_LUT_NOV_SIZE; ix++)
|
||||
{
|
||||
float NoV = ((float)ix + 0.5f) / (float)GGX_MS_LUT_NOV_SIZE;
|
||||
NoV = fmaxf(NoV, 1e-4f);
|
||||
|
||||
float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - NoV * NoV));
|
||||
vec3s v = glms_vec3_normalize((vec3s){sin_theta, 0.0f, NoV});
|
||||
|
||||
float sum = 0.0f;
|
||||
uint32_t valid = 0;
|
||||
uint32_t scramble = hash_uint32((ry + 1u) * 0x9E3779B9u ^ (ix + 1u) * 0x7F4A7C15u);
|
||||
|
||||
for (uint32_t s = 0; s < GGX_MS_LUT_SAMPLES; s++)
|
||||
{
|
||||
float u1 = ((float)s + 0.5f) / (float)GGX_MS_LUT_SAMPLES;
|
||||
float u2 = radical_inverse_vdc(s ^ scramble);
|
||||
|
||||
vec3s h = ggx_sample_vndf(n, v, roughness, u1, u2);
|
||||
vec3s l = glms_vec3_reflect(glms_vec3_negate(v), h);
|
||||
float NoL = l.z;
|
||||
if (NoL <= 0.0f)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// For F=1, importance sampling with VNDF yields a simple estimator:
|
||||
// rho_ss(v) = E[ G1(NoL) ].
|
||||
sum += ggx_g1(NoL, roughness);
|
||||
valid++;
|
||||
}
|
||||
|
||||
float E = (valid > 0u) ? (sum / (float)valid) : 0.0f;
|
||||
E = saturatef(E);
|
||||
|
||||
g_ggx_E_lut[ry][ix] = E;
|
||||
|
||||
// Accumulate hemispherical average with cosine weighting:
|
||||
// Eavg = 2 * \int_0^1 E(mu) * mu dmu
|
||||
float mu = NoV;
|
||||
Eavg += E * mu;
|
||||
}
|
||||
|
||||
Eavg = 2.0f * Eavg * (1.0f / (float)GGX_MS_LUT_NOV_SIZE);
|
||||
g_ggx_Eavg_lut[ry] = saturatef(Eavg);
|
||||
}
|
||||
|
||||
g_ggx_ms_lut_initialized = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vec3s ggx_multi_scatter_lambert(vec3s f0, float NoV, float NoL, float roughness)
|
||||
{
|
||||
float Eo = ggx_ms_E(NoV, roughness);
|
||||
float Ei = ggx_ms_E(NoL, roughness);
|
||||
float Eavg = ggx_ms_Eavg(roughness);
|
||||
|
||||
vec3s Favg = fresnel_schlick_avg(f0);
|
||||
|
||||
// Series factor for multiple bounces inside the microfacet layer.
|
||||
// C = Favg^2 / (1 - Favg * (1 - Eavg))
|
||||
vec3s Favg2 = glms_vec3_mul(Favg, Favg);
|
||||
vec3s denom = glms_vec3_sub(glms_vec3_one(), glms_vec3_scale(Favg, (1.0f - Eavg)));
|
||||
vec3s C = vec3_div_safe(Favg2, denom, 1e-6f);
|
||||
|
||||
float scale = (1.0f - Eo) * (1.0f - Ei);
|
||||
scale /= (PI * fmaxf(1.0f - Eavg, 1e-6f));
|
||||
|
||||
return glms_vec3_scale(C, scale);
|
||||
}
|
||||
89
source/Algorithm/MicrofacetGGX.c
Normal file
89
source/Algorithm/MicrofacetGGX.c
Normal file
@@ -0,0 +1,89 @@
|
||||
#include "Algorithm/MicrofacetGGX.h"
|
||||
|
||||
// Trowbridge-Reitz GGX Normal Distribution Function
|
||||
float ggx_distribution(float n_dot_h, float roughness)
|
||||
{
|
||||
float a = roughness * roughness;
|
||||
float a2 = a * a;
|
||||
float n_dot_h2 = n_dot_h * n_dot_h;
|
||||
|
||||
float nom = a2;
|
||||
float denom = (n_dot_h2 * (a2 - 1.0f) + 1.0f);
|
||||
denom = PI * denom * denom;
|
||||
|
||||
return nom / fmaxf(denom, 1e-20f);
|
||||
}
|
||||
|
||||
float ggx_g1(float n_dot_v, float roughness)
|
||||
{
|
||||
if (n_dot_v <= 0.0f)
|
||||
{
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
// Our GGX D() uses alpha = roughness^2, so keep the same convention here.
|
||||
float alpha = roughness * roughness;
|
||||
float alpha2 = alpha * alpha;
|
||||
|
||||
float n2 = n_dot_v * n_dot_v;
|
||||
float denom = n_dot_v + sqrtf(alpha2 + (1.0f - alpha2) * n2);
|
||||
return (2.0f * n_dot_v) / fmaxf(denom, FLT_EPSILON);
|
||||
}
|
||||
|
||||
float ggx_g_smith(float n_dot_v, float n_dot_l, float roughness)
|
||||
{
|
||||
return ggx_g1(n_dot_v, roughness) * ggx_g1(n_dot_l, roughness);
|
||||
}
|
||||
|
||||
vec3s ggx_sample_vndf(vec3s n, vec3s v, float roughness, float u1, float u2)
|
||||
{
|
||||
// Build local frame around n.
|
||||
vec3s tangent, bitangent;
|
||||
create_orthonormal_basis(n, &tangent, &bitangent);
|
||||
|
||||
// View direction in local coordinates.
|
||||
vec3s v_local = (vec3s){glms_vec3_dot(v, tangent), glms_vec3_dot(v, bitangent), glms_vec3_dot(v, n)};
|
||||
v_local = glms_vec3_normalize(v_local);
|
||||
|
||||
// Stretch view.
|
||||
float alpha = roughness * roughness;
|
||||
vec3s v_h = (vec3s){alpha * v_local.x, alpha * v_local.y, v_local.z};
|
||||
v_h = glms_vec3_normalize(v_h);
|
||||
|
||||
// Orthonormal basis around v_h.
|
||||
vec3s t1;
|
||||
if (v_h.z < 0.9999f)
|
||||
{
|
||||
t1 = glms_vec3_normalize(glms_vec3_cross((vec3s){0.0f, 0.0f, 1.0f}, v_h));
|
||||
}
|
||||
else
|
||||
{
|
||||
t1 = (vec3s){1.0f, 0.0f, 0.0f};
|
||||
}
|
||||
vec3s t2 = glms_vec3_cross(v_h, t1);
|
||||
|
||||
// Sample a point on a disk.
|
||||
float r = sqrtf(u1);
|
||||
float phi = TWO_PI * u2;
|
||||
float t1p = r * cosf(phi);
|
||||
float t2p = r * sinf(phi);
|
||||
|
||||
// Warp to the hemisphere.
|
||||
float s = 0.5f * (1.0f + v_h.z);
|
||||
t2p = (1.0f - s) * sqrtf(fmaxf(0.0f, 1.0f - t1p * t1p)) + s * t2p;
|
||||
|
||||
vec3s n_h = glms_vec3_add(
|
||||
glms_vec3_add(glms_vec3_scale(t1, t1p), glms_vec3_scale(t2, t2p)),
|
||||
glms_vec3_scale(v_h, sqrtf(fmaxf(0.0f, 1.0f - t1p * t1p - t2p * t2p))));
|
||||
|
||||
// Unstretch.
|
||||
vec3s h_local = (vec3s){alpha * n_h.x, alpha * n_h.y, fmaxf(0.0f, n_h.z)};
|
||||
h_local = glms_vec3_normalize(h_local);
|
||||
|
||||
// Back to world.
|
||||
vec3s h_world = glms_vec3_add(
|
||||
glms_vec3_add(glms_vec3_scale(tangent, h_local.x), glms_vec3_scale(bitangent, h_local.y)),
|
||||
glms_vec3_scale(n, h_local.z));
|
||||
|
||||
return glms_vec3_normalize(h_world);
|
||||
}
|
||||
@@ -35,6 +35,22 @@ static inline shading_context_t make_shading_context(const scene_t* scene,
|
||||
float cone_width,
|
||||
float spread_angle)
|
||||
{
|
||||
const triangle_collection_t* triangles = &scene->triangles;
|
||||
const bvh_tree_t* bvh_tree = &scene->bvh_tree;
|
||||
if (scene->tlas.nodes != NULL && hit->instance_id != UINT32_MAX && hit->instance_id < scene->mesh_instances.capacity)
|
||||
{
|
||||
const mesh_instance_t* inst = &scene->mesh_instances.buffer[hit->instance_id];
|
||||
if (inst->active && inst->model_id < scene->mesh_models.capacity)
|
||||
{
|
||||
const mesh_model_t* model = &scene->mesh_models.buffer[inst->model_id];
|
||||
if (model->active)
|
||||
{
|
||||
triangles = &model->triangles;
|
||||
bvh_tree = &model->blas;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (shading_context_t){
|
||||
.camera_position = scene->camera.position,
|
||||
.camera_direction = glms_vec3_normalize(glms_vec3_sub(hit->point, scene->camera.position)),
|
||||
@@ -49,8 +65,10 @@ static inline shading_context_t make_shading_context(const scene_t* scene,
|
||||
.sample_index = sample_index,
|
||||
.bounce_depth = bounce_depth,
|
||||
|
||||
.bvh_tree = &scene->bvh_tree,
|
||||
.triangles = &scene->triangles,
|
||||
.scene = scene,
|
||||
|
||||
.bvh_tree = bvh_tree,
|
||||
.triangles = triangles,
|
||||
.lights = &scene->lights,
|
||||
.textures = &scene->textures,
|
||||
|
||||
@@ -72,7 +90,7 @@ static void trace_surface_aovs_only(const scene_t* scene,
|
||||
return;
|
||||
}
|
||||
|
||||
const material_t* hit_material = &scene->materials.buffer[scene->triangles.buffer[closest_hit.triangle_id].material_id];
|
||||
const material_t* hit_material = &scene->materials.buffer[closest_hit.material_id];
|
||||
|
||||
float cone_width = ray.width + closest_hit.distance * ray.spread_angle;
|
||||
shading_context_t shading_context = make_shading_context(scene,
|
||||
@@ -155,6 +173,8 @@ static void trace_lighting_aovs(const scene_t* scene,
|
||||
vec3s throughput = glms_vec3_one();
|
||||
ray_t active_ray = ray;
|
||||
float last_bsdf_pdf = 0.0f;
|
||||
vec3s last_surface_normal = glms_vec3_zero();
|
||||
bool has_last_surface_normal = false;
|
||||
|
||||
uint16_t depth = 0;
|
||||
while (depth < max_depth)
|
||||
@@ -166,6 +186,8 @@ static void trace_lighting_aovs(const scene_t* scene,
|
||||
light_shading_context_t light_context =
|
||||
{
|
||||
.wo = active_ray.direction,
|
||||
.normal = has_last_surface_normal ? last_surface_normal : glms_vec3_zero(),
|
||||
.bounce_depth = depth,
|
||||
.textures = &scene->textures,
|
||||
.spread_angle = active_ray.spread_angle,
|
||||
};
|
||||
@@ -187,9 +209,8 @@ static void trace_lighting_aovs(const scene_t* scene,
|
||||
break;
|
||||
}
|
||||
|
||||
uint8_t material_id = scene->triangles.buffer[closest_hit.triangle_id].material_id;
|
||||
uint8_t material_id = closest_hit.material_id;
|
||||
const material_t* hit_material = &scene->materials.buffer[material_id];
|
||||
|
||||
float current_cone_width = active_ray.width + closest_hit.distance * active_ray.spread_angle;
|
||||
shading_context_t shading_context = make_shading_context(scene,
|
||||
active_ray.direction,
|
||||
@@ -200,6 +221,9 @@ static void trace_lighting_aovs(const scene_t* scene,
|
||||
current_cone_width,
|
||||
active_ray.spread_angle);
|
||||
|
||||
last_surface_normal = shading_context.normal;
|
||||
has_last_surface_normal = true;
|
||||
|
||||
// First-hit surface AOVs are still cheap; record them if requested.
|
||||
if (depth == 0 && aov_wants_surface(aov_flags))
|
||||
{
|
||||
|
||||
@@ -2,6 +2,27 @@
|
||||
#include "Common.h"
|
||||
#include "Geometry/Triangle.h"
|
||||
#include "cglm/struct/vec3.h"
|
||||
#include "cglm/struct/mat3.h"
|
||||
#include "cglm/struct/mat4.h"
|
||||
|
||||
static inline vec3s mat4_mul_point(mat4s m, vec3s p)
|
||||
{
|
||||
return glms_mat4_mulv3(m, p, 1.0f);
|
||||
}
|
||||
|
||||
static inline vec3s mat4_mul_dir(mat4s m, vec3s v)
|
||||
{
|
||||
return glms_mat4_mulv3(m, v, 0.0f);
|
||||
}
|
||||
|
||||
static inline vec3s mat3_mul(mat3s m, vec3s v)
|
||||
{
|
||||
vec3s out;
|
||||
out.x = m.raw[0][0] * v.x + m.raw[0][1] * v.y + m.raw[0][2] * v.z;
|
||||
out.y = m.raw[1][0] * v.x + m.raw[1][1] * v.y + m.raw[1][2] * v.z;
|
||||
out.z = m.raw[2][0] * v.x + m.raw[2][1] * v.y + m.raw[2][2] * v.z;
|
||||
return out;
|
||||
}
|
||||
|
||||
ray_t ray_create(vec3s origin, vec3s direction, float cone_width, float spread_angle)
|
||||
{
|
||||
@@ -347,12 +368,162 @@ hit_result_t ray_intersect_scene_closest(const ray_t* ray, const scene_t* scene)
|
||||
hit_result_t result = {0};
|
||||
float closest = FLT_MAX;
|
||||
|
||||
if (scene == NULL || scene->bvh_tree.nodes == NULL || scene->triangles.count == 0 || scene->bvh_tree.node_count == 0 || scene->bvh_tree.primitive_count == 0)
|
||||
if (scene == NULL)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
ray_intersect_bvh_closest(ray, scene->bvh_tree.nodes, scene->bvh_tree.primitive_indices, &scene->triangles, 0, &closest, &result);
|
||||
// TLAS/BLAS path (may coexist with legacy geometry).
|
||||
if (scene->tlas.nodes != NULL && scene->tlas.node_count > 0)
|
||||
{
|
||||
typedef struct
|
||||
{
|
||||
uint64_t node_index;
|
||||
float enter;
|
||||
} tlas_stack_entry_t;
|
||||
|
||||
// BVH depth is typically small; avoid per-ray heap alloc.
|
||||
tlas_stack_entry_t stack[128];
|
||||
const int32_t stack_capacity = (int32_t)(sizeof(stack) / sizeof(stack[0]));
|
||||
int32_t stack_size = 0;
|
||||
stack[stack_size++] = (tlas_stack_entry_t){.node_index = 0, .enter = 0.0f};
|
||||
|
||||
while (stack_size > 0)
|
||||
{
|
||||
tlas_stack_entry_t entry = stack[--stack_size];
|
||||
if (entry.enter > closest)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const bvh_node_t* node = &scene->tlas.nodes[entry.node_index];
|
||||
float enter, exit;
|
||||
if (!ray_intersect_aabb(ray, node->bounds, &enter, &exit) || enter > closest)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node->primitive_count > 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < node->primitive_count; ++i)
|
||||
{
|
||||
uint64_t instance_id = scene->tlas.primitive_indices[node->start_index + i];
|
||||
if (instance_id >= scene->mesh_instances.capacity)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const mesh_instance_t* inst = &scene->mesh_instances.buffer[instance_id];
|
||||
if (!inst->active)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
float inst_enter, inst_exit;
|
||||
if (!ray_intersect_aabb(ray, inst->world_bounds, &inst_enter, &inst_exit) || inst_enter > closest)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t model_id = inst->model_id;
|
||||
if (model_id >= scene->mesh_models.capacity)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const mesh_model_t* model = &scene->mesh_models.buffer[model_id];
|
||||
if (!model->active || model->blas.nodes == NULL || model->blas.node_count == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
vec3s local_origin = mat4_mul_point(inst->world_to_local, ray->origin);
|
||||
vec3s local_dir = mat4_mul_dir(inst->world_to_local, ray->direction);
|
||||
ray_t local_ray = ray_create(local_origin, local_dir, ray->width, ray->spread_angle);
|
||||
|
||||
float local_closest = FLT_MAX;
|
||||
hit_result_t local_hit = (hit_result_t){0};
|
||||
ray_intersect_bvh_closest(&local_ray, model->blas.nodes, model->blas.primitive_indices, &model->triangles, 0, &local_closest, &local_hit);
|
||||
if (!local_hit.hit)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
vec3s world_point = mat4_mul_point(inst->local_to_world, local_hit.point);
|
||||
float world_distance = glms_vec3_dot(glms_vec3_sub(world_point, ray->origin), ray->direction);
|
||||
if (world_distance <= ray->esp || world_distance >= closest)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
vec3s world_normal = glms_vec3_normalize(mat3_mul(inst->normal_matrix, local_hit.normal));
|
||||
vec3s world_tangent = glms_vec3_normalize(mat3_mul(inst->normal_matrix, local_hit.tangent));
|
||||
|
||||
result = local_hit;
|
||||
result.hit = true;
|
||||
result.point = world_point;
|
||||
result.normal = world_normal;
|
||||
result.tangent = world_tangent;
|
||||
result.distance = world_distance;
|
||||
result.model_id = model_id;
|
||||
result.instance_id = (uint32_t)instance_id;
|
||||
result.material_id = model->triangles.buffer[local_hit.triangle_id].material_id;
|
||||
closest = world_distance;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t left = node->left_child_offset;
|
||||
uint64_t right = node->right_child_offset;
|
||||
|
||||
float left_enter, left_exit, right_enter, right_exit;
|
||||
bool hit_left = ray_intersect_aabb(ray, scene->tlas.nodes[left].bounds, &left_enter, &left_exit);
|
||||
bool hit_right = ray_intersect_aabb(ray, scene->tlas.nodes[right].bounds, &right_enter, &right_exit);
|
||||
|
||||
// Push far node first so near node pops first.
|
||||
if (hit_left && hit_right)
|
||||
{
|
||||
if (left_enter < right_enter)
|
||||
{
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = right, .enter = right_enter};
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = left, .enter = left_enter};
|
||||
}
|
||||
else
|
||||
{
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = left, .enter = left_enter};
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = right, .enter = right_enter};
|
||||
}
|
||||
}
|
||||
else if (hit_left)
|
||||
{
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = left, .enter = left_enter};
|
||||
}
|
||||
else if (hit_right)
|
||||
{
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = (tlas_stack_entry_t){.node_index = right, .enter = right_enter};
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Legacy path (or mixed scenes).
|
||||
if (scene->bvh_tree.nodes != NULL && scene->triangles.count > 0 && scene->bvh_tree.node_count > 0 && scene->bvh_tree.primitive_count > 0)
|
||||
{
|
||||
hit_result_t legacy_hit = (hit_result_t){0};
|
||||
float legacy_closest = closest;
|
||||
ray_intersect_bvh_closest(ray, scene->bvh_tree.nodes, scene->bvh_tree.primitive_indices, &scene->triangles, 0, &legacy_closest, &legacy_hit);
|
||||
if (legacy_hit.hit)
|
||||
{
|
||||
legacy_hit.material_id = scene->triangles.buffer[legacy_hit.triangle_id].material_id;
|
||||
legacy_hit.model_id = UINT32_MAX;
|
||||
legacy_hit.instance_id = UINT32_MAX;
|
||||
result = legacy_hit;
|
||||
closest = legacy_closest;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -360,11 +531,116 @@ hit_result_t ray_intersect_scene_any(const ray_t* ray, const scene_t* scene)
|
||||
{
|
||||
hit_result_t result = {0};
|
||||
result.distance = FLT_MAX;
|
||||
if (scene == NULL || scene->bvh_tree.nodes == NULL || scene->triangles.count == 0 || scene->bvh_tree.node_count == 0 || scene->bvh_tree.primitive_count == 0)
|
||||
|
||||
if (scene == NULL)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
if (scene->tlas.nodes != NULL && scene->tlas.node_count > 0)
|
||||
{
|
||||
uint64_t stack[128];
|
||||
const int32_t stack_capacity = (int32_t)(sizeof(stack) / sizeof(stack[0]));
|
||||
int32_t stack_size = 0;
|
||||
stack[stack_size++] = 0;
|
||||
|
||||
while (stack_size > 0)
|
||||
{
|
||||
uint64_t node_index = stack[--stack_size];
|
||||
const bvh_node_t* node = &scene->tlas.nodes[node_index];
|
||||
|
||||
float enter, exit;
|
||||
if (!ray_intersect_aabb(ray, node->bounds, &enter, &exit))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (node->primitive_count > 0)
|
||||
{
|
||||
for (uint32_t i = 0; i < node->primitive_count; ++i)
|
||||
{
|
||||
uint64_t instance_id = scene->tlas.primitive_indices[node->start_index + i];
|
||||
if (instance_id >= scene->mesh_instances.capacity)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const mesh_instance_t* inst = &scene->mesh_instances.buffer[instance_id];
|
||||
if (!inst->active)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
float inst_enter, inst_exit;
|
||||
if (!ray_intersect_aabb(ray, inst->world_bounds, &inst_enter, &inst_exit))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t model_id = inst->model_id;
|
||||
if (model_id >= scene->mesh_models.capacity)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
const mesh_model_t* model = &scene->mesh_models.buffer[model_id];
|
||||
if (!model->active || model->blas.nodes == NULL || model->blas.node_count == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
vec3s local_origin = mat4_mul_point(inst->world_to_local, ray->origin);
|
||||
vec3s local_dir = mat4_mul_dir(inst->world_to_local, ray->direction);
|
||||
ray_t local_ray = ray_create(local_origin, local_dir, ray->width, ray->spread_angle);
|
||||
|
||||
hit_result_t local_hit = (hit_result_t){0};
|
||||
ray_intersect_bvh_any(&local_ray, model->blas.nodes, model->blas.primitive_indices, &model->triangles, 0, &local_hit);
|
||||
if (!local_hit.hit)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
vec3s world_point = mat4_mul_point(inst->local_to_world, local_hit.point);
|
||||
float world_distance = glms_vec3_dot(glms_vec3_sub(world_point, ray->origin), ray->direction);
|
||||
if (world_distance <= ray->esp)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
result = local_hit;
|
||||
result.hit = true;
|
||||
result.point = world_point;
|
||||
result.distance = world_distance;
|
||||
result.model_id = model_id;
|
||||
result.instance_id = (uint32_t)instance_id;
|
||||
result.material_id = model->triangles.buffer[local_hit.triangle_id].material_id;
|
||||
return result;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = node->left_child_offset;
|
||||
if (stack_size < stack_capacity) stack[stack_size++] = node->right_child_offset;
|
||||
}
|
||||
|
||||
if (result.hit)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
if (scene->bvh_tree.nodes == NULL || scene->triangles.count == 0 || scene->bvh_tree.node_count == 0 || scene->bvh_tree.primitive_count == 0)
|
||||
{
|
||||
return result;
|
||||
}
|
||||
|
||||
ray_intersect_bvh_any(ray, scene->bvh_tree.nodes, scene->bvh_tree.primitive_indices, &scene->triangles, 0, &result);
|
||||
if (result.hit)
|
||||
{
|
||||
result.material_id = scene->triangles.buffer[result.triangle_id].material_id;
|
||||
result.model_id = UINT32_MAX;
|
||||
result.instance_id = UINT32_MAX;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
173
source/Algorithm/TLAS.c
Normal file
173
source/Algorithm/TLAS.c
Normal file
@@ -0,0 +1,173 @@
|
||||
#include "Algorithm/TLAS.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static inline aabb_t compute_bounds_range(const uint64_t* primitive_indices, uint64_t start, uint64_t count, const aabb_t* all_bounds)
|
||||
{
|
||||
if (count == 0)
|
||||
{
|
||||
return invalid_aabb();
|
||||
}
|
||||
|
||||
aabb_t bounds = all_bounds[primitive_indices[start]];
|
||||
for (uint64_t i = start + 1; i < start + count; ++i)
|
||||
{
|
||||
bounds = aabb_union(bounds, all_bounds[primitive_indices[i]]);
|
||||
}
|
||||
|
||||
return bounds;
|
||||
}
|
||||
|
||||
static inline vec3s bounds_centroid(aabb_t b)
|
||||
{
|
||||
return glms_vec3_scale(glms_vec3_add(b.min, b.max), 0.5f);
|
||||
}
|
||||
|
||||
static uint64_t build_tlas_node(bvh_node_t* nodes,
|
||||
uint64_t* next_node_index,
|
||||
uint64_t* primitive_indices,
|
||||
const aabb_t* all_bounds,
|
||||
uint64_t prim_start,
|
||||
uint64_t prim_count)
|
||||
{
|
||||
uint64_t node_index = (*next_node_index)++;
|
||||
bvh_node_t* node = &nodes[node_index];
|
||||
|
||||
node->start_index = prim_start;
|
||||
node->primitive_count = 0;
|
||||
node->bounds = compute_bounds_range(primitive_indices, prim_start, prim_count, all_bounds);
|
||||
|
||||
const uint32_t LEAF_THRESHOLD = 4;
|
||||
if (prim_count <= LEAF_THRESHOLD)
|
||||
{
|
||||
node->primitive_count = prim_count;
|
||||
return node_index;
|
||||
}
|
||||
|
||||
// Choose split axis based on centroid bounds extent.
|
||||
aabb_t centroid_bounds = invalid_aabb();
|
||||
for (uint64_t i = prim_start; i < prim_start + prim_count; ++i)
|
||||
{
|
||||
aabb_t b = all_bounds[primitive_indices[i]];
|
||||
vec3s c = bounds_centroid(b);
|
||||
aabb_growth(¢roid_bounds, c);
|
||||
}
|
||||
|
||||
vec3s extent = glms_vec3_sub(centroid_bounds.max, centroid_bounds.min);
|
||||
int axis = 0;
|
||||
if (extent.y > extent.x) axis = 1;
|
||||
if (extent.z > extent.raw[axis]) axis = 2;
|
||||
|
||||
float mid = (centroid_bounds.min.raw[axis] + centroid_bounds.max.raw[axis]) * 0.5f;
|
||||
|
||||
// Partition by centroid along axis.
|
||||
uint64_t i = prim_start;
|
||||
for (uint64_t j = prim_start; j < prim_start + prim_count; ++j)
|
||||
{
|
||||
aabb_t b = all_bounds[primitive_indices[j]];
|
||||
vec3s c = bounds_centroid(b);
|
||||
if (c.raw[axis] < mid)
|
||||
{
|
||||
uint64_t tmp = primitive_indices[i];
|
||||
primitive_indices[i] = primitive_indices[j];
|
||||
primitive_indices[j] = tmp;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t left_count = i - prim_start;
|
||||
uint64_t right_count = prim_count - left_count;
|
||||
|
||||
// Fallback to median if partition failed.
|
||||
if (left_count == 0 || right_count == 0)
|
||||
{
|
||||
uint64_t median = prim_start + prim_count / 2;
|
||||
left_count = median - prim_start;
|
||||
right_count = prim_count - left_count;
|
||||
i = median;
|
||||
|
||||
if (left_count == 0 || right_count == 0)
|
||||
{
|
||||
node->primitive_count = prim_count;
|
||||
return node_index;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t left_child = build_tlas_node(nodes, next_node_index, primitive_indices, all_bounds, prim_start, left_count);
|
||||
uint64_t right_child = build_tlas_node(nodes, next_node_index, primitive_indices, all_bounds, i, right_count);
|
||||
|
||||
node->left_child_offset = left_child;
|
||||
node->right_child_offset = right_child;
|
||||
|
||||
return node_index;
|
||||
}
|
||||
|
||||
bool tlas_tree_build(tlas_tree_t* tlas, const uint64_t* instance_indices, uint64_t instance_count, const aabb_t* all_instance_bounds)
|
||||
{
|
||||
if (tlas == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
tlas_tree_free(tlas);
|
||||
|
||||
if (instance_count == 0 || instance_indices == NULL || all_instance_bounds == NULL)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
tlas->primitive_count = instance_count;
|
||||
tlas->instance_bounds = all_instance_bounds;
|
||||
|
||||
tlas->primitive_indices = (uint64_t*)malloc(sizeof(uint64_t) * instance_count);
|
||||
if (tlas->primitive_indices == NULL)
|
||||
{
|
||||
tlas_tree_free(tlas);
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(tlas->primitive_indices, instance_indices, sizeof(uint64_t) * instance_count);
|
||||
|
||||
tlas->node_capacity = instance_count * 2 - 1;
|
||||
tlas->nodes = (bvh_node_t*)malloc(sizeof(bvh_node_t) * tlas->node_capacity);
|
||||
if (tlas->nodes == NULL)
|
||||
{
|
||||
tlas_tree_free(tlas);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t next_node = 0;
|
||||
(void)build_tlas_node(tlas->nodes, &next_node, tlas->primitive_indices, all_instance_bounds, 0, instance_count);
|
||||
tlas->node_count = next_node;
|
||||
|
||||
if (tlas->node_count < tlas->node_capacity)
|
||||
{
|
||||
bvh_node_t* resized = (bvh_node_t*)realloc(tlas->nodes, sizeof(bvh_node_t) * tlas->node_count);
|
||||
if (resized != NULL)
|
||||
{
|
||||
tlas->nodes = resized;
|
||||
tlas->node_capacity = tlas->node_count;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void tlas_tree_free(tlas_tree_t* tlas)
|
||||
{
|
||||
if (tlas == NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
free(tlas->nodes);
|
||||
free(tlas->primitive_indices);
|
||||
|
||||
tlas->nodes = NULL;
|
||||
tlas->primitive_indices = NULL;
|
||||
tlas->node_count = 0;
|
||||
tlas->node_capacity = 0;
|
||||
tlas->primitive_count = 0;
|
||||
tlas->instance_bounds = NULL;
|
||||
}
|
||||
Reference in New Issue
Block a user