features/modernize #1

Merged
Misaki merged 5 commits from features/modernize into main 2026-02-22 03:05:52 +00:00
5 changed files with 228 additions and 86 deletions
Showing only changes of commit 5c988108ef - Show all commits

View File

@@ -30,6 +30,10 @@ typedef struct
rendering_mode_t rendering_mode;
aov_flags_t aov_flags;
bool is_done;
// Progressive rendering state: number of sample passes completed so far (i.e. the 0-based index of the next pass).
// In tile-based mode this remains 0.
uint32_t progressive_sample_index;
} render_job_t;
bool renderer_aov_target_init(render_job_t* job, aov_flags_t aov_flags);

View File

@@ -131,7 +131,8 @@ void ggx_ms_init_lut_once(void)
valid++;
}
float E = (valid > 0u) ? (sum / (float)valid) : 0.0f;
// float E = (valid > 0u) ? (sum / (float)valid) : 0.0f;
float E = sum / (float)GGX_MS_LUT_SAMPLES;
E = saturatef(E);
g_ggx_E_lut[ry][ix] = E;

View File

@@ -27,12 +27,12 @@ static float oren_nayar_eval(vec3s l, vec3s v, vec3s n, float roughness, float n
if (n_dot_v > n_dot_l)
{
sin_alpha = sin_theta_l;
tan_beta = sin_theta_v / fmaxf(n_dot_v, 0.0001f);
tan_beta = sin_theta_v / fmaxf(n_dot_v, FLT_EPSILON);
}
else
{
sin_alpha = sin_theta_v;
tan_beta = sin_theta_l / fmaxf(n_dot_l, 0.0001f);
tan_beta = sin_theta_l / fmaxf(n_dot_l, FLT_EPSILON);
}
return (A + B * cos_phi_diff * sin_alpha * tan_beta) * INV_PI;
@@ -117,19 +117,20 @@ static vec3s evaluate_bsdf_standard_lit(const shading_context_t* context, standa
// Specular (GGX)
float D = ggx_distribution(n_dot_h, surface_data->roughness);
float G = ggx_g_smith(n_dot_v, n_dot_l, surface_data->roughness);
vec3s spec = glms_vec3_scale(glms_vec3_mul(F, (vec3s){D * G, D * G, D * G}), 1.0f / fmaxf(4.0f * n_dot_v * n_dot_l, 0.0001f));
vec3s spec = glms_vec3_scale(glms_vec3_mul(F, (vec3s){D * G, D * G, D * G}),
1.0f / fmaxf(4.0f * n_dot_v * n_dot_l, FLT_EPSILON));
// Multi-scatter GGX (broad lobe)
vec3s ms = ggx_multi_scatter_lambert(f0, n_dot_v, n_dot_l, surface_data->roughness);
// Diffuse (Oren-Nayar)
// Using (1 - F) here can make rough dielectrics look too dark because our specular is single-scatter GGX
// (missing multi-scattering energy compensation). A stable approximation is (1 - F0_diel).
float kd_scale = (1.0f - surface_data->metallic) * (1.0f - DIELECTRIC_REFLECTIVE_F0);
vec3s kD = glms_vec3_scale(glms_vec3_one(), kd_scale);
vec3s kD = glms_vec3_sub(glms_vec3_one(), F);
kD = glms_vec3_scale(kD, (1.0f - surface_data->metallic));
float on_val = oren_nayar_eval(l, v, n, surface_data->diffuse_roughness, n_dot_l, n_dot_v);
vec3s diff = glms_vec3_scale(glms_vec3_mul(surface_data->albedo, kD), on_val);
// return (vec3s){n_dot_h, n_dot_h, n_dot_h};
return glms_vec3_add(glms_vec3_add(diff, spec), ms);
}
@@ -144,7 +145,7 @@ static float sample_bsdf_pdf(const standard_lit_surface_data_t* surface_data, ve
// Lobe probabilities (single-scatter spec vs cosine)
// We allocate some cosine probability for multi-scatter spec, especially for rough metals.
vec3s f0 = glms_vec3_lerp(DIELECTRIC_F0, surface_data->albedo, surface_data->metallic);
float n_dot_v = fmaxf(glms_vec3_dot(surface_data->normal, V), 0.0001f);
float n_dot_v = fmaxf(glms_vec3_dot(surface_data->normal, V), 0.0f);
vec3s F_est = fresnel_schlick_vec3(f0, n_dot_v);
float spec_strength = luminance(F_est);
@@ -159,6 +160,7 @@ static float sample_bsdf_pdf(const standard_lit_surface_data_t* surface_data, ve
{
return 0.0f;
}
w_ss /= sum_w;
w_cos /= sum_w;
@@ -168,16 +170,17 @@ static float sample_bsdf_pdf(const standard_lit_surface_data_t* surface_data, ve
// p_w(wi) = p_h(h) / (4 * (V·H))
vec3s H = glms_vec3_normalize(glms_vec3_add(L, V));
float v_dot_h = glms_vec3_dot(V, H);
if (v_dot_h <= 1e-6f)
if (v_dot_h <= FLT_EPSILON)
{
return 0.0f;
}
float n_dot_h = fmaxf(glms_vec3_dot(surface_data->normal, H), 0.0f);
float D = ggx_distribution(n_dot_h, surface_data->roughness);
float G1v = ggx_g1(n_dot_v, surface_data->roughness);
float pdf_h = (D * G1v * n_dot_h) / fmaxf(n_dot_v, 1e-6f);
float pdf_spec = pdf_h / (4.0f * fmaxf(v_dot_h, 1e-6f));
float pdf_h = (D * G1v * v_dot_h) / fmaxf(n_dot_v, FLT_EPSILON);
float pdf_spec = pdf_h / (4.0f * fmaxf(v_dot_h, FLT_EPSILON));
// Cosine PDF (used for diffuse + multi-scatter)
float pdf_cos = n_dot_l * INV_PI;
@@ -210,7 +213,7 @@ path_output standard_lit_render_loop(const standard_lit_properties_t* properties
surface_data.normal = context->normal;
}
float n_dot_v = fmaxf(glms_vec3_dot(surface_data.normal, V), 0.0001f);
float n_dot_v = fmaxf(glms_vec3_dot(surface_data.normal, V), 0.0f);
// Ensure LUT is ready (thread-safe, one-time).
ggx_ms_init_lut_once();
@@ -284,7 +287,7 @@ path_output standard_lit_render_loop(const standard_lit_properties_t* properties
vec3s f_eval = glms_vec3_zero();
float n_dot_l = 0.0f;
float r_lobe = sobol_sample_scrambled(context->sample_index, sobol_get_dimension(context->bounce_depth, PRNG_BSDF), scramble);
float r_lobe = sobol_sample(context->sample_index, sobol_get_dimension(context->bounce_depth, PRNG_BSDF));
bool is_specular = (r_lobe < w_ss);
if (is_specular)
@@ -297,7 +300,7 @@ path_output standard_lit_render_loop(const standard_lit_properties_t* properties
float u2 = sobol_sample_scrambled(context->sample_index, d2, scramble);
vec3s H = ggx_sample_vndf(surface_data.normal, V, surface_data.roughness, u1, u2);
output.wi = glms_vec3_reflect(context->wo, H); // reflect(-V, H) -> V is wo inverted
output.wi = glms_vec3_reflect(context->wo, H);
if (glms_vec3_dot(output.wi, surface_data.normal) <= 0.0f || glms_vec3_dot(output.wi, context->normal) <= 0.0f)
{
@@ -305,62 +308,35 @@ path_output standard_lit_render_loop(const standard_lit_properties_t* properties
return output;
}
// Recalculate dots
n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), 0.0001f);
vec3s H_new = glms_vec3_normalize(glms_vec3_add(output.wi, V));
float n_dot_h = fmaxf(glms_vec3_dot(surface_data.normal, H_new), 0.0001f);
float v_dot_h = fmaxf(glms_vec3_dot(V, H_new), 0.0001f);
n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), FLT_EPSILON);
// Evaluate BSDF
float D = ggx_distribution(n_dot_h, surface_data.roughness);
float G1v = ggx_g1(n_dot_v, surface_data.roughness);
float G = ggx_g_smith(n_dot_v, n_dot_l, surface_data.roughness);
vec3s f0 = glms_vec3_lerp(DIELECTRIC_F0, surface_data.albedo, surface_data.metallic);
vec3s F = fresnel_schlick_vec3(f0, v_dot_h);
vec3s spec_f = glms_vec3_scale(glms_vec3_mul(F, (vec3s){D * G, D * G, D * G}),
1.0f / fmaxf(4.0f * n_dot_v * n_dot_l, 1e-6f));
f_eval = spec_f;
// Propagate spread angle for ray cones
// Heuristic: spread increases with roughness
// Spread angle heuristic
output.spread_angle = context->spread_angle + surface_data.roughness * QUARTER_PI;
}
else
{
// Sample Cosine hemisphere (Diffuse + Multi-scatter spec)
// Note: We use cosine sampling for Oren-Nayar and the broad MS term.
uint32_t d1 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_U);
uint32_t d2 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_V);
output.wi = random_cosine_direction(surface_data.normal, context->sample_index, d1, d2, scramble);
n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), 0.0001f);
if (glms_vec3_dot(output.wi, context->normal) <= 0.0f)
{
output.state = PS_TERMINATE;
return output;
}
float kd_scale = (1.0f - surface_data.metallic) * (1.0f - DIELECTRIC_REFLECTIVE_F0);
vec3s kD = glms_vec3_scale(glms_vec3_one(), kd_scale);
float on = oren_nayar_eval(output.wi, V, surface_data.normal, surface_data.diffuse_roughness, n_dot_l, n_dot_v);
n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), FLT_EPSILON);
// Diffuse bounce significantly increases spread (effectively resets or becomes very wide)
// Diffuse bounce spread
output.spread_angle = context->spread_angle + 0.5f;
vec3s diff_f = glms_vec3_scale(glms_vec3_mul(surface_data.albedo, kD), on);
// Multi-scatter GGX term (broad): sampled here with cosine.
vec3s ms_f = ggx_multi_scatter_lambert(f0, n_dot_v, n_dot_l, surface_data.roughness);
// Throughput multiplier: (f * NoL) / pdf
vec3s f_sum = glms_vec3_add(diff_f, ms_f);
f_eval = f_sum;
}
// IMPORTANT: evaluate FULL BSDF for the sampled direction (unbiased for mixture sampling)
f_eval = evaluate_bsdf_standard_lit(context, &surface_data, output.wi);
output.pdf = sample_bsdf_pdf(&surface_data, V, output.wi);
if (output.pdf < 1e-12f)
if (output.pdf < FLT_EPSILON)
{
output.state = PS_TERMINATE;
return output;

View File

@@ -1,6 +1,8 @@
#include "Rendering/Renderer.h"
#include "Algorithm/PathTracing.h"
#include <string.h>
static inline void create_target_if_required(aov_flags_t aov_flags, aov_flags_t target_flag, render_target_t** render_target, uint32_t width, uint32_t height)
{
render_target_t* temp = NULL;
@@ -65,6 +67,47 @@ static inline bool aov_needs_lighting_samples(aov_flags_t flags)
return has_flag(flags, AOV_BEAUTY) || has_flag(flags, AOV_DIRECT) || has_flag(flags, AOV_INDIRECT);
}
// Folds one new sample into a running mean.
//
// prev_avg   : mean of the samples accumulated so far.
// sample     : the new sample to blend in.
// prev_count : how many samples prev_avg already represents (0 yields `sample` itself).
//
// Returns the updated mean in x/y/z. The w component is always written as 1.0f;
// the w of both inputs is ignored.
static inline vec4s running_average_vec4(vec4s prev_avg, vec4s sample, uint32_t prev_count)
{
    const float prior_weight = (float)prev_count;
    const float inv_total = 1.0f / (prior_weight + 1.0f);

    return (vec4s){
        (prev_avg.x * prior_weight + sample.x) * inv_total,
        (prev_avg.y * prior_weight + sample.y) * inv_total,
        (prev_avg.z * prior_weight + sample.z) * inv_total,
        1.0f,
    };
}
// Resets every pixel of `target` to (0, 0, 0, 1).
//
// The buffer is first zero-filled, then the w component of each pixel is set to 1.0f.
// A NULL target, or a target without a buffer, is silently ignored.
static inline void clear_render_target(render_target_t* target)
{
    if (target != NULL && target->buffer != NULL)
    {
        const size_t total_pixels = (size_t)target->width * target->height;

        // Zero all components in one pass...
        memset(target->buffer, 0, total_pixels * sizeof(vec4s));

        // ...then restore w on each pixel.
        size_t remaining = total_pixels;
        while (remaining > 0)
        {
            --remaining;
            target->buffer[remaining].w = 1.0f;
        }
    }
}
// Clears all MAX_AOV_TARGET render-target slots of `job` through clear_render_target().
//
// Does nothing when `job` is NULL or when job->aov_target is NULL.
static inline void clear_aov_targets(render_job_t* job)
{
    if (job != NULL && job->aov_target != NULL)
    {
        uint8_t slot = 0;
        while (slot < MAX_AOV_TARGET)
        {
            clear_render_target(job->aov_target[slot]);
            ++slot;
        }
    }
}
static void render_pixel(const rendering_config_t* config, scene_t* scene, vec3s coord, uint32_t x, uint32_t y, aov_flags_t aov_flags, aov_output_t* pixel_output)
{
aov_output_t accumulated_color = {0};
@@ -83,8 +126,8 @@ static void render_pixel(const rendering_config_t* config, scene_t* scene, vec3s
uint32_t pos_hash = hash_uint32(pixel_id);
// Apply AA
float du = sobol_sample_scrambled(sobol_idx, PRNG_LENS_U, pos_hash);
float dv = sobol_sample_scrambled(sobol_idx, PRNG_LENS_V, pos_hash);
float du = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_U, pos_hash);
float dv = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_V, pos_hash);
vec2s position_ndc = compute_ndc((float)x + du, (float)y + dv, config->width, config->height);
float screen_x = position_ndc.x * 2.0f - 1.0f;
@@ -111,6 +154,48 @@ static void render_pixel(const rendering_config_t* config, scene_t* scene, vec3s
*pixel_output = accumulated_color;
}
// Traces exactly one camera path through pixel (x, y) and stores its AOV result in *pixel_output.
//
// config       : supplies image width/height, sample_count and max_depth.
// scene        : supplies the camera (position, rotation, size_x/size_y, focal_length) and is
//                handed to path_trace_aov().
// coord        : base point that the per-sample right/up sensor offsets are added to
//                (renderer_start passes camera position + forward * focal_length).
// x, y         : pixel coordinates.
// sample_index : index of this sample for the pixel; the Sobol index used is
//                pixel_id * sample_count + (sample_index + 1).
// aov_flags    : forwarded unchanged to path_trace_aov().
// pixel_output : receives the result; it is overwritten, not accumulated.
static void render_pixel_one_sample(const rendering_config_t* config,
                                    scene_t* scene,
                                    vec3s coord,
                                    uint32_t x,
                                    uint32_t y,
                                    uint32_t sample_index,
                                    aov_flags_t aov_flags,
                                    aov_output_t* pixel_output)
{
    // Per-pixel sequence index and scramble seed.
    const uint32_t pixel_id = y * config->width + x;
    const uint32_t sobol_idx = pixel_id * config->sample_count + (sample_index + 1);
    const uint32_t scramble = hash_uint32(pixel_id);

    // Sub-pixel jitter, then map the jittered position from NDC to [-1, 1].
    const float jitter_u = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_U, scramble);
    const float jitter_v = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_V, scramble);
    const vec2s ndc = compute_ndc((float)x + jitter_u, (float)y + jitter_v, config->width, config->height);
    const float signed_x = ndc.x * 2.0f - 1.0f;
    const float signed_y = ndc.y * 2.0f - 1.0f;

    // Offsets along the camera's right/up axes, scaled by half the camera size.
    const float offset_right = signed_x * scene->camera.size_x * 0.5f;
    const float offset_up = signed_y * scene->camera.size_y * 0.5f;
    const vec3s right_step = glms_vec3_scale(quat_get_right(scene->camera.rotation), offset_right);
    const vec3s up_step = glms_vec3_scale(quat_get_up(scene->camera.rotation), offset_up);
    const vec3s target_point = glms_vec3_add(glms_vec3_add(coord, right_step), up_step);

    // Initial spread angle derived from the height of one pixel relative to the focal length.
    const float pixel_height = scene->camera.size_y / (float)config->height;
    const float initial_spread = atanf(pixel_height / scene->camera.focal_length);

    const vec3s direction = glms_vec3_normalize(glms_vec3_sub(target_point, scene->camera.position));
    ray_t primary_ray = ray_create(scene->camera.position, direction, 0.0f, initial_spread);

    // Trace into a zero-initialized local so the caller never sees a partially written result.
    aov_output_t traced = {0};
    path_trace_aov(scene, primary_ray, sobol_idx, config->max_depth, aov_flags, &traced);
    *pixel_output = traced;
}
static inline void update_aov_pixel_if_exist(render_target_t** target, vec4s color, uint32_t x, uint32_t y)
{
if (*target == NULL || (*target)->buffer == NULL)
@@ -133,17 +218,91 @@ static inline void update_aov(render_target_t** target, const aov_output_t* aov,
update_aov_pixel_if_exist(&target[AOV_INDIRECT_INDEX], aov->indirect, x, y);
}
// TODO: Progressive rendering
void renderer_start(render_job_t* job)
{
ensure_camera_aspect_ratio(&job->scene->camera, job->config);
// Reset progressive state whenever we (re)start.
job->progressive_sample_index = 0;
vec3s coord = glms_vec3_add(job->scene->camera.position, glms_vec3_scale(quat_get_forward(job->scene->camera.rotation), job->scene->camera.focal_length));
if (job->rendering_mode == RENDER_PROGRESSIVE)
{
// Progressive mode: accumulate 1 spp per pass until sample_count or stop requested.
clear_aov_targets(job);
job->is_done = false;
uint32_t width = job->config->width;
uint32_t height = job->config->height;
for (uint32_t s = 0; s < job->config->sample_count; ++s)
{
if (job->is_done)
{
break;
}
int64_t x, y;
#pragma omp parallel for schedule(dynamic, 1) default(none) \
shared(job, coord, width, height, s) \
private(x, y)
for (y = 0; y < (int64_t)height; ++y)
{
for (x = 0; x < (int64_t)width; ++x)
{
if (job->is_done)
{
continue;
}
aov_output_t pixel = {0};
render_pixel_one_sample(job->config, job->scene, coord, (uint32_t)x, (uint32_t)y, s, job->aov_flags, &pixel);
// Accumulate lighting AOVs; write non-stochastic AOVs once.
if (has_flag(job->aov_flags, AOV_BEAUTY))
{
vec4s prev = render_target_get_pixel(job->aov_target[AOV_BEAUTY_INDEX], (uint32_t)x, (uint32_t)y);
vec4s avg = running_average_vec4(prev, pixel.beauty, s);
render_target_set_pixel(job->aov_target[AOV_BEAUTY_INDEX], (uint32_t)x, (uint32_t)y, avg);
}
if (s == 0)
{
update_aov_pixel_if_exist(&job->aov_target[AOV_AlBEDO_INDEX], pixel.albedo, (uint32_t)x, (uint32_t)y);
update_aov_pixel_if_exist(&job->aov_target[AOV_NORMAL_INDEX], pixel.normal, (uint32_t)x, (uint32_t)y);
update_aov_pixel_if_exist(&job->aov_target[AOV_DEPTH_INDEX], (vec4s){pixel.depth, pixel.depth, pixel.depth, 1.0f}, (uint32_t)x, (uint32_t)y);
update_aov_pixel_if_exist(&job->aov_target[AOV_POSITION_INDEX], pixel.position, (uint32_t)x, (uint32_t)y);
}
if (has_flag(job->aov_flags, AOV_DIRECT))
{
vec4s prev = render_target_get_pixel(job->aov_target[AOV_DIRECT_INDEX], (uint32_t)x, (uint32_t)y);
vec4s avg = running_average_vec4(prev, pixel.direct, s);
render_target_set_pixel(job->aov_target[AOV_DIRECT_INDEX], (uint32_t)x, (uint32_t)y, avg);
}
if (has_flag(job->aov_flags, AOV_INDIRECT))
{
vec4s prev = render_target_get_pixel(job->aov_target[AOV_INDIRECT_INDEX], (uint32_t)x, (uint32_t)y);
vec4s avg = running_average_vec4(prev, pixel.indirect, s);
render_target_set_pixel(job->aov_target[AOV_INDIRECT_INDEX], (uint32_t)x, (uint32_t)y, avg);
}
}
}
job->progressive_sample_index = s + 1;
}
job->is_done = true;
return;
}
else
{
uint32_t tile_count_x = (job->config->width + job->config->bucket_size - 1) / job->config->bucket_size;
uint32_t tile_count_y = (job->config->height + job->config->bucket_size - 1) / job->config->bucket_size;
uint32_t tile_count = tile_count_x * tile_count_y;
vec3s coord = glms_vec3_add(job->scene->camera.position, glms_vec3_scale(quat_get_forward(job->scene->camera.rotation), job->scene->camera.focal_length));
int64_t x, y, tile_index; // OpenMP requires these to be declared outside the parallel region.
#pragma omp parallel for schedule(dynamic, 1) default(none) \
shared(tile_count_x, tile_count_y, tile_count, coord, job) \
@@ -172,11 +331,12 @@ void renderer_start(render_job_t* job)
tile_done:;
}
// TODO: A-Trous denoising
job->is_done = true;
}
// TODO: A-Trous denoising
}
void render_job_free(render_job_t* job)
{
if (job == NULL || job->aov_target == NULL)

View File

@@ -15,7 +15,7 @@
#define TITLE "Path Tracing"
#define SCENE_PATH "./assets/sponza.fbx"
#define HDRI_PATH "./assets/hdri/golden_gate_hills_1k.hdr"
#define HDRI_PATH "C:/Users/Misaki/Downloads/shanghai_bund_1k.hdr"
static bool scene_setup(scene_t* scene)
{
@@ -24,8 +24,13 @@ static bool scene_setup(scene_t* scene)
return false;
}
#if 0
scene->camera.position = (vec3s){-7.5f, 2.5f, 0.0f};
scene->camera.rotation = euler_to_quat(10.0f, -90.0f, 0.0f);
#else
scene->camera.position = (vec3s){0.0f, 0.0f, 5.0f};
scene->camera.rotation = glms_quat_identity();
#endif
// TODO: Standardize light unit
light_entity_t sun = light_create_directional_light(&scene->lights);
@@ -81,12 +86,8 @@ static bool load_assets(scene_t* scene)
},
&scene->materials);
// quad_create((vec3s){0.0f, -0.5f, 0.0f}, (vec3s){0.0f, 1.0f, 0.0f}, (vec3s){1.0f, 0.0f, 0.0f}, 10.0f, floor_material.id, &scene->triangles);
quad_create((vec3s){0.0f, -0.5f, 0.0f}, (vec3s){0.0f, 1.0f, 0.0f}, (vec3s){1.0f, 0.0f, 0.0f}, 10.0f, floor_material.id, &scene->triangles);
quad_create((vec3s){0.0f, 0.0f, 0.0f}, (vec3s){0.0f, 0.0f, 1.0f}, (vec3s){0.0f, 1.0f, 0.0f}, 1.0f, floor_material.id, &scene->triangles);
// vertex_t v0 = {.position = (vec3s){-1.0f, -1.0f, 0.0f}, .normal = (vec3s){0.0f, 0.0f, 1.0f}, .tangent = (vec3s){1.0f, 0.0f, 0.0f}, .uv = (vec2s){0.0f, 0.0f}};
// vertex_t v1 = {.position = (vec3s){1.0f, -1.0f, 0.0f}, .normal = (vec3s){0.0f, 0.0f, 1.0f}, .tangent = (vec3s){1.0f, 0.0f, 0.0f}, .uv = (vec2s){1.0f, 0.0f}};
// vertex_t v2 = {.position = (vec3s){-1.0f, 1.0f, 0.0f}, .normal = (vec3s){0.0f, 0.0f, 1.0f}, .tangent = (vec3s){1.0f, 0.0f, 0.0f}, .uv = (vec2s){1.0f, 1.0f}};
// triangle_create(v0, v1, v2, quad_material.id, &scene->triangles);
#endif
return scene_build_bvh(scene);
@@ -110,7 +111,7 @@ static bool initialize_renderer(const rendering_config_t* config, aov_flags_t ao
.scene = outScene,
.config = config,
.rendering_mode = RENDER_TILE_BASED,
.rendering_mode = RENDER_PROGRESSIVE,
.aov_flags = aov_flags,
.is_done = false,
};
@@ -206,7 +207,7 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE hPrevInstance,
rendering_config_t config = {
.width = 1920 / 2,
.height = 1080 / 2,
.sample_count = 16 * 1,
.sample_count = 16 * 4,
.max_depth = 4,
.bucket_size = 64,
};