#include "Rendering/Renderer.h"
#include "Algorithm/PathTracing.h"

#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Per-pixel data shared by every primary ray generated for that pixel.
// Built once so that the sample loop does not recompute it.
typedef struct primary_ray_frame_t {
    vec3s plane_center;  // Point on the image plane straight ahead of the camera.
    vec3s right;         // Camera right axis, from quat_get_right().
    vec3s up;            // Camera up axis, from quat_get_up().
    float spread_angle;  // Initial spread angle handed to ray_create().
} primary_ray_frame_t;

// Allocates and initializes a render target when `target_flag` is set in `aov_flags`.
//
// `*render_target` is always written: it receives the new target, or NULL when the
// flag is not requested or the allocation fails.
//
// Returns false only when the flag was requested and the allocation failed.
static inline bool create_target_if_required(aov_flags_t aov_flags, aov_flags_t target_flag,
                                             render_target_t** render_target, uint32_t width, uint32_t height)
{
    *render_target = NULL;

    if (!has_flag(aov_flags, target_flag)) {
        return true;
    }

    render_target_t* target = malloc(sizeof *target);
    if (target == NULL) {
        return false;
    }

    render_target_init(width, height, target);
    *render_target = target;
    return true;
}

// Frees every allocated AOV target of `job`, then the slot array itself, and resets
// job->aov_target to NULL. Does nothing when `job` or the slot array is NULL.
static void release_aov_targets(render_job_t* job)
{
    if (job == NULL || job->aov_target == NULL) {
        return;
    }

    for (size_t i = 0; i < MAX_AOV_TARGET; ++i) {
        if (job->aov_target[i] != NULL) {
            render_target_free(job->aov_target[i]);
            free(job->aov_target[i]);
            job->aov_target[i] = NULL;
        }
    }

    free(job->aov_target);
    job->aov_target = NULL;
}

// Allocates the AOV slot array of `job` and one render target per AOV requested in
// job->config.aov_flags, sized job->config.width x job->config.height.
//
// Returns true on success. On any allocation failure everything allocated so far is
// released, job->aov_target is left NULL and false is returned.
bool renderer_aov_target_init(render_job_t* job)
{
    if (job == NULL) {
        return false;
    }

    job->aov_target = malloc(sizeof *job->aov_target * MAX_AOV_TARGET);
    if (job->aov_target == NULL) {
        return false;
    }

    // Start from all-NULL slots so that clearing/freeing never sees garbage pointers,
    // including slots that none of the AOVs below map to.
    for (size_t i = 0; i < MAX_AOV_TARGET; ++i) {
        job->aov_target[i] = NULL;
    }

    const struct {
        aov_flags_t flag;
        size_t index;
    } slots[] = {
        { AOV_BEAUTY, AOV_BEAUTY_INDEX },
        { AOV_AlBEDO, AOV_AlBEDO_INDEX },
        { AOV_NORMAL, AOV_NORMAL_INDEX },
        { AOV_DEPTH, AOV_DEPTH_INDEX },
        { AOV_POSITION, AOV_POSITION_INDEX },
        { AOV_DIRECT, AOV_DIRECT_INDEX },
        { AOV_INDIRECT, AOV_INDIRECT_INDEX },
    };

    for (size_t i = 0; i < sizeof slots / sizeof slots[0]; ++i) {
        if (!create_target_if_required(job->config.aov_flags, slots[i].flag, &job->aov_target[slots[i].index],
                                       job->config.width, job->config.height)) {
            release_aov_targets(job);
            return false;
        }
    }

    return true;
}

// Makes the camera sensor match the aspect ratio of the output image.
// When the two differ by more than 0.001, size_y and fov_y are recomputed from
// size_x, focal_length and the image aspect ratio; size_x is left untouched.
static inline void ensure_camera_aspect_ratio(camera_t* camera, const rendering_config_t* config)
{
    float aspect_ratio = (float)config->width / config->height;

    if (fabsf(aspect_ratio - camera->size_x / camera->size_y) > 0.001f) {
        camera->size_y = camera->size_x / aspect_ratio;
        camera->fov_y = 2.0f * (float)atan(camera->size_x / (2.0f * camera->focal_length * aspect_ratio));
    }
}

// Maps a (possibly fractional) pixel position to [0, 1] coordinates.
// The vertical axis is mirrored when FLIP_Y is defined.
static inline vec2s compute_ndc(float x, float y, uint32_t width, uint32_t height)
{
    return (vec2s){
        .x = x / (float)width,
#ifdef FLIP_Y
        .y = 1.0f - y / (float)height
#else
        .y = y / (float)height
#endif
    };
}

// True when at least one of the beauty, direct or indirect AOVs is requested.
static inline bool aov_needs_lighting_samples(aov_flags_t flags)
{
    return has_flag(flags, AOV_BEAUTY) || has_flag(flags, AOV_DIRECT) || has_flag(flags, AOV_INDIRECT);
}

// Returns the number of samples per pixel actually worth rendering for `job`:
// the configured sample_count when a lighting AOV is requested, otherwise 1.
// A job whose flags are exactly AOV_DIRECT also gets a single sample.
static inline uint32_t get_minimal_sample_count(render_job_t* job)
{
    if (!aov_needs_lighting_samples(job->config.aov_flags)) {
        return 1;
    }
    if (job->config.aov_flags == AOV_DIRECT) {
        return 1;
    }
    return job->config.sample_count;
}

// Folds `sample` into a running mean that already contains `prev_count` samples.
// The w component of the result is always 1.
static inline vec4s running_average_vec4(vec4s prev_avg, vec4s sample, uint32_t prev_count)
{
    float n = (float)prev_count;
    float inv = 1.0f / (n + 1.0f);

    return (vec4s){
        .x = (prev_avg.x * n + sample.x) * inv,
        .y = (prev_avg.y * n + sample.y) * inv,
        .z = (prev_avg.z * n + sample.z) * inv,
        .w = 1.0f
    };
}

// Resets every pixel of `target` to (0, 0, 0, 1). No-op for a NULL target or buffer.
static inline void clear_render_target(render_target_t* target)
{
    if (target == NULL || target->buffer == NULL) {
        return;
    }

    size_t pixel_count = (size_t)target->width * target->height;
    memset(target->buffer, 0, pixel_count * sizeof(vec4s));
    for (size_t i = 0; i < pixel_count; ++i) {
        target->buffer[i].w = 1.0f;
    }
}

// Clears every AOV target of `job`; unallocated slots are skipped.
static inline void clear_aov_targets(render_job_t* job)
{
    if (job == NULL || job->aov_target == NULL) {
        return;
    }

    for (size_t i = 0; i < MAX_AOV_TARGET; ++i) {
        clear_render_target(job->aov_target[i]);
    }
}

// Gathers the per-pixel invariants needed to build primary rays.
// `coord` is the image-plane point in front of the camera computed by renderer_start().
static inline primary_ray_frame_t primary_ray_frame_create(const rendering_config_t* config, const scene_t* scene,
                                                           vec3s coord)
{
    primary_ray_frame_t frame;

    frame.plane_center = coord;
    frame.right = quat_get_right(scene->camera.rotation);
    frame.up = quat_get_up(scene->camera.rotation);

    // Calculate initial spread angle for ray differentials
    float pixel_height = scene->camera.size_y / (float)config->height;
    frame.spread_angle = atanf(pixel_height / scene->camera.focal_length);

    return frame;
}

// Builds the primary ray through pixel (x, y), jittered inside the pixel by two
// scrambled Sobol values drawn for `sobol_idx` / `pos_hash`.
static inline ray_t generate_primary_ray(const rendering_config_t* config, const scene_t* scene,
                                         const primary_ray_frame_t* frame, uint32_t x, uint32_t y,
                                         uint32_t sobol_idx, uint32_t pos_hash)
{
    // Apply AA
    float du = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_U, pos_hash);
    float dv = sobol_sample_scrambled(sobol_idx, PRNG_FILTER_V, pos_hash);

    vec2s position_ndc = compute_ndc((float)x + du, (float)y + dv, config->width, config->height);

    // Remap [0, 1] to [-1, 1], then scale to half the sensor extent.
    float screen_x = position_ndc.x * 2.0f - 1.0f;
    float screen_y = position_ndc.y * 2.0f - 1.0f;
    float sensor_offset_x = screen_x * scene->camera.size_x * 0.5f;
    float sensor_offset_y = screen_y * scene->camera.size_y * 0.5f;

    vec3s image_plane_point = glms_vec3_add(frame->plane_center, glms_vec3_scale(frame->right, sensor_offset_x));
    image_plane_point = glms_vec3_add(image_plane_point, glms_vec3_scale(frame->up, sensor_offset_y));

    vec3s direction = glms_vec3_normalize(glms_vec3_sub(image_plane_point, scene->camera.position));
    return ray_create(scene->camera.position, direction, 0.0f, frame->spread_angle);
}

// Renders one pixel with all of its samples and writes the averaged AOVs to
// `pixel_output`. config->sample_count samples are taken when `aov_flags`
// contains a lighting AOV, otherwise a single one.
static void render_pixel(const rendering_config_t* config, const scene_t* scene, vec3s coord, uint32_t x, uint32_t y,
                         aov_flags_t aov_flags, aov_output_t* pixel_output)
{
    aov_output_t accumulated_color = {0};
    uint32_t pixel_id = y * config->width + x;

    // Kept as a full 32-bit count: narrowing it would silently wrap large sample counts.
    uint32_t sample_count = aov_needs_lighting_samples(aov_flags) ? (uint32_t)config->sample_count : 1u;
    float inv_sample = 1.0f / (float)sample_count;

    primary_ray_frame_t frame = primary_ray_frame_create(config, scene, coord);
    uint32_t pos_hash = hash_uint32(pixel_id);

    for (uint32_t k = 0; k < sample_count; k++) {
        // TODO: Hash it
        uint32_t sobol_idx = pixel_id * sample_count + (k + 1);

        ray_t ray = generate_primary_ray(config, scene, &frame, x, y, sobol_idx, pos_hash);

        aov_output_t aov_output = {0};
        path_trace_aov(scene, ray, sobol_idx, config->max_depth, aov_flags, &aov_output);
        accumulate_aov(&accumulated_color, &aov_output, inv_sample);
    }

    *pixel_output = accumulated_color;
}

// Renders a single sample (`sample_index`, zero-based) of one pixel and writes the
// raw, un-averaged AOVs to `pixel_output`. Uses the same Sobol indexing as render_pixel().
static void render_pixel_one_sample(const rendering_config_t* config, const scene_t* scene, vec3s coord, uint32_t x,
                                    uint32_t y, uint32_t sample_index, aov_output_t* pixel_output)
{
    uint32_t pixel_id = y * config->width + x;
    uint32_t sobol_idx = pixel_id * config->sample_count + (sample_index + 1);
    uint32_t pos_hash = hash_uint32(pixel_id);

    primary_ray_frame_t frame = primary_ray_frame_create(config, scene, coord);
    ray_t ray = generate_primary_ray(config, scene, &frame, x, y, sobol_idx, pos_hash);

    aov_output_t out = {0};
    path_trace_aov(scene, ray, sobol_idx, config->max_depth, config->aov_flags, &out);
    *pixel_output = out;
}

// Writes `color` at (x, y) when the slot holds an allocated target with a buffer.
static inline void update_aov_pixel_if_exist(render_target_t** target, vec4s color, uint32_t x, uint32_t y)
{
    if (*target == NULL || (*target)->buffer == NULL) {
        return;
    }
    render_target_set_pixel(*target, x, y, color);
}

// Writes every AOV of `aov` to its slot in `target` at (x, y); missing targets are skipped.
// Depth is replicated into the three color channels with w = 1.
static inline void update_aov(render_target_t** target, const aov_output_t* aov, uint32_t x, uint32_t y)
{
    update_aov_pixel_if_exist(&target[AOV_BEAUTY_INDEX], aov->beauty, x, y);
    update_aov_pixel_if_exist(&target[AOV_AlBEDO_INDEX], aov->albedo, x, y);
    update_aov_pixel_if_exist(&target[AOV_NORMAL_INDEX], aov->normal, x, y);
    update_aov_pixel_if_exist(&target[AOV_DEPTH_INDEX], (vec4s){aov->depth, aov->depth, aov->depth, 1.0f}, x, y);
    update_aov_pixel_if_exist(&target[AOV_POSITION_INDEX], aov->position, x, y);
    update_aov_pixel_if_exist(&target[AOV_DIRECT_INDEX], aov->direct, x, y);
    update_aov_pixel_if_exist(&target[AOV_INDIRECT_INDEX], aov->indirect, x, y);
}

// Folds one new sample into the running mean stored at (x, y) of a lighting AOV.
// Skipped when the AOV is not requested or its target/buffer is missing.
static inline void accumulate_lighting_aov(render_job_t* job, aov_flags_t flag, size_t index, vec4s sample,
                                           uint32_t x, uint32_t y, uint32_t prev_count)
{
    if (!has_flag(job->config.aov_flags, flag)) {
        return;
    }

    render_target_t* target = job->aov_target[index];
    if (target == NULL || target->buffer == NULL) {
        return;
    }

    vec4s prev = render_target_get_pixel(target, x, y);
    render_target_set_pixel(target, x, y, running_average_vec4(prev, sample, prev_count));
}

// Renders sample `s` of pixel (x, y) and merges it into the AOV targets.
static inline void render_progressive_pixel(render_job_t* job, vec3s coord, uint32_t x, uint32_t y, uint32_t s)
{
    aov_output_t pixel = {0};
    render_pixel_one_sample(&job->config, job->scene, coord, x, y, s, &pixel);

    // Accumulate lighting AOVs; write non-stochastic AOVs once.
    accumulate_lighting_aov(job, AOV_BEAUTY, AOV_BEAUTY_INDEX, pixel.beauty, x, y, s);

    if (s == 0) {
        update_aov_pixel_if_exist(&job->aov_target[AOV_AlBEDO_INDEX], pixel.albedo, x, y);
        update_aov_pixel_if_exist(&job->aov_target[AOV_NORMAL_INDEX], pixel.normal, x, y);
        update_aov_pixel_if_exist(&job->aov_target[AOV_DEPTH_INDEX],
                                  (vec4s){pixel.depth, pixel.depth, pixel.depth, 1.0f}, x, y);
        update_aov_pixel_if_exist(&job->aov_target[AOV_POSITION_INDEX], pixel.position, x, y);
    }

    accumulate_lighting_aov(job, AOV_DIRECT, AOV_DIRECT_INDEX, pixel.direct, x, y, s);
    accumulate_lighting_aov(job, AOV_INDIRECT, AOV_INDIRECT_INDEX, pixel.indirect, x, y, s);
}

// Progressive mode: accumulate 1 spp per pass until sample_count or stop requested.
// Clears the targets and resets job->is_done before the first pass;
// job->progressive_sample_index is advanced after every pass.
static void render_progressive(render_job_t* job, vec3s coord)
{
    clear_aov_targets(job);
    job->is_done = false;

    uint32_t width = job->config.width;
    uint32_t height = job->config.height;

    for (uint32_t s = 0; s < job->config.sample_count; ++s) {
        if (job->is_done) {
            break;
        }

        // NOTE(review): is_done is polled inside the parallel loop without explicit
        // synchronization; whether that is safe depends on how the field is declared.
        int64_t x, y;  // Declared outside the parallel region, as elsewhere in this file.
#pragma omp parallel for schedule(dynamic, 1) default(none) \
    shared(job, coord, width, height, s) \
    private(x, y)
        for (y = 0; y < (int64_t)height; ++y) {
            for (x = 0; x < (int64_t)width; ++x) {
                if (job->is_done) {
                    continue;
                }
                render_progressive_pixel(job, coord, (uint32_t)x, (uint32_t)y, s);
            }
        }

        job->progressive_sample_index = s + 1;
    }
}

// Renders every pixel of the half-open rectangle [x0, x1) x [y0, y1) at full sample
// count and writes the results to the AOV targets. Stops as soon as job->is_done is set.
static void render_tile(render_job_t* job, vec3s coord, uint32_t x0, uint32_t y0, uint32_t x1, uint32_t y1)
{
    for (uint32_t y = y0; y < y1; y++) {
        for (uint32_t x = x0; x < x1; x++) {
            if (job->is_done) {
                return;
            }

            aov_output_t pixel_output = {0};
            render_pixel(&job->config, job->scene, coord, x, y, job->config.aov_flags, &pixel_output);
            update_aov(job->aov_target, &pixel_output, x, y);
        }
    }
}

// Bucket mode: splits the image into square tiles of job->config.bucket_size pixels
// (edge tiles are clipped to the image) and renders the tiles in parallel.
// NOTE(review): unlike progressive mode, is_done is not reset here, so a job whose
// flag is still set renders nothing - presumably the caller clears it; verify.
static void render_buckets(render_job_t* job, vec3s coord)
{
    uint32_t width = job->config.width;
    uint32_t height = job->config.height;

    // A zero bucket size would divide by zero below; fall back to 1-pixel tiles.
    uint32_t bucket_size = job->config.bucket_size > 0 ? job->config.bucket_size : 1;

    // Ceiling division written so that it cannot wrap for large bucket sizes.
    uint32_t tile_count_x = width / bucket_size + (width % bucket_size != 0 ? 1u : 0u);
    uint32_t tile_count_y = height / bucket_size + (height % bucket_size != 0 ? 1u : 0u);
    int64_t tile_count = (int64_t)tile_count_x * (int64_t)tile_count_y;

    int64_t tile_index;  // OpenMP requires these to be declared outside the parallel region.
#pragma omp parallel for schedule(dynamic, 1) default(none) \
    shared(job, coord, width, height, bucket_size, tile_count_x, tile_count) \
    private(tile_index)
    for (tile_index = 0; tile_index < tile_count; tile_index++) {
        uint32_t tile_x_0 = (uint32_t)(tile_index % (int64_t)tile_count_x) * bucket_size;
        uint32_t tile_y_0 = (uint32_t)(tile_index / (int64_t)tile_count_x) * bucket_size;

        // Clamp the far edges to the image; tile_*_0 is always inside the image here.
        uint32_t tile_x_1 = (width - tile_x_0 < bucket_size) ? width : tile_x_0 + bucket_size;
        uint32_t tile_y_1 = (height - tile_y_0 < bucket_size) ? height : tile_y_0 + bucket_size;

        render_tile(job, coord, tile_x_0, tile_y_0, tile_x_1, tile_y_1);
    }
}

// Renders `job` synchronously in the mode selected by job->config.rendering_mode
// and sets job->is_done to true before returning.
//
// Side effects on `job` besides the AOV targets:
//  - the scene camera is adjusted to the output aspect ratio;
//  - job->config.sample_count is overwritten with the minimal useful sample count;
//  - job->progressive_sample_index is reset to 0.
//
// A job without a scene or without allocated AOV targets is marked done without rendering.
void renderer_start(render_job_t* job)
{
    if (job == NULL) {
        return;
    }
    if (job->scene == NULL || job->aov_target == NULL) {
        job->is_done = true;
        return;
    }

    ensure_camera_aspect_ratio(&job->scene->camera, &job->config);
    job->config.sample_count = get_minimal_sample_count(job);

    // Reset progressive state whenever we (re)start.
    job->progressive_sample_index = 0;

    // Center of the image plane: one focal length ahead of the camera.
    vec3s coord = glms_vec3_add(job->scene->camera.position,
                                glms_vec3_scale(quat_get_forward(job->scene->camera.rotation),
                                                job->scene->camera.focal_length));

    if (job->config.rendering_mode == RENDER_PROGRESSIVE) {
        render_progressive(job, coord);
    } else {
        render_buckets(job, coord);
    }

    job->is_done = true;

    // TODO: A-Trous denoising
}

// Releases all AOV targets owned by `job` and the slot array, leaving
// job->aov_target NULL. The job structure itself is not freed.
void render_job_free(render_job_t* job)
{
    release_aov_targets(job);
}