Files
SimpleRayTracing/source/Algorithm/BVH.c
Misaki 6800810369 Update project structure and improve performance
Added new files for BVH, AABB, and Debug functionalities.
Added new utility functions in Common.h.
Added gamma correction function in PostProcessing.h.
Changed the return type of path_trace to vec4s for alpha blending.
Changed BSDF function signatures to include sample index and bounce.
Changed the BSDF.h to replace inline functions with declarations.
Changed the Light and SkyLight evaluation functions to include throughput and sample index.
Changed the sphere creation function in GeometryUtilities.h for better quality.
Changed the scene structure to include a BVH tree for improved ray intersection.
Changed the scene initialization parameters for better performance.
Created new Debug functions for ray intersection counting.
Created new functions for triangle collection management in Triangle.c.
Improved pixel updating logic in Window.c.
Improved ray intersection performance with new BVH implementation.
Removed unused includes from Common.h.
Removed old library linking methods in CMakeLists.txt.
2025-04-21 15:56:19 +09:00

368 lines
13 KiB
C

#include "Algorithm/BVH.h"
#include "Geometry/AABB.h"
#include "Geometry/Triangle.h"
#include <math.h>
#include <stdbool.h>
typedef struct
{
aabb_t bounds;
uint32_t count;
} bin_t;
bool bvh_tree_init(bvh_tree_t* bvh_tree, const triangle_collection_t* triangles)
{
if (bvh_tree == NULL || triangles == NULL || triangles->count == 0)
{
return false;
}
bvh_tree_t temp = {0};
temp.triangles = triangles;
temp.primitive_count = triangles->count;
// Allocate primitive indices buffer
temp.primitive_indices = (uint64_t*)malloc(temp.primitive_count * sizeof(uint64_t));
if (!temp.primitive_indices)
{
return false;
}
// Initialize primitive indices
for (uint64_t i = 0; i < temp.primitive_count; ++i)
{
temp.primitive_indices[i] = i;
}
// Allocate nodes buffer (Estimate: 2n-1 nodes for a binary tree)
temp.node_capacity = temp.primitive_count * 2 - 1; // Start with a generous estimate
temp.nodes = (bvh_node_t*)malloc(temp.node_capacity * sizeof(bvh_node_t));
if (!temp.nodes)
{
free(temp.primitive_indices);
temp.primitive_indices = NULL;
return false;
}
temp.node_count = 0;
*bvh_tree = temp;
return true;
}
static inline aabb_t compute_primitives_aabb(const triangle_collection_t* triangles, const uint64_t* primitive_indices, uint64_t start, uint64_t count)
{
if (count == 0)
{
return (aabb_t){0};
}
uint64_t triangle_index = primitive_indices[start];
aabb_t bounds = compute_bounds_triangle(triangles->buffer[triangle_index]);
for (uint64_t i = start + 1; i < start + count; ++i)
{
triangle_index = primitive_indices[i];
aabb_growth(&bounds, triangles->buffer[triangle_index].point_1);
aabb_growth(&bounds, triangles->buffer[triangle_index].point_2);
aabb_growth(&bounds, triangles->buffer[triangle_index].point_3);
}
return bounds;
}
// TODO: For BVH generation, we can use a stack-based approach to avoid recursion and improve performance.
// TODO: We can generate a cheap LBVH first, and then refine it with SAH in parallel. Doing this even allows us to migrate BVH generation to the GPU.
static uint64_t build_bvh_node(bvh_node_t* bvh_nodes, uint64_t* next_node_index, uint64_t* primitive_indices, const triangle_collection_t* triangles,
uint64_t prim_start, uint64_t prim_count)
{
uint64_t node_index = (*next_node_index)++;
// We assume that the node_index is always less than the node_capacity, may need better error handling later
bvh_node_t* node = &bvh_nodes[node_index];
node->start_index = prim_start;
node->primitive_count = 0; // Only leaf nodes have primitive_count > 0
node->bounds = compute_primitives_aabb(triangles, primitive_indices, prim_start, prim_count);
const uint32_t LEAF_THRESHOLD = 4;
if (prim_count <= LEAF_THRESHOLD)
{
node->primitive_count = prim_count;
return node_index;
}
// --- Find the Best Bind ---
int best_axis = -1;
int best_split_bin_index = -1;
// NOTE: This is global index, not the local index in the node.
uint64_t best_split_index = 0;
float best_sah_cost = SAH_INTERSECTION_COST * prim_count; // TODO: Replace with actual SAH cost calculation
aabb_t current_aabb = node->bounds;
float parent_sah = aabb_surface_area(current_aabb);
if (parent_sah <= 0.0f)
{
// Split down the middle if the parent AABB is flat
if (prim_count <= 1)
{
goto force_leaf;
}
best_axis = 0;
best_split_index = prim_start + (prim_count / 2);
if (best_split_index == prim_start || best_split_index == prim_start + prim_count)
{
best_axis = -1; // Indicate no effective split if it results in empty child
}
goto perform_split_check;
}
// SAH binning evaluation
for (int axis = 0; axis < 3; axis++)
{
bin_t bins[SAH_BIN_COUNT];
for (int i = 0; i < SAH_BIN_COUNT; i++)
{
bins[i].count = 0;
bins[i].bounds = invalid_aabb();
}
float axis_min = current_aabb.min.raw[axis];
float axis_max = current_aabb.max.raw[axis];
float axis_extent = axis_max - axis_min;
float bin_size = axis_extent / SAH_BIN_COUNT;
if (bin_size <= 0.0f)
{
continue; // Skip flat aabb since they need to handle separately;
}
// Populate bins
for (uint64_t i = 0; i < prim_count; i++)
{
uint64_t triangle_index = primitive_indices[prim_start + i];
vec3s centroid = triangle_centroid(triangles->buffer[triangle_index]);
float position_on_axis = centroid.raw[axis];
int bin_index = (int)((position_on_axis - axis_min) / bin_size);
bin_index = (int)fminf((float)bin_index, (float)SAH_BIN_COUNT - 1.0f);
bin_index = (int)fmaxf((float)bin_index, 0.0f);
aabb_t triangle_aabb = compute_bounds_triangle(triangles->buffer[triangle_index]);;
bins[bin_index].bounds = aabb_union(bins[bin_index].bounds, triangle_aabb);
bins[bin_index].count++;
}
// Calculate prefix and suffix AABBs and counts.
aabb_t left_aabb[SAH_BIN_COUNT];
uint32_t left_count[SAH_BIN_COUNT] = {0};
aabb_t right_aabb[SAH_BIN_COUNT];
uint32_t right_count[SAH_BIN_COUNT] = {0};
aabb_t current_left_aabb = invalid_aabb();
uint32_t current_left_count = 0;
for (int i = 0; i < SAH_BIN_COUNT; i++)
{
if (bins[i].count > 0)
{
if (current_left_count == 0)
current_left_aabb = bins[i].bounds;
else
current_left_aabb = aabb_union(current_left_aabb, bins[i].bounds);
}
current_left_count += bins[i].count;
left_aabb[i] = current_left_aabb;
left_count[i] = current_left_count;
}
aabb_t current_right_aabb = invalid_aabb();
uint32_t current_right_count = 0;
for (int i = SAH_BIN_COUNT - 1; i >= 0; i--)
{
if (bins[i].count > 0)
{
if (current_right_count == 0)
current_right_aabb = bins[i].bounds; // First non-empty bin (from right)
else
current_right_aabb = aabb_union(current_right_aabb, bins[i].bounds);
}
current_right_count += bins[i].count;
right_aabb[i] = current_right_aabb;
right_count[i] = current_right_count;
}
// Evaluate splits between bins
for (int i = 0; i < SAH_BIN_COUNT - 1; i++)
{
uint32_t n_left = left_count[i];
uint32_t n_right = right_count[i + 1];
if (n_left == 0 || n_right == 0)
{
continue; // Skip empty bins
}
float sah_cost = SAH_TRAVERSAL_COST +
(aabb_surface_area(left_aabb[i]) / parent_sah) * SAH_INTERSECTION_COST * n_left +
(aabb_surface_area(right_aabb[i+1]) / parent_sah) * SAH_INTERSECTION_COST * n_right;
if (sah_cost < best_sah_cost)
{
best_sah_cost = sah_cost;
best_axis = axis;
best_split_bin_index = i;
// The split point in the primitive indices is after the first "left_count" primitives in the current range.
best_split_index = prim_start + n_left;
}
}
} //End axis loop
// --- Decide whether to split or make leaf ---
perform_split_check:
bool perform_split = false;
uint64_t actual_partition_index = prim_start; // Default if no split
if (parent_sah == 0.0f && prim_count > 1) {
// Flat AABB case with more than one primitive: Force median split
perform_split = true;
// The target is mid-point, partition manually based on count
best_split_index = prim_start + prim_count / 2;
best_axis = 0; // Arbitrary axis for the partition loop if needed (though count is primary)
best_split_bin_index = -1; // Not applicable for this type of split
}
else if (best_axis != -1 && best_sah_cost < SAH_INTERSECTION_COST * prim_count)
{
// Non-flat AABB and SAH found a beneficial split
perform_split = true;
}
if (!perform_split)
{
goto force_leaf;
}
// --- Perform Split if beneficial ---
// Rearrange primitive_indices in the range [prim_start ... prim_start + prim_count - 1]
// The criterion depends on whether this was an SAH split or a flat AABB split.
if (parent_sah <= 0.0f)
{
uint64_t threshold_original_index = primitive_indices[prim_start + (prim_count / 2)]; // Arbitrary index threshold
uint64_t i = prim_start; // Pointer for the left side
for (uint64_t j = prim_start; j < prim_start + prim_count; ++j) {
// Swap if original index is "less than" threshold
if (primitive_indices[j] < threshold_original_index)
{
// Arbitrary criterion
uint64_t temp = primitive_indices[i];
primitive_indices[i] = primitive_indices[j];
primitive_indices[j] = temp;
i++;
}
}
actual_partition_index = i;
}
else
{
// SAH-based partition (uses bin index as criterion)
// The partition criterion is: centroid bin along best_axis <= best_split_bin_index
float split_axis_min = node->bounds.min.raw[best_axis];
float split_axis_extent = node->bounds.max.raw[best_axis] - split_axis_min;
float split_bin_size = split_axis_extent / SAH_BIN_COUNT; // Use best_axis's properties
uint64_t i = prim_start;
for (uint64_t j = prim_start; j < prim_start + prim_count; ++j)
{
uint64_t original_tri_idx = primitive_indices[j];
vec3s centroid = triangle_centroid(triangles->buffer[original_tri_idx]);
float pos_on_axis = ((float*)&centroid)[best_axis];
int bin_idx = (int)((pos_on_axis - split_axis_min) / split_bin_size);
bin_idx = (int)fminf((float)bin_idx, (float)SAH_BIN_COUNT - 1.0f);
bin_idx = (int)fmaxf((float)bin_idx, 0.0f);
// If the primitive belongs to the left child's partition (bin <= best_split_bin_index)
if (bin_idx <= best_split_bin_index) {
// Swap it into the current position of the left partition
uint64_t temp = primitive_indices[i];
primitive_indices[i] = primitive_indices[j];
primitive_indices[j] = temp;
i++; // Move the left partition boundary forward
}
}
// After this partition loop, 'i' is the index of the first element that belongs
// to the right side's partition. This is the actual partition point.
actual_partition_index = i;
} // End partition type branch
uint64_t left_count = actual_partition_index - prim_start;
uint64_t right_count = (prim_start + prim_count) - actual_partition_index;
if (left_count == 0 || right_count == 0)
{
goto force_leaf;
}
uint64_t left_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, prim_start, left_count);
uint64_t right_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, actual_partition_index, right_count);
node->left_child_offset = left_child_idx;
node->right_child_offset = right_child_idx;
return node_index;
force_leaf:
node->primitive_count = prim_count;
return node_index; // Return the index of this leaf node
}
bool bvh_tree_build(bvh_tree_t* bvh_tree)
{
if (bvh_tree ==NULL)
{
return false;
}
uint64_t triangle_count = bvh_tree->triangles->count;
if (triangle_count == 0)
{
return true;
}
uint64_t next_node_index = 0;
uint64_t root_node_index = build_bvh_node(bvh_tree->nodes, &next_node_index,
bvh_tree->primitive_indices, bvh_tree->triangles,
0, triangle_count);
bvh_tree->node_count = next_node_index;
if (bvh_tree->node_count < bvh_tree->node_capacity)
{
bvh_node_t* temp_nodes = realloc(bvh_tree->nodes, bvh_tree->node_count * sizeof(bvh_node_t));
if (temp_nodes != NULL)
{
bvh_tree->nodes = temp_nodes;
bvh_tree->node_capacity = bvh_tree->node_count;
}
}
return true;
}
void bvh_tree_free(bvh_tree_t* bvh_tree)
{
if (bvh_tree == NULL)
{
return;
}
free(bvh_tree->nodes);
free(bvh_tree->primitive_indices);
bvh_tree->nodes = NULL;
bvh_tree->primitive_indices = NULL;
}