Added new files for BVH, AABB, and Debug functionalities. Added new utility functions in Common.h. Added gamma correction function in PostProcessing.h. Changed the return type of path_trace to vec4s for alpha blending. Changed BSDF function signatures to include sample index and bounce. Changed the BSDF.h to replace inline functions with declarations. Changed the Light and SkyLight evaluation functions to include throughput and sample index. Changed the sphere creation function in GeometryUtilities.h for better quality. Changed the scene structure to include a BVH tree for improved ray intersection. Changed the scene initialization parameters for better performance. Created new Debug functions for ray intersection counting. Created new functions for triangle collection management in Triangle.c. Improved pixel updating logic in Window.c. Improved ray intersection performance with new BVH implementation. Removed unused includes from Common.h. Removed old library linking methods in CMakeLists.txt.
368 lines
13 KiB
C
368 lines
13 KiB
C
#include "Algorithm/BVH.h"
|
|
#include "Geometry/AABB.h"
|
|
#include "Geometry/Triangle.h"
|
|
#include <math.h>
|
|
#include <stdbool.h>
|
|
|
|
typedef struct
|
|
{
|
|
aabb_t bounds;
|
|
uint32_t count;
|
|
} bin_t;
|
|
|
|
bool bvh_tree_init(bvh_tree_t* bvh_tree, const triangle_collection_t* triangles)
|
|
{
|
|
if (bvh_tree == NULL || triangles == NULL || triangles->count == 0)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
bvh_tree_t temp = {0};
|
|
temp.triangles = triangles;
|
|
temp.primitive_count = triangles->count;
|
|
|
|
// Allocate primitive indices buffer
|
|
temp.primitive_indices = (uint64_t*)malloc(temp.primitive_count * sizeof(uint64_t));
|
|
if (!temp.primitive_indices)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// Initialize primitive indices
|
|
for (uint64_t i = 0; i < temp.primitive_count; ++i)
|
|
{
|
|
temp.primitive_indices[i] = i;
|
|
}
|
|
|
|
// Allocate nodes buffer (Estimate: 2n-1 nodes for a binary tree)
|
|
temp.node_capacity = temp.primitive_count * 2 - 1; // Start with a generous estimate
|
|
temp.nodes = (bvh_node_t*)malloc(temp.node_capacity * sizeof(bvh_node_t));
|
|
if (!temp.nodes)
|
|
{
|
|
free(temp.primitive_indices);
|
|
temp.primitive_indices = NULL;
|
|
return false;
|
|
}
|
|
temp.node_count = 0;
|
|
|
|
*bvh_tree = temp;
|
|
return true;
|
|
}
|
|
|
|
static inline aabb_t compute_primitives_aabb(const triangle_collection_t* triangles, const uint64_t* primitive_indices, uint64_t start, uint64_t count)
|
|
{
|
|
if (count == 0)
|
|
{
|
|
return (aabb_t){0};
|
|
}
|
|
|
|
uint64_t triangle_index = primitive_indices[start];
|
|
aabb_t bounds = compute_bounds_triangle(triangles->buffer[triangle_index]);
|
|
|
|
for (uint64_t i = start + 1; i < start + count; ++i)
|
|
{
|
|
triangle_index = primitive_indices[i];
|
|
aabb_growth(&bounds, triangles->buffer[triangle_index].point_1);
|
|
aabb_growth(&bounds, triangles->buffer[triangle_index].point_2);
|
|
aabb_growth(&bounds, triangles->buffer[triangle_index].point_3);
|
|
}
|
|
|
|
return bounds;
|
|
}
|
|
|
|
// TODO: For BVH generation, we can use a stack-based approach to avoid recursion and improve performance.
|
|
// TODO: We can generate a cheap LBVH first, and then refine it with SAH in parallel. Doing this even allows us to migrate BVH generation to the GPU.
|
|
static uint64_t build_bvh_node(bvh_node_t* bvh_nodes, uint64_t* next_node_index, uint64_t* primitive_indices, const triangle_collection_t* triangles,
|
|
uint64_t prim_start, uint64_t prim_count)
|
|
{
|
|
uint64_t node_index = (*next_node_index)++;
|
|
// We assume that the node_index is always less than the node_capacity, may need better error handling later
|
|
|
|
bvh_node_t* node = &bvh_nodes[node_index];
|
|
node->start_index = prim_start;
|
|
node->primitive_count = 0; // Only leaf nodes have primitive_count > 0
|
|
node->bounds = compute_primitives_aabb(triangles, primitive_indices, prim_start, prim_count);
|
|
|
|
const uint32_t LEAF_THRESHOLD = 4;
|
|
if (prim_count <= LEAF_THRESHOLD)
|
|
{
|
|
node->primitive_count = prim_count;
|
|
return node_index;
|
|
}
|
|
|
|
// --- Find the Best Bind ---
|
|
int best_axis = -1;
|
|
int best_split_bin_index = -1;
|
|
// NOTE: This is global index, not the local index in the node.
|
|
uint64_t best_split_index = 0;
|
|
float best_sah_cost = SAH_INTERSECTION_COST * prim_count; // TODO: Replace with actual SAH cost calculation
|
|
|
|
aabb_t current_aabb = node->bounds;
|
|
float parent_sah = aabb_surface_area(current_aabb);
|
|
|
|
if (parent_sah <= 0.0f)
|
|
{
|
|
// Split down the middle if the parent AABB is flat
|
|
if (prim_count <= 1)
|
|
{
|
|
goto force_leaf;
|
|
}
|
|
|
|
best_axis = 0;
|
|
best_split_index = prim_start + (prim_count / 2);
|
|
if (best_split_index == prim_start || best_split_index == prim_start + prim_count)
|
|
{
|
|
best_axis = -1; // Indicate no effective split if it results in empty child
|
|
}
|
|
|
|
goto perform_split_check;
|
|
}
|
|
|
|
// SAH binning evaluation
|
|
for (int axis = 0; axis < 3; axis++)
|
|
{
|
|
bin_t bins[SAH_BIN_COUNT];
|
|
for (int i = 0; i < SAH_BIN_COUNT; i++)
|
|
{
|
|
bins[i].count = 0;
|
|
bins[i].bounds = invalid_aabb();
|
|
}
|
|
|
|
float axis_min = current_aabb.min.raw[axis];
|
|
float axis_max = current_aabb.max.raw[axis];
|
|
float axis_extent = axis_max - axis_min;
|
|
float bin_size = axis_extent / SAH_BIN_COUNT;
|
|
|
|
if (bin_size <= 0.0f)
|
|
{
|
|
continue; // Skip flat aabb since they need to handle separately;
|
|
}
|
|
|
|
// Populate bins
|
|
for (uint64_t i = 0; i < prim_count; i++)
|
|
{
|
|
uint64_t triangle_index = primitive_indices[prim_start + i];
|
|
vec3s centroid = triangle_centroid(triangles->buffer[triangle_index]);
|
|
float position_on_axis = centroid.raw[axis];
|
|
|
|
int bin_index = (int)((position_on_axis - axis_min) / bin_size);
|
|
bin_index = (int)fminf((float)bin_index, (float)SAH_BIN_COUNT - 1.0f);
|
|
bin_index = (int)fmaxf((float)bin_index, 0.0f);
|
|
|
|
aabb_t triangle_aabb = compute_bounds_triangle(triangles->buffer[triangle_index]);;
|
|
bins[bin_index].bounds = aabb_union(bins[bin_index].bounds, triangle_aabb);
|
|
bins[bin_index].count++;
|
|
}
|
|
|
|
// Calculate prefix and suffix AABBs and counts.
|
|
aabb_t left_aabb[SAH_BIN_COUNT];
|
|
uint32_t left_count[SAH_BIN_COUNT] = {0};
|
|
aabb_t right_aabb[SAH_BIN_COUNT];
|
|
uint32_t right_count[SAH_BIN_COUNT] = {0};
|
|
|
|
aabb_t current_left_aabb = invalid_aabb();
|
|
uint32_t current_left_count = 0;
|
|
for (int i = 0; i < SAH_BIN_COUNT; i++)
|
|
{
|
|
if (bins[i].count > 0)
|
|
{
|
|
if (current_left_count == 0)
|
|
current_left_aabb = bins[i].bounds;
|
|
else
|
|
current_left_aabb = aabb_union(current_left_aabb, bins[i].bounds);
|
|
}
|
|
current_left_count += bins[i].count;
|
|
|
|
left_aabb[i] = current_left_aabb;
|
|
left_count[i] = current_left_count;
|
|
}
|
|
|
|
aabb_t current_right_aabb = invalid_aabb();
|
|
uint32_t current_right_count = 0;
|
|
for (int i = SAH_BIN_COUNT - 1; i >= 0; i--)
|
|
{
|
|
if (bins[i].count > 0)
|
|
{
|
|
if (current_right_count == 0)
|
|
current_right_aabb = bins[i].bounds; // First non-empty bin (from right)
|
|
else
|
|
current_right_aabb = aabb_union(current_right_aabb, bins[i].bounds);
|
|
}
|
|
current_right_count += bins[i].count;
|
|
|
|
right_aabb[i] = current_right_aabb;
|
|
right_count[i] = current_right_count;
|
|
}
|
|
|
|
// Evaluate splits between bins
|
|
for (int i = 0; i < SAH_BIN_COUNT - 1; i++)
|
|
{
|
|
uint32_t n_left = left_count[i];
|
|
uint32_t n_right = right_count[i + 1];
|
|
|
|
if (n_left == 0 || n_right == 0)
|
|
{
|
|
continue; // Skip empty bins
|
|
}
|
|
|
|
float sah_cost = SAH_TRAVERSAL_COST +
|
|
(aabb_surface_area(left_aabb[i]) / parent_sah) * SAH_INTERSECTION_COST * n_left +
|
|
(aabb_surface_area(right_aabb[i+1]) / parent_sah) * SAH_INTERSECTION_COST * n_right;
|
|
|
|
if (sah_cost < best_sah_cost)
|
|
{
|
|
best_sah_cost = sah_cost;
|
|
best_axis = axis;
|
|
best_split_bin_index = i;
|
|
// The split point in the primitive indices is after the first "left_count" primitives in the current range.
|
|
best_split_index = prim_start + n_left;
|
|
}
|
|
}
|
|
|
|
} //End axis loop
|
|
|
|
// --- Decide whether to split or make leaf ---
|
|
perform_split_check:
|
|
bool perform_split = false;
|
|
uint64_t actual_partition_index = prim_start; // Default if no split
|
|
|
|
if (parent_sah == 0.0f && prim_count > 1) {
|
|
// Flat AABB case with more than one primitive: Force median split
|
|
perform_split = true;
|
|
// The target is mid-point, partition manually based on count
|
|
best_split_index = prim_start + prim_count / 2;
|
|
best_axis = 0; // Arbitrary axis for the partition loop if needed (though count is primary)
|
|
best_split_bin_index = -1; // Not applicable for this type of split
|
|
}
|
|
else if (best_axis != -1 && best_sah_cost < SAH_INTERSECTION_COST * prim_count)
|
|
{
|
|
// Non-flat AABB and SAH found a beneficial split
|
|
perform_split = true;
|
|
}
|
|
|
|
if (!perform_split)
|
|
{
|
|
goto force_leaf;
|
|
}
|
|
|
|
// --- Perform Split if beneficial ---
|
|
// Rearrange primitive_indices in the range [prim_start ... prim_start + prim_count - 1]
|
|
// The criterion depends on whether this was an SAH split or a flat AABB split.
|
|
|
|
if (parent_sah <= 0.0f)
|
|
{
|
|
uint64_t threshold_original_index = primitive_indices[prim_start + (prim_count / 2)]; // Arbitrary index threshold
|
|
uint64_t i = prim_start; // Pointer for the left side
|
|
for (uint64_t j = prim_start; j < prim_start + prim_count; ++j) {
|
|
// Swap if original index is "less than" threshold
|
|
if (primitive_indices[j] < threshold_original_index)
|
|
{
|
|
// Arbitrary criterion
|
|
uint64_t temp = primitive_indices[i];
|
|
primitive_indices[i] = primitive_indices[j];
|
|
primitive_indices[j] = temp;
|
|
i++;
|
|
}
|
|
}
|
|
actual_partition_index = i;
|
|
}
|
|
else
|
|
{
|
|
// SAH-based partition (uses bin index as criterion)
|
|
// The partition criterion is: centroid bin along best_axis <= best_split_bin_index
|
|
|
|
float split_axis_min = node->bounds.min.raw[best_axis];
|
|
float split_axis_extent = node->bounds.max.raw[best_axis] - split_axis_min;
|
|
float split_bin_size = split_axis_extent / SAH_BIN_COUNT; // Use best_axis's properties
|
|
|
|
uint64_t i = prim_start;
|
|
for (uint64_t j = prim_start; j < prim_start + prim_count; ++j)
|
|
{
|
|
uint64_t original_tri_idx = primitive_indices[j];
|
|
vec3s centroid = triangle_centroid(triangles->buffer[original_tri_idx]);
|
|
float pos_on_axis = ((float*)¢roid)[best_axis];
|
|
|
|
int bin_idx = (int)((pos_on_axis - split_axis_min) / split_bin_size);
|
|
bin_idx = (int)fminf((float)bin_idx, (float)SAH_BIN_COUNT - 1.0f);
|
|
bin_idx = (int)fmaxf((float)bin_idx, 0.0f);
|
|
|
|
// If the primitive belongs to the left child's partition (bin <= best_split_bin_index)
|
|
if (bin_idx <= best_split_bin_index) {
|
|
// Swap it into the current position of the left partition
|
|
uint64_t temp = primitive_indices[i];
|
|
primitive_indices[i] = primitive_indices[j];
|
|
primitive_indices[j] = temp;
|
|
i++; // Move the left partition boundary forward
|
|
}
|
|
}
|
|
// After this partition loop, 'i' is the index of the first element that belongs
|
|
// to the right side's partition. This is the actual partition point.
|
|
actual_partition_index = i;
|
|
} // End partition type branch
|
|
|
|
uint64_t left_count = actual_partition_index - prim_start;
|
|
uint64_t right_count = (prim_start + prim_count) - actual_partition_index;
|
|
|
|
if (left_count == 0 || right_count == 0)
|
|
{
|
|
goto force_leaf;
|
|
}
|
|
|
|
uint64_t left_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, prim_start, left_count);
|
|
uint64_t right_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, actual_partition_index, right_count);
|
|
|
|
node->left_child_offset = left_child_idx;
|
|
node->right_child_offset = right_child_idx;
|
|
|
|
return node_index;
|
|
|
|
force_leaf:
|
|
node->primitive_count = prim_count;
|
|
return node_index; // Return the index of this leaf node
|
|
}
|
|
|
|
bool bvh_tree_build(bvh_tree_t* bvh_tree)
|
|
{
|
|
if (bvh_tree ==NULL)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
uint64_t triangle_count = bvh_tree->triangles->count;
|
|
if (triangle_count == 0)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
uint64_t next_node_index = 0;
|
|
uint64_t root_node_index = build_bvh_node(bvh_tree->nodes, &next_node_index,
|
|
bvh_tree->primitive_indices, bvh_tree->triangles,
|
|
0, triangle_count);
|
|
|
|
bvh_tree->node_count = next_node_index;
|
|
if (bvh_tree->node_count < bvh_tree->node_capacity)
|
|
{
|
|
bvh_node_t* temp_nodes = realloc(bvh_tree->nodes, bvh_tree->node_count * sizeof(bvh_node_t));
|
|
if (temp_nodes != NULL)
|
|
{
|
|
bvh_tree->nodes = temp_nodes;
|
|
bvh_tree->node_capacity = bvh_tree->node_count;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void bvh_tree_free(bvh_tree_t* bvh_tree)
|
|
{
|
|
if (bvh_tree == NULL)
|
|
{
|
|
return;
|
|
}
|
|
|
|
free(bvh_tree->nodes);
|
|
free(bvh_tree->primitive_indices);
|
|
bvh_tree->nodes = NULL;
|
|
bvh_tree->primitive_indices = NULL;
|
|
}
|