#include "Algorithm/BVH.h" #include "Geometry/AABB.h" #include "Geometry/Triangle.h" #include #include typedef struct { aabb_t bounds; uint32_t count; } bin_t; bool bvh_tree_init(bvh_tree_t* bvh_tree, const triangle_collection_t* triangles) { if (bvh_tree == NULL || triangles == NULL || triangles->count == 0) { return false; } bvh_tree_t temp = {0}; temp.triangles = triangles; temp.primitive_count = triangles->count; // Allocate primitive indices buffer temp.primitive_indices = (uint64_t*)malloc(temp.primitive_count * sizeof(uint64_t)); if (!temp.primitive_indices) { return false; } // Initialize primitive indices for (uint64_t i = 0; i < temp.primitive_count; ++i) { temp.primitive_indices[i] = i; } // Allocate nodes buffer (Estimate: 2n-1 nodes for a binary tree) temp.node_capacity = temp.primitive_count * 2 - 1; // Start with a generous estimate temp.nodes = (bvh_node_t*)malloc(temp.node_capacity * sizeof(bvh_node_t)); if (!temp.nodes) { free(temp.primitive_indices); temp.primitive_indices = NULL; return false; } temp.node_count = 0; *bvh_tree = temp; return true; } static inline aabb_t compute_primitives_aabb(const triangle_collection_t* triangles, const uint64_t* primitive_indices, uint64_t start, uint64_t count) { if (count == 0) { return (aabb_t){0}; } uint64_t triangle_index = primitive_indices[start]; aabb_t bounds = compute_bounds_triangle(triangles->buffer[triangle_index]); for (uint64_t i = start + 1; i < start + count; ++i) { triangle_index = primitive_indices[i]; aabb_growth(&bounds, triangles->buffer[triangle_index].vertices[0].position); aabb_growth(&bounds, triangles->buffer[triangle_index].vertices[1].position); aabb_growth(&bounds, triangles->buffer[triangle_index].vertices[2].position); } return bounds; } // TODO: For BVH generation, we can use a stack-based approach to avoid recursion and improve performance. // TODO: We can generate a cheap LBVH first, and then refine it with SAH in parallel. Doing this even allows us to migrate BVH generation to the GPU. static uint64_t build_bvh_node(bvh_node_t* bvh_nodes, uint64_t* next_node_index, uint64_t* primitive_indices, const triangle_collection_t* triangles, uint64_t prim_start, uint64_t prim_count) { uint64_t node_index = (*next_node_index)++; // We assume that the node_index is always less than the node_capacity, may need better error handling later bvh_node_t* node = &bvh_nodes[node_index]; node->start_index = prim_start; node->primitive_count = 0; // Only leaf nodes have primitive_count > 0 node->bounds = compute_primitives_aabb(triangles, primitive_indices, prim_start, prim_count); const uint32_t LEAF_THRESHOLD = 4; if (prim_count <= LEAF_THRESHOLD) { node->primitive_count = prim_count; return node_index; } // --- Find the Best Bind --- int best_axis = -1; int best_split_bin_index = -1; // NOTE: This is global index, not the local index in the node. uint64_t best_split_index = 0; float best_sah_cost = SAH_INTERSECTION_COST * prim_count; // TODO: Replace with actual SAH cost calculation aabb_t current_aabb = node->bounds; float parent_sah = aabb_surface_area(current_aabb); if (parent_sah <= 0.0f) { // Split down the middle if the parent AABB is flat if (prim_count <= 1) { goto force_leaf; } best_axis = 0; best_split_index = prim_start + (prim_count / 2); if (best_split_index == prim_start || best_split_index == prim_start + prim_count) { best_axis = -1; // Indicate no effective split if it results in empty child } goto perform_split_check; } // SAH binning evaluation for (int axis = 0; axis < 3; axis++) { bin_t bins[SAH_BIN_COUNT]; for (int i = 0; i < SAH_BIN_COUNT; i++) { bins[i].count = 0; bins[i].bounds = invalid_aabb(); } float axis_min = current_aabb.min.raw[axis]; float axis_max = current_aabb.max.raw[axis]; float axis_extent = axis_max - axis_min; float bin_size = axis_extent / SAH_BIN_COUNT; if (bin_size <= 0.0f) { continue; // Skip flat aabb since they need to handle separately; } // Populate bins for (uint64_t i = 0; i < prim_count; i++) { uint64_t triangle_index = primitive_indices[prim_start + i]; vec3s centroid = triangle_centroid(triangles->buffer[triangle_index]); float position_on_axis = centroid.raw[axis]; int bin_index = (int)((position_on_axis - axis_min) / bin_size); bin_index = (int)fminf((float)bin_index, (float)SAH_BIN_COUNT - 1.0f); bin_index = (int)fmaxf((float)bin_index, 0.0f); aabb_t triangle_aabb = compute_bounds_triangle(triangles->buffer[triangle_index]);; bins[bin_index].bounds = aabb_union(bins[bin_index].bounds, triangle_aabb); bins[bin_index].count++; } // Calculate prefix and suffix AABBs and counts. aabb_t left_aabb[SAH_BIN_COUNT]; uint32_t left_count[SAH_BIN_COUNT] = {0}; aabb_t right_aabb[SAH_BIN_COUNT]; uint32_t right_count[SAH_BIN_COUNT] = {0}; aabb_t current_left_aabb = invalid_aabb(); uint32_t current_left_count = 0; for (int i = 0; i < SAH_BIN_COUNT; i++) { if (bins[i].count > 0) { if (current_left_count == 0) current_left_aabb = bins[i].bounds; else current_left_aabb = aabb_union(current_left_aabb, bins[i].bounds); } current_left_count += bins[i].count; left_aabb[i] = current_left_aabb; left_count[i] = current_left_count; } aabb_t current_right_aabb = invalid_aabb(); uint32_t current_right_count = 0; for (int i = SAH_BIN_COUNT - 1; i >= 0; i--) { if (bins[i].count > 0) { if (current_right_count == 0) current_right_aabb = bins[i].bounds; // First non-empty bin (from right) else current_right_aabb = aabb_union(current_right_aabb, bins[i].bounds); } current_right_count += bins[i].count; right_aabb[i] = current_right_aabb; right_count[i] = current_right_count; } // Evaluate splits between bins for (int i = 0; i < SAH_BIN_COUNT - 1; i++) { uint32_t n_left = left_count[i]; uint32_t n_right = right_count[i + 1]; if (n_left == 0 || n_right == 0) { continue; // Skip empty bins } float sah_cost = SAH_TRAVERSAL_COST + (aabb_surface_area(left_aabb[i]) / parent_sah) * SAH_INTERSECTION_COST * n_left + (aabb_surface_area(right_aabb[i+1]) / parent_sah) * SAH_INTERSECTION_COST * n_right; if (sah_cost < best_sah_cost) { best_sah_cost = sah_cost; best_axis = axis; best_split_bin_index = i; // The split point in the primitive indices is after the first "left_count" primitives in the current range. best_split_index = prim_start + n_left; } } } //End axis loop // --- Decide whether to split or make leaf --- perform_split_check: bool perform_split = false; uint64_t actual_partition_index = prim_start; // Default if no split if (parent_sah == 0.0f && prim_count > 1) { // Flat AABB case with more than one primitive: Force median split perform_split = true; // The target is mid-point, partition manually based on count best_split_index = prim_start + prim_count / 2; best_axis = 0; // Arbitrary axis for the partition loop if needed (though count is primary) best_split_bin_index = -1; // Not applicable for this type of split } else if (best_axis != -1 && best_sah_cost < SAH_INTERSECTION_COST * prim_count) { // Non-flat AABB and SAH found a beneficial split perform_split = true; } if (!perform_split) { goto force_leaf; } // --- Perform Split if beneficial --- // Rearrange primitive_indices in the range [prim_start ... prim_start + prim_count - 1] // The criterion depends on whether this was an SAH split or a flat AABB split. if (parent_sah <= 0.0f) { uint64_t threshold_original_index = primitive_indices[prim_start + (prim_count / 2)]; // Arbitrary index threshold uint64_t i = prim_start; // Pointer for the left side for (uint64_t j = prim_start; j < prim_start + prim_count; ++j) { // Swap if original index is "less than" threshold if (primitive_indices[j] < threshold_original_index) { // Arbitrary criterion uint64_t temp = primitive_indices[i]; primitive_indices[i] = primitive_indices[j]; primitive_indices[j] = temp; i++; } } actual_partition_index = i; } else { // SAH-based partition (uses bin index as criterion) // The partition criterion is: centroid bin along best_axis <= best_split_bin_index float split_axis_min = node->bounds.min.raw[best_axis]; float split_axis_extent = node->bounds.max.raw[best_axis] - split_axis_min; float split_bin_size = split_axis_extent / SAH_BIN_COUNT; // Use best_axis's properties uint64_t i = prim_start; for (uint64_t j = prim_start; j < prim_start + prim_count; ++j) { uint64_t original_tri_idx = primitive_indices[j]; vec3s centroid = triangle_centroid(triangles->buffer[original_tri_idx]); float pos_on_axis = ((float*)¢roid)[best_axis]; int bin_idx = (int)((pos_on_axis - split_axis_min) / split_bin_size); bin_idx = (int)fminf((float)bin_idx, (float)SAH_BIN_COUNT - 1.0f); bin_idx = (int)fmaxf((float)bin_idx, 0.0f); // If the primitive belongs to the left child's partition (bin <= best_split_bin_index) if (bin_idx <= best_split_bin_index) { // Swap it into the current position of the left partition uint64_t temp = primitive_indices[i]; primitive_indices[i] = primitive_indices[j]; primitive_indices[j] = temp; i++; // Move the left partition boundary forward } } // After this partition loop, 'i' is the index of the first element that belongs // to the right side's partition. This is the actual partition point. actual_partition_index = i; } // End partition type branch uint64_t left_count = actual_partition_index - prim_start; uint64_t right_count = (prim_start + prim_count) - actual_partition_index; if (left_count == 0 || right_count == 0) { goto force_leaf; } uint64_t left_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, prim_start, left_count); uint64_t right_child_idx = build_bvh_node(bvh_nodes, next_node_index, primitive_indices, triangles, actual_partition_index, right_count); node->left_child_offset = left_child_idx; node->right_child_offset = right_child_idx; return node_index; force_leaf: node->primitive_count = prim_count; return node_index; // Return the index of this leaf node } bool bvh_tree_build(bvh_tree_t* bvh_tree) { if (bvh_tree ==NULL) { return false; } uint64_t triangle_count = bvh_tree->triangles->count; if (triangle_count == 0) { return true; } uint64_t next_node_index = 0; uint64_t root_node_index = build_bvh_node(bvh_tree->nodes, &next_node_index, bvh_tree->primitive_indices, bvh_tree->triangles, 0, triangle_count); bvh_tree->node_count = next_node_index; if (bvh_tree->node_count < bvh_tree->node_capacity) { bvh_node_t* temp_nodes = realloc(bvh_tree->nodes, bvh_tree->node_count * sizeof(bvh_node_t)); if (temp_nodes != NULL) { bvh_tree->nodes = temp_nodes; bvh_tree->node_capacity = bvh_tree->node_count; } } return true; } void bvh_tree_free(bvh_tree_t* bvh_tree) { if (bvh_tree == NULL) { return; } free(bvh_tree->nodes); free(bvh_tree->primitive_indices); bvh_tree->nodes = NULL; bvh_tree->primitive_indices = NULL; }