Change project structure;

Added new c# binding;
2025-12-30 20:54:05 +09:00
parent 5f5404268c
commit f1d3dddb9a
392 changed files with 2694 additions and 360462 deletions
--- a/native/source/Rendering/Camera.c
+++ b/native/source/Rendering/Camera.c
@@ -0,0 +1,17 @@
+#include "Rendering/Camera.h"
+
+// Builds a camera value from a pose and sensor parameters.
+//
+// position, rotation : stored on the camera unchanged.
+// focal_length       : stored unchanged; divisor in both field-of-view formulas.
+// size_x             : sensor width; the height is derived as size_x / aspect_ratio.
+// aspect_ratio       : width divided by height.
+//
+// Both fields of view are full angles of the form 2 * atan(extent / (2 * focal_length)).
+// Nothing is validated here, so a zero focal_length or aspect_ratio flows
+// straight into the divisions below.
+camera_t camera_create(vec3s position, versors rotation, float focal_length, float size_x, float aspect_ratio)
+{
+    camera_t result = {0};
+
+    // Pose
+    result.position = position;
+    result.rotation = rotation;
+
+    // Sensor
+    result.focal_length = focal_length;
+    result.size_x = size_x;
+    result.size_y = size_x / aspect_ratio;
+
+    // Derived fields of view
+    result.fov_x = 2.0f * (float)atan(size_x / (2.0f * focal_length));
+    result.fov_y = 2.0f * (float)atan(size_x / (2.0f * focal_length * aspect_ratio));
+
+    return result;
+}
--- a/native/source/Rendering/Debug.c
+++ b/native/source/Rendering/Debug.c
@@ -0,0 +1,65 @@
+#include "Rendering/Debug.h"
+#include "Algorithm/RayIntersection.h"
+
+// Recursively counts the BVH nodes whose bounding box is hit by `ray`.
+//
+// Starting at `node_index`, every node whose bounds intersect the ray within
+// [0, max_distance] increments *count_out. Nodes with primitive_count == 0 are
+// treated as internal and both children are visited; other nodes end the descent.
+// Returns without touching *count_out if the tree arrays or count_out are NULL.
+// `node_index` is not range-checked against the node array.
+static void ray_intersect_bvh_count(ray_t ray, bvh_tree_t bvh_tree, uint64_t node_index, uint32_t* count_out)
+{
+    const float max_distance = 1e6f;
+
+    if (count_out == NULL || bvh_tree.nodes == NULL || bvh_tree.primitive_indices == NULL)
+    {
+        return;
+    }
+
+    const bvh_node_t* current = bvh_tree.nodes + node_index;
+
+    // t_near / t_far are only read when the box test succeeds (short-circuit below).
+    float t_near, t_far;
+    if (!ray_intersect_aabb(&ray, current->bounds, &t_near, &t_far) || t_near > max_distance || t_far < 0.0f)
+    {
+        return;
+    }
+
+    *count_out += 1;
+
+    if (current->primitive_count != 0)
+    {
+        // Leaf node: nothing further to descend into.
+        return;
+    }
+
+    ray_intersect_bvh_count(ray, bvh_tree, current->left_child_offset, count_out);
+    ray_intersect_bvh_count(ray, bvh_tree, current->right_child_offset, count_out);
+}
+
+// Produces a debug visualisation colour for a single ray.
+//
+// scene        : scene to query; a NULL scene yields a zero vector.
+// ray          : ray to visualise.
+// sample_index : index passed to the Sobol sampler in DEBUG_SOBOL mode.
+// flag         : only the low 8 bits select the mode.
+//
+// Modes:
+//   DEBUG_BVH   - DEBUG_COLOR_BVH added once per BVH node hit by the ray.
+//   DEBUG_SOBOL - grey level equal to the Sobol sample value, alpha 1.
+//   DEBUG_UV    - fractional part of |u| and |v| of the closest hit in R and G.
+// Any other mode returns a zero vector.
+//
+// Each case body is wrapped in braces: before C23 a declaration may not
+// directly follow a `case` label, and the braces also keep each mode's locals
+// out of the other cases' scope.
+vec4s render_debug(scene_t* scene, ray_t ray, uint16_t sample_index, int flag)
+{
+    if (scene == NULL)
+    {
+        return glms_vec4_zero();
+    }
+
+    switch (flag & 0xFF)
+    {
+    case DEBUG_BVH:
+    {
+        uint32_t count = 0;
+        ray_intersect_bvh_count(ray, scene->bvh_tree, 0, &count);
+
+        vec4s result = glms_vec4_zero();
+        for (uint32_t i = 0; i < count; i++)
+        {
+            result = glms_vec4_add(result, DEBUG_COLOR_BVH);
+        }
+        return result;
+    }
+
+    case DEBUG_SOBOL:
+    {
+        // NOTE(review): the second argument is 1, while the previous comment
+        // claimed "dimension 0" - confirm which dimension is intended.
+        float sobol_sample_value = sobol_sample(sample_index, 1);
+        return (vec4s){sobol_sample_value, sobol_sample_value, sobol_sample_value, 1.0f};
+    }
+
+    case DEBUG_UV:
+    {
+        hit_result_t hit_result = ray_intersect_scene_closest(&ray, scene);
+        return (vec4s){fmodf(fabsf(hit_result.uv.x), 1.0f), fmodf(fabsf(hit_result.uv.y), 1.0f), 0.0f, 1.0f};
+    }
+
+    default:
+        return glms_vec4_zero();
+    }
+}
--- a/native/source/Rendering/RenderTarget.c
+++ b/native/source/Rendering/RenderTarget.c
@@ -0,0 +1,83 @@
+#include "Rendering/RenderTarget.h"
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+// Allocates a width x height float RGBA buffer for `render_target`.
+//
+// Every pixel starts as (0, 0, 0, 1). On success the target owns the buffer
+// (release it with render_target_free) and true is returned.
+//
+// On failure (NULL target, size overflow, or allocation failure) false is
+// returned and, when the target is non-NULL, it is left in a defined empty
+// state (zero size, NULL buffer) so that a later render_target_free or pixel
+// access is harmless instead of touching an uninitialized pointer.
+bool render_target_init(uint32_t width, uint32_t height, render_target_t* render_target)
+{
+    if (render_target == NULL)
+    {
+        return false;
+    }
+
+    render_target->width = 0;
+    render_target->height = 0;
+    render_target->buffer = NULL;
+
+    // Guard width * height on platforms where size_t is 32 bits wide.
+    if (height != 0 && (size_t)width > SIZE_MAX / (size_t)height)
+    {
+        return false;
+    }
+
+    size_t pixel_count = (size_t)width * height;
+
+    // calloc zero-fills and rejects pixel_count * sizeof(vec4s) overflow.
+    vec4s* buffer = (vec4s*)calloc(pixel_count, sizeof(vec4s));
+    if (buffer == NULL)
+    {
+        return false;
+    }
+
+    // Opaque alpha by default.
+    for (size_t i = 0; i < pixel_count; i++)
+    {
+        buffer[i].w = 1.0f;
+    }
+
+    render_target->width = width;
+    render_target->height = height;
+    render_target->buffer = buffer;
+    return true;
+}
+
+// Reads the pixel at (x, y) from a row-major buffer.
+// Coordinates outside the target yield an all-zero vector (alpha included).
+// `render_target` and its buffer are not NULL-checked.
+vec4s render_target_get_pixel(const render_target_t* render_target, uint32_t x, uint32_t y)
+{
+    if (x >= render_target->width || y >= render_target->height)
+    {
+        return (vec4s){0.0f, 0.0f, 0.0f, 0.0f};
+    }
+
+    const size_t row_start = (size_t)y * render_target->width;
+    return render_target->buffer[row_start + x];
+}
+
+// Writes `color` to the pixel at (x, y) of a row-major buffer.
+// Out-of-range coordinates are silently ignored.
+// `render_target` and its buffer are not NULL-checked.
+void render_target_set_pixel(render_target_t* render_target, uint32_t x, uint32_t y, vec4s color)
+{
+    if (x >= render_target->width || y >= render_target->height)
+    {
+        return;
+    }
+
+    const size_t row_start = (size_t)y * render_target->width;
+    render_target->buffer[row_start + x] = color;
+}
+
+// Converts the float RGBA buffer into a newly allocated 8-bit RGBA image.
+//
+// Each channel is scaled by 255 and passed through COLOR_CLAMP. The result
+// holds width * height * 4 bytes in the same row-major pixel order as the
+// source; the caller owns it and must free() it. Returns NULL if the
+// allocation fails.
+unsigned char* render_target_to_char(render_target_t* render_target)
+{
+    const size_t pixel_count = (size_t)render_target->width * render_target->height;
+
+    unsigned char* bytes = (unsigned char*)malloc(pixel_count * 4); // 4 bytes for RGBA
+    if (bytes == NULL)
+    {
+        return NULL;
+    }
+
+    // Source and destination share the same linear pixel order, so one flat
+    // pass visits the pixels exactly as a row/column walk would.
+    for (size_t i = 0; i < pixel_count; i++)
+    {
+        const vec4s pixel = render_target->buffer[i];
+        unsigned char* out = bytes + i * 4;
+
+        out[0] = COLOR_CLAMP(pixel.x * 255.0f);
+        out[1] = COLOR_CLAMP(pixel.y * 255.0f);
+        out[2] = COLOR_CLAMP(pixel.z * 255.0f);
+        out[3] = COLOR_CLAMP(pixel.w * 255.0f);
+    }
+
+    return bytes;
+}
+
+// Releases the pixel buffer owned by `target`; the struct itself is not freed.
+//
+// The target is reset to an empty state afterwards (NULL buffer, zero size),
+// so calling this twice, or accessing pixels after the free, no longer
+// touches released memory. A NULL target is ignored.
+void render_target_free(render_target_t* target)
+{
+    if (target == NULL)
+    {
+        return;
+    }
+
+    free(target->buffer); // free(NULL) is a no-op
+    target->buffer = NULL;
+    target->width = 0;
+    target->height = 0;
+}
--- a/native/source/Rendering/Renderer.c
+++ b/native/source/Rendering/Renderer.c
@@ -0,0 +1,198 @@
+#include "Rendering/Renderer.h"
+#include "Algorithm/PathTracing.h"
+
+// Creates a render target for one AOV when `target_flag` is set in `aov_flags`.
+//
+// *render_target always receives a defined value: a fully initialized,
+// heap-allocated target on success, or NULL when the AOV was not requested or
+// any allocation failed. Callers can therefore treat NULL as "no target".
+static inline void create_target_if_required(aov_flags_t aov_flags, aov_flags_t target_flag, render_target_t** render_target, uint32_t width, uint32_t height)
+{
+    // Publish NULL first so that no exit path leaves the slot uninitialized.
+    *render_target = NULL;
+
+    if (!has_flag(aov_flags, target_flag))
+    {
+        return;
+    }
+
+    render_target_t* created = (render_target_t*)malloc(sizeof(render_target_t));
+    if (created == NULL)
+    {
+        return;
+    }
+
+    // A target without a pixel buffer is useless; drop it rather than hand
+    // out a half-initialized object.
+    if (!render_target_init(width, height, created))
+    {
+        free(created);
+        return;
+    }
+
+    *render_target = created;
+}
+
+// Allocates the job's AOV target table and creates one render target per
+// AOV requested in `aov_flags`, sized from job->config.
+//
+// The table has MAX_AOV_TARGET entries and is zero-initialized, so every slot
+// that is not explicitly created below is NULL; render_job_free inspects all
+// MAX_AOV_TARGET slots and relies on that.
+//
+// Returns false if `job` or its config is NULL or the table cannot be
+// allocated. A failure to create an individual target is not reported here;
+// NOTE(review): whether that leaves the slot NULL depends on
+// create_target_if_required - confirm.
+bool renderer_aov_target_init(render_job_t* job, aov_flags_t aov_flags)
+{
+    if (job == NULL || job->config == NULL)
+    {
+        return false;
+    }
+
+    job->aov_target = (render_target_t**)calloc(MAX_AOV_TARGET, sizeof(render_target_t*));
+    if (job->aov_target == NULL)
+    {
+        return false;
+    }
+
+    create_target_if_required(aov_flags, AOV_BEAUTY, &job->aov_target[AOV_BEAUTY_INDEX], job->config->width, job->config->height);
+    create_target_if_required(aov_flags, AOV_AlBEDO, &job->aov_target[AOV_AlBEDO_INDEX], job->config->width, job->config->height);
+    create_target_if_required(aov_flags, AOV_NORMAL, &job->aov_target[AOV_NORMAL_INDEX], job->config->width, job->config->height);
+    create_target_if_required(aov_flags, AOV_DEPTH, &job->aov_target[AOV_DEPTH_INDEX], job->config->width, job->config->height);
+    create_target_if_required(aov_flags, AOV_POSITION, &job->aov_target[AOV_POSITION_INDEX], job->config->width, job->config->height);
+
+    create_target_if_required(aov_flags, AOV_DIRECT, &job->aov_target[AOV_DIRECT_INDEX], job->config->width, job->config->height);
+    create_target_if_required(aov_flags, AOV_INDIRECT, &job->aov_target[AOV_INDIRECT_INDEX], job->config->width, job->config->height);
+
+    return true;
+}
+
+// Makes the camera sensor match the output image's aspect ratio.
+//
+// When the image ratio (width / height) differs from the sensor ratio
+// (size_x / size_y) by more than 0.001, size_y and fov_y are recomputed from
+// size_x; size_x and fov_x are left alone.
+static inline void ensure_camera_aspect_ratio(camera_t* camera, const rendering_config_t* config)
+{
+    const float image_ratio = (float)config->width / config->height;
+    const float sensor_ratio = camera->size_x / camera->size_y;
+
+    // Written as a negated '>' so a NaN difference leaves the camera untouched.
+    if (!(fabsf(image_ratio - sensor_ratio) > 0.001f))
+    {
+        return;
+    }
+
+    camera->size_y = camera->size_x / image_ratio;
+    camera->fov_y = 2.0f * (float)atan(camera->size_x / (2.0f * camera->focal_length * image_ratio));
+}
+
+// Maps a (possibly fractional) pixel position to normalized [0, 1] image
+// coordinates by dividing by the image size. When FLIP_Y is defined the
+// vertical coordinate is mirrored (1 - y / height).
+static inline vec2s compute_ndc(float x, float y, uint32_t width, uint32_t height)
+{
+    const float u = x / (float)width;
+
+#ifdef FLIP_Y
+    const float v = 1.0f - y / (float)height;
+#else
+    const float v = y / (float)height;
+#endif
+
+    return (vec2s){.x = u, .y = v};
+}
+
+// Returns true when any of the beauty, direct or indirect AOVs is requested.
+// render_pixel uses this to choose between the configured sample count and
+// a single sample.
+static inline bool aov_needs_lighting_samples(aov_flags_t flags)
+{
+    if (has_flag(flags, AOV_BEAUTY))
+    {
+        return true;
+    }
+
+    if (has_flag(flags, AOV_DIRECT))
+    {
+        return true;
+    }
+
+    return has_flag(flags, AOV_INDIRECT);
+}
+
+// Renders one pixel and writes the averaged AOV values to *pixel_output.
+//
+// config       : image size, sample count and maximum path depth.
+// scene        : scene to trace; its camera supplies pose and sensor size.
+// coord        : point on the image plane that the sensor offsets are added to
+//                (the caller passes camera position + forward * focal_length).
+// x, y         : integer pixel coordinates.
+// aov_flags    : requested outputs; with no lighting AOV a single sample is taken.
+// pixel_output : receives the sum of all samples, each weighted by 1 / sample_count.
+static void render_pixel(const rendering_config_t* config, scene_t* scene, vec3s coord, uint32_t x, uint32_t y, aov_flags_t aov_flags, aov_output_t* pixel_output)
+{
+    const uint32_t pixel_id = y * config->width + x;
+    const uint16_t sample_count = aov_needs_lighting_samples(aov_flags) ? (uint16_t)config->sample_count : 1;
+    const float sample_weight = 1.0f / (float)sample_count;
+
+    const vec3s right_axis = quat_get_right(scene->camera.rotation);
+    const vec3s up_axis = quat_get_up(scene->camera.rotation);
+
+    // Initial spread angle for ray differentials; identical for every sample.
+    const float pixel_height = scene->camera.size_y / (float)config->height;
+    const float spread_angle = atanf(pixel_height / scene->camera.focal_length);
+
+    aov_output_t pixel_sum = {0};
+
+    for (uint16_t sample = 0; sample < sample_count; sample++)
+    {
+        // TODO: Hash it
+        const uint32_t sobol_idx = pixel_id * (uint32_t)sample_count + (sample + 1);
+        const uint32_t pos_hash = hash_uint32(pixel_id);
+
+        // Anti-aliasing: jitter the sample position inside the pixel.
+        const float jitter_u = sobol_sample_scrambled(sobol_idx, PRNG_LENS_U, pos_hash);
+        const float jitter_v = sobol_sample_scrambled(sobol_idx, PRNG_LENS_V, pos_hash);
+        const vec2s ndc = compute_ndc((float)x + jitter_u, (float)y + jitter_v, config->width, config->height);
+
+        // [0, 1] -> [-1, 1], then scale by half the sensor extent.
+        const float offset_right = (ndc.x * 2.0f - 1.0f) * scene->camera.size_x * 0.5f;
+        const float offset_up = (ndc.y * 2.0f - 1.0f) * scene->camera.size_y * 0.5f;
+
+        vec3s target_point = glms_vec3_add(coord, glms_vec3_scale(right_axis, offset_right));
+        target_point = glms_vec3_add(target_point, glms_vec3_scale(up_axis, offset_up));
+
+        const vec3s direction = glms_vec3_normalize(glms_vec3_sub(target_point, scene->camera.position));
+        ray_t ray = ray_create(scene->camera.position, direction, 0.0f, spread_angle);
+
+        aov_output_t sample_output = {0};
+        path_trace_aov(scene, ray, sobol_idx, config->max_depth, aov_flags, &sample_output);
+
+        accumulate_aov(&pixel_sum, &sample_output, sample_weight);
+    }
+
+    *pixel_output = pixel_sum;
+}
+
+// Writes `color` at (x, y) into the target referenced by `target`, provided
+// that target exists and has a pixel buffer; otherwise does nothing.
+static inline void update_aov_pixel_if_exist(render_target_t** target, vec4s color, uint32_t x, uint32_t y)
+{
+    render_target_t* resolved = *target;
+
+    if (resolved != NULL && resolved->buffer != NULL)
+    {
+        render_target_set_pixel(resolved, x, y, color);
+    }
+}
+
+// Copies one pixel's AOV results into every target that exists in the
+// `target` table. Depth is replicated into RGB with alpha 1; all other AOVs
+// are stored as-is.
+static inline void update_aov(render_target_t** target, const aov_output_t* aov, uint32_t x, uint32_t y)
+{
+    const vec4s depth_rgba = (vec4s){aov->depth, aov->depth, aov->depth, 1.0f};
+
+    // Geometry / surface outputs
+    update_aov_pixel_if_exist(target + AOV_BEAUTY_INDEX, aov->beauty, x, y);
+    update_aov_pixel_if_exist(target + AOV_AlBEDO_INDEX, aov->albedo, x, y);
+    update_aov_pixel_if_exist(target + AOV_NORMAL_INDEX, aov->normal, x, y);
+    update_aov_pixel_if_exist(target + AOV_DEPTH_INDEX, depth_rgba, x, y);
+    update_aov_pixel_if_exist(target + AOV_POSITION_INDEX, aov->position, x, y);
+
+    // Lighting split
+    update_aov_pixel_if_exist(target + AOV_DIRECT_INDEX, aov->direct, x, y);
+    update_aov_pixel_if_exist(target + AOV_INDIRECT_INDEX, aov->indirect, x, y);
+}
+
+// TODO: Progressive rendering
+// Renders the whole image of `job`, tile by tile, into its AOV targets.
+//
+// The image is split into bucket_size x bucket_size tiles that are handed to
+// OpenMP threads dynamically, one tile at a time. Before each pixel the
+// job's is_done flag is checked and, if set, the rest of the current tile is
+// skipped; the flag is set to true when the function finishes.
+//
+// Invalid jobs are rejected up front: a NULL job returns immediately, and a
+// job without config, scene or AOV table, or with bucket_size == 0 (which
+// would divide by zero in the tile count), is marked done without rendering.
+//
+// NOTE(review): is_done is read inside the parallel loop without visible
+// synchronization - confirm its type makes that access safe.
+void renderer_start(render_job_t* job)
+{
+    if (job == NULL)
+    {
+        return;
+    }
+
+    if (job->config == NULL || job->scene == NULL || job->aov_target == NULL || job->config->bucket_size == 0)
+    {
+        // Nothing can be rendered; still report completion.
+        job->is_done = true;
+        return;
+    }
+
+    ensure_camera_aspect_ratio(&job->scene->camera, job->config);
+
+    // Ceiling division: partial tiles at the right/bottom edges count too.
+    uint32_t tile_count_x = (job->config->width + job->config->bucket_size - 1) / job->config->bucket_size;
+    uint32_t tile_count_y = (job->config->height + job->config->bucket_size - 1) / job->config->bucket_size;
+    uint32_t tile_count = tile_count_x * tile_count_y;
+
+    // Point on the image plane straight ahead of the camera.
+    vec3s coord = glms_vec3_add(job->scene->camera.position, glms_vec3_scale(quat_get_forward(job->scene->camera.rotation), job->scene->camera.focal_length));
+
+    int64_t x, y, tile_index; // OpenMP requires these to be declared outside the parallel region.
+#pragma omp parallel for schedule(dynamic, 1) default(none) \
+    shared(tile_count_x, tile_count_y, tile_count, coord, job) \
+    private(x, y, tile_index)
+    for (tile_index = 0; tile_index < tile_count; tile_index++)
+    {
+        // Tile bounds in pixels, clamped to the image size.
+        uint32_t tile_x_0 = (uint32_t)tile_index % tile_count_x * job->config->bucket_size;
+        uint32_t tile_y_0 = (uint32_t)tile_index / tile_count_x * job->config->bucket_size;
+        uint32_t tile_x_1 = (uint32_t)fmin(tile_x_0 + job->config->bucket_size, job->config->width);
+        uint32_t tile_y_1 = (uint32_t)fmin(tile_y_0 + job->config->bucket_size, job->config->height);
+
+        for (y = tile_y_0; y < tile_y_1; y++)
+        {
+            for (x = tile_x_0; x < tile_x_1; x++)
+            {
+                // Cancellation: leave this tile as soon as the job is flagged done.
+                if (job->is_done)
+                {
+                    goto tile_done;
+                }
+
+                aov_output_t pixel_output = {0};
+                render_pixel(job->config, job->scene, coord, (uint32_t)x, (uint32_t)y, job->aov_flags, &pixel_output);
+                update_aov(job->aov_target, &pixel_output, (uint32_t)x, (uint32_t)y);
+            }
+        }
+        tile_done:;
+    }
+
+    // TODO: A-Trous denoising
+
+    job->is_done = true;
+}
+
+// Releases every AOV render target owned by `job` along with the target
+// table itself, then clears job->aov_target. The job struct is not freed.
+// A NULL job, or a job without a target table, is ignored.
+void render_job_free(render_job_t* job)
+{
+    if (job == NULL)
+    {
+        return;
+    }
+
+    render_target_t** targets = job->aov_target;
+    if (targets == NULL)
+    {
+        return;
+    }
+
+    for (uint8_t slot = 0; slot < MAX_AOV_TARGET; slot++)
+    {
+        render_target_t* target = targets[slot];
+        if (target == NULL)
+        {
+            continue;
+        }
+
+        // Pixel buffer first, then the target object.
+        render_target_free(target);
+        free(target);
+    }
+
+    free(targets);
+    job->aov_target = NULL;
+}
--- a/native/source/Rendering/Scene.c
+++ b/native/source/Rendering/Scene.c
@@ -0,0 +1,415 @@
+#include "Rendering/Scene.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+// Allocates `capacity` zero-initialized (inactive) mesh model slots.
+//
+// count and capacity are both set to the number of usable slots. If the
+// allocation fails they are set to 0 alongside the NULL buffer, so code that
+// iterates up to `capacity` never indexes a NULL buffer. Callers detect
+// failure by checking models->buffer.
+static inline void mesh_model_collection_init(mesh_model_collection_t* models, uint32_t capacity)
+{
+    models->buffer = (mesh_model_t*)calloc(capacity, sizeof(mesh_model_t));
+
+    uint32_t usable = models->buffer != NULL ? capacity : 0;
+    models->count = usable;
+    models->capacity = usable;
+}
+
+// Allocates `capacity` zero-initialized (inactive) mesh instance slots.
+//
+// count and capacity are both set to the number of usable slots. If the
+// allocation fails they are set to 0 alongside the NULL buffer, so code that
+// iterates up to `capacity` never indexes a NULL buffer. Callers detect
+// failure by checking instances->buffer.
+static inline void mesh_instance_collection_init(mesh_instance_collection_t* instances, uint32_t capacity)
+{
+    instances->buffer = (mesh_instance_t*)calloc(capacity, sizeof(mesh_instance_t));
+
+    uint32_t usable = instances->buffer != NULL ? capacity : 0;
+    instances->count = usable;
+    instances->capacity = usable;
+}
+
+// Frees every active model's BLAS and triangle data, then the slot array,
+// and resets the collection to empty. A NULL collection or one without a
+// buffer is left untouched.
+static inline void mesh_model_collection_free(mesh_model_collection_t* models)
+{
+    if (models == NULL)
+    {
+        return;
+    }
+
+    mesh_model_t* slots = models->buffer;
+    if (slots == NULL)
+    {
+        return;
+    }
+
+    const uint32_t slot_count = models->capacity;
+    for (uint32_t index = 0; index < slot_count; index++)
+    {
+        // Inactive slots own nothing.
+        if (!slots[index].active)
+        {
+            continue;
+        }
+
+        bvh_tree_free(&slots[index].blas);
+        triangle_collection_free(&slots[index].triangles);
+    }
+
+    free(slots);
+    models->buffer = NULL;
+    models->capacity = 0;
+    models->count = 0;
+}
+
+// Frees the instance slot array and resets the collection to empty.
+// Only the array is released here. A NULL collection is ignored.
+static inline void mesh_instance_collection_free(mesh_instance_collection_t* instances)
+{
+    if (instances != NULL)
+    {
+        free(instances->buffer);
+
+        instances->buffer = NULL;
+        instances->capacity = 0;
+        instances->count = 0;
+    }
+}
+
+// Multiplies the 3D vector `p` by the 4x4 matrix `m`, passing 1 as the
+// fourth (w) component to glms_mat4_mulv3.
+static inline vec3s mat4_mul_point(mat4s m, vec3s p)
+{
+    const float w = 1.0f;
+    vec3s transformed = glms_mat4_mulv3(m, p, w);
+    return transformed;
+}
+
+// Returns the axis-aligned box enclosing `aabb` after transformation by `m`.
+// All eight corners are transformed and the result is grown around each.
+static inline aabb_t aabb_transform(mat4s m, aabb_t aabb)
+{
+    aabb_t enclosing = invalid_aabb();
+
+    // Bits of `corner` select min/max per axis: bit 2 -> x, bit 1 -> y, bit 0 -> z,
+    // which enumerates c000, c001, c010, ... c111 in that order.
+    for (int corner = 0; corner < 8; corner++)
+    {
+        const vec3s local = (vec3s){
+            (corner & 4) ? aabb.max.x : aabb.min.x,
+            (corner & 2) ? aabb.max.y : aabb.min.y,
+            (corner & 1) ? aabb.max.z : aabb.min.z};
+
+        aabb_growth(&enclosing, mat4_mul_point(m, local));
+    }
+
+    return enclosing;
+}
+
+// Computes the matrix used to transform normals: the 4x4 transform is
+// inverted, its upper-left 3x3 part is taken, and that is transposed.
+static inline mat3s compute_normal_matrix(mat4s local_to_world)
+{
+    return glms_mat3_transpose(glms_mat4_pick3(glms_mat4_inv(local_to_world)));
+}
+
+// Rebuilds the scene's top-level acceleration structure from the currently
+// active mesh instances and clears scene->tlas_dirty on success.
+//
+// Returns false for a NULL scene, on allocation failure, or when
+// tlas_tree_build fails. With no active instances the TLAS is freed and the
+// call succeeds.
+static bool scene_rebuild_tlas(scene_t* scene)
+{
+    if (scene == NULL)
+    {
+        return false;
+    }
+
+    // First pass: count active instances so the index list can be sized exactly.
+    uint64_t active_count = 0;
+    for (uint32_t i = 0; i < scene->mesh_instances.capacity; ++i)
+    {
+        if (scene->mesh_instances.buffer[i].active)
+        {
+            active_count++;
+        }
+    }
+
+    if (active_count == 0)
+    {
+        // NOTE(review): tlas.instance_bounds is not released on this path;
+        // scene_free frees it later - confirm tlas_tree_free leaves the pointer intact.
+        tlas_tree_free(&scene->tlas);
+        scene->tlas_dirty = false;
+        return true;
+    }
+
+    uint64_t* indices = (uint64_t*)malloc(sizeof(uint64_t) * active_count);
+    if (indices == NULL)
+    {
+        return false;
+    }
+
+    // Second pass: record the slot index of every active instance.
+    uint64_t cursor = 0;
+    for (uint32_t i = 0; i < scene->mesh_instances.capacity; ++i)
+    {
+        if (scene->mesh_instances.buffer[i].active)
+        {
+            indices[cursor++] = i;
+        }
+    }
+
+    // Copy the world bounds of every slot (active or not) into a flat array
+    // indexed by instance id, so the ids stored in `indices` can be used to
+    // look bounds up directly.
+    aabb_t* bounds = (aabb_t*)malloc(sizeof(aabb_t) * scene->mesh_instances.capacity);
+    if (bounds == NULL)
+    {
+        free(indices);
+        return false;
+    }
+    for (uint32_t i = 0; i < scene->mesh_instances.capacity; ++i)
+    {
+        bounds[i] = scene->mesh_instances.buffer[i].world_bounds;
+    }
+
+    // Ownership: the bounds array belongs to the scene, not to the TLAS.
+    // The previous array is remembered here and released only after a
+    // successful rebuild; scene_free releases the final one.
+    // NOTE(review): this presumes tlas_tree_build stores `bounds` in
+    // tlas.instance_bounds without taking ownership, and does not keep the
+    // pointer when it fails - confirm against the TLAS builder.
+    const aabb_t* old_bounds = scene->tlas.instance_bounds;
+
+    bool ok = tlas_tree_build(&scene->tlas, indices, active_count, bounds);
+    free(indices);
+    if (!ok)
+    {
+        free(bounds);
+        return false;
+    }
+
+    // Free old bounds after successful rebuild.
+    free((void*)old_bounds);
+
+    scene->tlas_dirty = false;
+    return true;
+}
+
+// Initializes `scene` with empty triangle, texture, material and light
+// collections, a default camera, and the mesh model/instance tables.
+//
+// triangle_count, texture_count, material_count and punctual_light_count are
+// forwarded to the respective collection initializers; the directional light
+// limit is fixed at 16. The mesh tables start with 64 model and 128 instance
+// slots.
+//
+// Returns true on success. On any failure everything allocated so far is
+// released, *scene is left untouched and false is returned. A NULL scene
+// returns false.
+bool scene_init(scene_t* scene, uint64_t triangle_count, uint16_t texture_count, uint8_t material_count, uint32_t punctual_light_count)
+{
+    if (scene == NULL)
+    {
+        return false;
+    }
+
+    // Build into a temporary so a failure never leaves *scene half-initialized.
+    scene_t temp = {0};
+
+    if (!triangle_collection_init(triangle_count, &temp.triangles))
+    {
+        goto triangle_failed;
+    }
+
+    if (!texture_collection_init(texture_count, &temp.textures))
+    {
+        goto texture_failed;
+    }
+
+    if (!material_collection_init(material_count, &temp.materials))
+    {
+        goto material_failed;
+    }
+
+    if (!light_collection_create(punctual_light_count, 16, &temp.lights)) // NOTE: We just fixed the max directional light count to 16.
+    {
+        goto light_failed;
+    }
+
+    temp.camera = camera_create(
+        (vec3s){0.0f, 0.0f, 5.0f},
+        glms_quat_identity(),
+        0.025f,
+        0.036f,
+        16.0f / 9.0f
+    );
+
+    // New mesh system: start with small default capacities (simple first).
+    mesh_model_collection_init(&temp.mesh_models, 64);
+    mesh_instance_collection_init(&temp.mesh_instances, 128);
+    if (temp.mesh_models.buffer == NULL || temp.mesh_instances.buffer == NULL)
+    {
+        goto mesh_failed;
+    }
+    temp.tlas_dirty = true;
+
+    *scene = temp;
+    return true;
+
+    // Cleanup ladder: each label releases what was acquired before the failing step.
+mesh_failed:
+    mesh_instance_collection_free(&temp.mesh_instances);
+    mesh_model_collection_free(&temp.mesh_models);
+    light_collection_free(&temp.lights);
+light_failed:
+    material_collection_free(&temp.materials);
+material_failed:
+    texture_collection_free(&temp.textures);
+texture_failed:
+    triangle_collection_free(&temp.triangles);
+triangle_failed:
+    return false;
+}
+
+// Brings the scene's acceleration structures up to date.
+//
+// The TLAS is rebuilt when scene->tlas_dirty is set. If the legacy triangle
+// collection is non-empty, its BVH is rebuilt as well: the replacement tree
+// is fully built first and only then swapped in, so a failed rebuild leaves
+// the previous tree in place instead of a freed one.
+//
+// Returns false for a NULL scene or when either rebuild fails.
+bool scene_build_bvh(scene_t* scene)
+{
+    if (scene == NULL)
+    {
+        return false;
+    }
+
+    // Mesh-instance path: refresh the TLAS only when something changed.
+    if (scene->tlas_dirty)
+    {
+        if (!scene_rebuild_tlas(scene))
+        {
+            return false;
+        }
+    }
+
+    // Legacy BVH build if triangles are present.
+    if (scene->triangles.count > 0)
+    {
+        bvh_tree_t rebuilt = {0};
+        if (!bvh_tree_init(&rebuilt, &scene->triangles))
+        {
+            return false;
+        }
+
+        bvh_tree_build(&rebuilt);
+
+        // Swap: release the old tree only once the new one exists.
+        bvh_tree_free(&scene->bvh_tree);
+        scene->bvh_tree = rebuilt;
+    }
+
+    return true;
+}
+
+// Releases everything the scene owns: the legacy BVH and triangles, the TLAS
+// and its instance-bounds array, the mesh instance and model tables, and the
+// texture, material and light collections. The scene struct itself is not
+// freed. A NULL scene is ignored.
+void scene_free(scene_t* scene)
+{
+    if (scene != NULL)
+    {
+        // Legacy triangle path
+        bvh_tree_free(&scene->bvh_tree);
+        triangle_collection_free(&scene->triangles);
+
+        // Mesh system: the bounds array is owned by the scene, so it is
+        // released here after the TLAS itself.
+        tlas_tree_free(&scene->tlas);
+        free((void*)scene->tlas.instance_bounds);
+        scene->tlas.instance_bounds = NULL;
+        mesh_instance_collection_free(&scene->mesh_instances);
+        mesh_model_collection_free(&scene->mesh_models);
+
+        // Shared resources
+        texture_collection_free(&scene->textures);
+        material_collection_free(&scene->materials);
+        light_collection_free(&scene->lights);
+    }
+}
+
+// Returns the index of an inactive mesh model slot, growing the table when
+// every slot is in use.
+//
+// Growth doubles the capacity (or starts at 64 for an empty table); new slots
+// are zeroed, i.e. inactive, and count is kept equal to capacity. Returns
+// UINT32_MAX when the table cannot grow (capacity or byte-size overflow, or
+// allocation failure); the existing table is left intact in that case.
+static uint32_t find_free_mesh_model_slot(scene_t* scene)
+{
+    mesh_model_collection_t* models = &scene->mesh_models;
+
+    // A NULL buffer has no usable slots, whatever `capacity` claims.
+    uint32_t old_capacity = models->buffer != NULL ? models->capacity : 0;
+
+    for (uint32_t i = 0; i < old_capacity; ++i)
+    {
+        if (!models->buffer[i].active)
+        {
+            return i;
+        }
+    }
+
+    // Doubling must neither wrap uint32_t nor overflow the byte count.
+    if (old_capacity > UINT32_MAX / 2)
+    {
+        return UINT32_MAX;
+    }
+
+    uint32_t new_capacity = old_capacity == 0 ? 64 : old_capacity * 2;
+    if (new_capacity > SIZE_MAX / sizeof(mesh_model_t))
+    {
+        return UINT32_MAX;
+    }
+
+    mesh_model_t* resized = (mesh_model_t*)realloc(models->buffer, sizeof(mesh_model_t) * new_capacity);
+    if (resized == NULL)
+    {
+        return UINT32_MAX;
+    }
+
+    memset(resized + old_capacity, 0, sizeof(mesh_model_t) * (new_capacity - old_capacity));
+    models->buffer = resized;
+    models->capacity = new_capacity;
+    models->count = new_capacity;
+
+    // The first newly added slot is free by construction.
+    return old_capacity;
+}
+
+// Returns the index of an inactive mesh instance slot, growing the table
+// when every slot is in use.
+//
+// Growth doubles the capacity (or starts at 128 for an empty table); new
+// slots are zeroed, i.e. inactive, and count is kept equal to capacity.
+// Returns UINT32_MAX when the table cannot grow (capacity or byte-size
+// overflow, or allocation failure); the existing table is left intact then.
+static uint32_t find_free_mesh_instance_slot(scene_t* scene)
+{
+    mesh_instance_collection_t* instances = &scene->mesh_instances;
+
+    // A NULL buffer has no usable slots, whatever `capacity` claims.
+    uint32_t old_capacity = instances->buffer != NULL ? instances->capacity : 0;
+
+    for (uint32_t i = 0; i < old_capacity; ++i)
+    {
+        if (!instances->buffer[i].active)
+        {
+            return i;
+        }
+    }
+
+    // Doubling must neither wrap uint32_t nor overflow the byte count.
+    if (old_capacity > UINT32_MAX / 2)
+    {
+        return UINT32_MAX;
+    }
+
+    uint32_t new_capacity = old_capacity == 0 ? 128 : old_capacity * 2;
+    if (new_capacity > SIZE_MAX / sizeof(mesh_instance_t))
+    {
+        return UINT32_MAX;
+    }
+
+    mesh_instance_t* resized = (mesh_instance_t*)realloc(instances->buffer, sizeof(mesh_instance_t) * new_capacity);
+    if (resized == NULL)
+    {
+        return UINT32_MAX;
+    }
+
+    memset(resized + old_capacity, 0, sizeof(mesh_instance_t) * (new_capacity - old_capacity));
+    instances->buffer = resized;
+    instances->capacity = new_capacity;
+    instances->count = new_capacity;
+
+    // The first newly added slot is free by construction.
+    return old_capacity;
+}
+
+// Registers a new, empty mesh model and returns its id (slot index).
+//
+// The model starts active, with invalid local bounds and a triangle
+// collection initialized for `triangle_reserve` triangles (at least 1).
+// Returns UINT32_MAX if the scene is NULL, no slot can be obtained, or the
+// triangle collection cannot be initialized; in the last case the slot is
+// marked inactive again.
+uint32_t scene_add_mesh_model(scene_t* scene, uint64_t triangle_reserve)
+{
+    if (scene == NULL)
+    {
+        return UINT32_MAX;
+    }
+
+    const uint32_t slot = find_free_mesh_model_slot(scene);
+    if (slot == UINT32_MAX)
+    {
+        return UINT32_MAX;
+    }
+
+    mesh_model_t* created = scene->mesh_models.buffer + slot;
+    *created = (mesh_model_t){0};
+    created->active = true;
+    created->local_bounds = invalid_aabb();
+
+    // Never request a zero-sized triangle collection.
+    const size_t reserve = triangle_reserve > 0 ? (size_t)triangle_reserve : 1;
+    if (triangle_collection_init(reserve, &created->triangles))
+    {
+        return slot;
+    }
+
+    created->active = false;
+    return UINT32_MAX;
+}
+
+// Places an instance of model `model_id` into the scene with the given
+// local-to-world transform and returns the instance id (slot index).
+//
+// The inverse transform, the normal matrix and the world-space bounds are
+// derived from `local_to_world`, and the TLAS is flagged for rebuild.
+// Returns UINT32_MAX if the scene is NULL, the model id does not refer to an
+// active model, or no instance slot can be obtained.
+uint32_t scene_add_mesh_instance(scene_t* scene, uint32_t model_id, mat4s local_to_world)
+{
+    if (scene == NULL)
+    {
+        return UINT32_MAX;
+    }
+
+    if (model_id >= scene->mesh_models.capacity || !scene->mesh_models.buffer[model_id].active)
+    {
+        return UINT32_MAX;
+    }
+
+    const uint32_t slot = find_free_mesh_instance_slot(scene);
+    if (slot == UINT32_MAX)
+    {
+        return UINT32_MAX;
+    }
+
+    // Fields not listed here are zero-initialized.
+    scene->mesh_instances.buffer[slot] = (mesh_instance_t){
+        .active = true,
+        .model_id = model_id,
+        .local_to_world = local_to_world,
+        .world_to_local = glms_mat4_inv(local_to_world),
+        .normal_matrix = compute_normal_matrix(local_to_world),
+        .world_bounds = aabb_transform(local_to_world, scene->mesh_models.buffer[model_id].local_bounds),
+    };
+
+    scene->tlas_dirty = true;
+    return slot;
+}
+
+// Deactivates the instance `instance_id` and flags the TLAS for rebuild.
+// A NULL scene, an out-of-range id, or an already inactive instance is ignored.
+void scene_remove_mesh_instance(scene_t* scene, uint32_t instance_id)
+{
+    if (scene == NULL)
+    {
+        return;
+    }
+
+    if (instance_id >= scene->mesh_instances.capacity)
+    {
+        return;
+    }
+
+    mesh_instance_t* inst = scene->mesh_instances.buffer + instance_id;
+    if (inst->active)
+    {
+        inst->active = false;
+        scene->tlas_dirty = true;
+    }
+}
+
+// Replaces the transform of an active instance and refreshes everything
+// derived from it: the inverse transform, the normal matrix and, when the
+// instance's model is still active, its world-space bounds. The TLAS is
+// flagged for rebuild. A NULL scene, an out-of-range id, or an inactive
+// instance is ignored.
+void scene_set_mesh_instance_transform(scene_t* scene, uint32_t instance_id, mat4s local_to_world)
+{
+    if (scene == NULL)
+    {
+        return;
+    }
+
+    if (instance_id >= scene->mesh_instances.capacity)
+    {
+        return;
+    }
+
+    mesh_instance_t* target = scene->mesh_instances.buffer + instance_id;
+    if (!target->active)
+    {
+        return;
+    }
+
+    target->local_to_world = local_to_world;
+    target->world_to_local = glms_mat4_inv(local_to_world);
+    target->normal_matrix = compute_normal_matrix(local_to_world);
+
+    // Bounds can only be recomputed while the source model is still valid.
+    const mesh_model_collection_t* models = &scene->mesh_models;
+    if (target->model_id < models->capacity && models->buffer[target->model_id].active)
+    {
+        target->world_bounds = aabb_transform(local_to_world, models->buffer[target->model_id].local_bounds);
+    }
+
+    scene->tlas_dirty = true;
+}
--- a/native/source/Rendering/Texture.c
+++ b/native/source/Rendering/Texture.c
@@ -0,0 +1,507 @@
+#include "Rendering/Texture.h"
+#include "Common/String.h"
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb_image.h"
+
+// Yields `pixel[channel] / max`, or `fallback / max` when the pixel has fewer
+// than `channel + 1` channels. Every argument and the whole expansion are
+// parenthesized so the macro composes safely inside larger expressions.
+// Note: `channel` is evaluated twice, so do not pass expressions with side effects.
+#define GET_CHANNEL_DATA(pixel, channel, channel_count, fallback, max) \
+    (((channel) < (channel_count) ? (pixel)[(channel)] : (fallback)) / (max))
+
+// Allocates storage for `size` texture slots and initializes `*textures` as an
+// empty collection (count == 0).
+//
+// Returns false, leaving `*textures` untouched, when `textures` is NULL or the
+// allocation fails.
+bool texture_collection_init(uint16_t size, texture_collection_t* textures)
+{
+    if (textures == NULL)
+    {
+        return false;
+    }
+
+    // calloc zero-fills the slots so unused entries never hold garbage pointers.
+    texture_asset_t* slots = (texture_asset_t*)calloc(size, sizeof(texture_asset_t));
+    if (slots == NULL)
+    {
+        return false;
+    }
+
+    *textures = (texture_collection_t){ .buffer = slots, .size = size, .count = 0 };
+
+    return true;
+}
+
+// Changes the capacity of the collection to `size` slots.
+//
+// A request for INVALID_TEXTURE_ID slots is reduced by one. The call is a
+// no-op when `textures` is NULL, when the capacity would not change, when
+// `size` is 0 (realloc with a zero size may free the buffer and leave a
+// dangling pointer), or when `size` is smaller than the number of stored
+// textures (shrinking would drop live entries without freeing them).
+// On allocation failure the collection is left unchanged; callers can detect
+// success by inspecting `textures->size` afterwards.
+void texture_collection_resize(texture_collection_t* textures, uint16_t size)
+{
+    if (textures == NULL)
+    {
+        return;
+    }
+
+    if (size == INVALID_TEXTURE_ID)
+    {
+        size = INVALID_TEXTURE_ID - 1;
+    }
+
+    if (size == 0 || size < textures->count || size == textures->size)
+    {
+        return;
+    }
+
+    texture_asset_t* temp = (texture_asset_t*)realloc(textures->buffer, size * sizeof(texture_asset_t));
+    if (temp != NULL)
+    {
+        textures->buffer = temp;
+        textures->size = size;
+    }
+}
+
+// Releases every stored texture, its name string and the slot buffer itself,
+// then resets the collection to an empty state so that a later free or load
+// cannot walk stale entries. Safe to call with NULL.
+void texture_collection_free(texture_collection_t* textures)
+{
+    if (textures == NULL)
+    {
+        return;
+    }
+
+    if (textures->buffer != NULL)
+    {
+        for (uint16_t i = 0; i < textures->count; i++)
+        {
+            texture_free(&textures->buffer[i].texture);
+
+            // free(NULL) is a no-op, so no guard is needed.
+            free(textures->buffer[i].full_name);
+            textures->buffer[i].full_name = NULL;
+        }
+
+        free(textures->buffer);
+    }
+
+    textures->buffer = NULL;
+    textures->count = 0;
+    textures->size = 0;
+}
+
+
+// Copies the raw bytes of pixel (x, y) from a tightly packed image that is
+// `width` pixels wide into `out_pixel_data`, which must hold at least
+// `channel_count * stride` bytes.
+static inline void read_pixel_raw(const char* data, uint32_t x, uint32_t y, uint32_t width, uint8_t channel_count, stride_t stride, char* out_pixel_data)
+{
+    size_t pixel_byte_size = (size_t)channel_count * stride;
+    // Widen before multiplying so the pixel index cannot wrap in 32 bits.
+    size_t pixel_offset = ((size_t)y * width + x) * pixel_byte_size;
+    memcpy(out_pixel_data, data + pixel_offset, pixel_byte_size);
+}
+
+// Copies `channel_count * stride` bytes from `in_pixel_data` into pixel (x, y)
+// of a tightly packed image that is `width` pixels wide.
+static inline void write_pixel_raw(char* data, uint32_t x, uint32_t y, uint32_t width, uint8_t channel_count, stride_t stride, const char* in_pixel_data)
+{
+    size_t pixel_byte_size = (size_t)channel_count * stride;
+    // Widen before multiplying so the pixel index cannot wrap in 32 bits.
+    size_t pixel_offset = ((size_t)y * width + x) * pixel_byte_size;
+    memcpy(data + pixel_offset, in_pixel_data, pixel_byte_size);
+}
+
+// Box-filters the 2x2 block whose top-left pixel is (src_x, src_y) in the
+// current mip level and writes the per-channel average to `out_averaged_pixel`
+// (which must hold `channel_count * stride` bytes).
+//
+// Pixels of the block that fall outside the image are ignored, so the average
+// is taken over 1 to 4 pixels. If no pixel is inside the image the output is
+// zero-filled. Channels are accumulated as floats and converted back to the
+// storage type with truncation after clamping to the type's range.
+//
+// Channel values are moved with memcpy instead of typed pointer casts so the
+// function does not depend on the alignment of its byte buffers. The scratch
+// buffers are fixed-size: `channel_count` is a uint8_t, so UINT8_MAX channels
+// of float size bound every supported pixel. A pixel that would not fit (only
+// possible for a stride larger than a float) leaves the output untouched.
+static void average_pixels_box(const char* current_data, uint32_t current_width, uint32_t current_height,
+                               uint32_t src_x, uint32_t src_y, uint8_t channel_count, stride_t stride, char* out_averaged_pixel)
+{
+    char pixel_data[UINT8_MAX * sizeof(float)]; // Raw bytes of the pixel being accumulated
+    float sum_float[UINT8_MAX];                 // Per-channel running sum
+
+    const size_t pixel_byte_size = (size_t)channel_count * stride;
+    if (pixel_byte_size > sizeof(pixel_data))
+    {
+        return;
+    }
+
+    memset(sum_float, 0, sizeof(float) * channel_count);
+    float pixel_count = 0.0f;
+
+    // Loop through the 2x2 block in the current level
+    for (int dy = 0; dy < 2; ++dy)
+    {
+        for (int dx = 0; dx < 2; ++dx)
+        {
+            const uint32_t current_x = src_x + dx;
+            const uint32_t current_y = src_y + dy;
+
+            // Skip pixels outside the bounds of the current level
+            if (current_x >= current_width || current_y >= current_height)
+            {
+                continue;
+            }
+
+            read_pixel_raw(current_data, current_x, current_y, current_width, channel_count, stride, pixel_data);
+
+            // Sum the pixel data channel by channel, converting to float for summation
+            for (uint8_t c = 0; c < channel_count; c++)
+            {
+                switch (stride)
+                {
+                    case UINT_8:
+                    {
+                        uint8_t value;
+                        memcpy(&value, pixel_data + (size_t)c * sizeof(value), sizeof(value));
+                        sum_float[c] += (float)value;
+                        break;
+                    }
+                    case UINT_16:
+                    {
+                        uint16_t value;
+                        memcpy(&value, pixel_data + (size_t)c * sizeof(value), sizeof(value));
+                        sum_float[c] += (float)value;
+                        break;
+                    }
+                    case FLOAT_32:
+                    {
+                        float value;
+                        memcpy(&value, pixel_data + (size_t)c * sizeof(value), sizeof(value));
+                        sum_float[c] += value;
+                        break;
+                    }
+                    default:
+                        break;
+                }
+            }
+            pixel_count += 1.0f;
+        }
+    }
+
+    if (pixel_count <= 0.0f)
+    {
+        // Should not happen for a valid source block; zero the output as a safeguard.
+        memset(out_averaged_pixel, 0, pixel_byte_size);
+        return;
+    }
+
+    // Convert the averaged float values back to the original storage type
+    for (uint8_t c = 0; c < channel_count; c++)
+    {
+        const float average_value = sum_float[c] / pixel_count;
+
+        switch (stride)
+        {
+            case UINT_8:
+            {
+                const uint8_t value = (uint8_t)glm_clamp(average_value, 0.0f, 255.0f);
+                memcpy(out_averaged_pixel + (size_t)c * sizeof(value), &value, sizeof(value));
+                break;
+            }
+            case UINT_16:
+            {
+                const uint16_t value = (uint16_t)glm_clamp(average_value, 0.0f, 65535.0f);
+                memcpy(out_averaged_pixel + (size_t)c * sizeof(value), &value, sizeof(value));
+                break;
+            }
+            case FLOAT_32:
+                memcpy(out_averaged_pixel + (size_t)c * sizeof(average_value), &average_value, sizeof(average_value));
+                break;
+            default:
+                break;
+        }
+    }
+}
+
+// Fills `texture_data[0..max_level]` with a mip chain for `raw_data`.
+//
+// Level 0 aliases `raw_data` (no copy). Each further level is a freshly
+// malloc'ed image with both dimensions halved (never below 1), produced by
+// 2x2 box filtering of the previous level. Generation stops once the image is
+// 1x1, `max_level` has been reached, or an allocation fails; entries that were
+// not generated are left as the caller initialized them.
+//
+// `texture_data` must have room for `max_level + 1` entries.
+static void generate_mipmap(char* raw_data, mipmap_t* texture_data, uint32_t width, uint32_t height, uint8_t channel_count, uint8_t max_level, stride_t stride)
+{
+    // Store the base level (Level 0)
+    texture_data[0] = (mipmap_t)
+    {
+        .width = width,
+        .height = height,
+        .data = raw_data,
+    };
+
+    if (max_level == 0)
+    {
+        return;
+    }
+
+    // One heap scratch pixel for the whole chain: malloc memory is suitably
+    // aligned for any channel type and works the same on every compiler.
+    const size_t pixel_byte_size = (size_t)channel_count * stride;
+    char* averaged_pixel_buffer = (char*)malloc(pixel_byte_size);
+    if (averaged_pixel_buffer == NULL)
+    {
+        return;
+    }
+
+    char* current_data = raw_data;
+    uint32_t current_width = width;
+    uint32_t current_height = height;
+    unsigned int level = 1;
+
+    // Continue generating levels as long as at least one dimension is greater than 1
+    while ((current_width > 1 || current_height > 1) && level <= max_level)
+    {
+        const uint32_t next_width = current_width > 1 ? current_width / 2 : 1;
+        const uint32_t next_height = current_height > 1 ? current_height / 2 : 1;
+
+        const size_t next_level_size = (size_t)next_width * next_height * pixel_byte_size;
+        char* next_data = (char*)malloc(next_level_size);
+        if (next_data == NULL)
+        {
+            break;
+        }
+
+        // Iterate through each pixel in the NEXT mipmap level
+        for (uint32_t y = 0; y < next_height; ++y)
+        {
+            for (uint32_t x = 0; x < next_width; ++x)
+            {
+                // Average the 2x2 block of the CURRENT level whose top-left corner is (2x, 2y)
+                average_pixels_box(current_data, current_width, current_height,
+                                   x * 2, y * 2, channel_count, stride, averaged_pixel_buffer);
+
+                // Write the averaged pixel value to the corresponding location in the NEXT level
+                write_pixel_raw(next_data, x, y, next_width, channel_count, stride, averaged_pixel_buffer);
+            }
+        }
+
+        texture_data[level] = (mipmap_t)
+        {
+            .width = next_width,
+            .height = next_height,
+            .data = next_data,
+        };
+
+        // Update for the next iteration
+        current_data = next_data;
+        current_width = next_width;
+        current_height = next_height;
+        level++;
+    }
+
+    free(averaged_pixel_buffer);
+}
+
+// Loads an image file through stb_image, optionally builds its mip chain and
+// appends it to `textures`.
+//
+// filename: path handed to stb_image; a copy is stored with the texture.
+// srgb:     currently unused by this function.
+// mipmap:   when true, a full mip chain is generated; otherwise only level 0.
+// stride:   per-channel storage type (UINT_8, UINT_16 or FLOAT_32).
+// textures: collection that takes ownership of the loaded texture.
+//
+// Returns the handle of the new entry, or invalid_texture_handle() when an
+// argument is NULL, the stride is unsupported, the collection cannot grow, the
+// file cannot be decoded, or memory runs out. Nothing is leaked on failure.
+//
+// TODO: textures are not de-duplicated by file name; a linear strcmp scan was
+// considered too slow, so a hash map or similar structure is needed for lookups.
+texture_handle_t texture_load(const char* filename, bool srgb, bool mipmap, stride_t stride, texture_collection_t* textures)
+{
+    (void)srgb;
+
+    if (filename == NULL || textures == NULL)
+    {
+        return invalid_texture_handle();
+    }
+
+    // Make room before doing any expensive work, so a failed growth can never
+    // lead to a write past the end of the buffer.
+    if (textures->count >= textures->size)
+    {
+        // Grow in 32 bits: doubling directly in uint16_t would wrap, and 0 * 2 stays 0.
+        const uint32_t max_size = (uint32_t)INVALID_TEXTURE_ID - 1u;
+        uint32_t grown_size = textures->size > 0 ? (uint32_t)textures->size * 2u : 1u;
+        if (grown_size > max_size)
+        {
+            grown_size = max_size;
+        }
+
+        texture_collection_resize(textures, (uint16_t)grown_size);
+        if (textures->buffer == NULL || textures->count >= textures->size)
+        {
+            return invalid_texture_handle();
+        }
+    }
+
+    int width = 0;
+    int height = 0;
+    int channels = 0;
+    char* raw_data = NULL;
+
+    switch (stride)
+    {
+        case UINT_8:
+            raw_data = (char*)stbi_load(filename, &width, &height, &channels, 0);
+            break;
+        case UINT_16:
+            raw_data = (char*)stbi_load_16(filename, &width, &height, &channels, 0);
+            break;
+        case FLOAT_32:
+            raw_data = (char*)stbi_loadf(filename, &width, &height, &channels, 0);
+            break;
+        default:
+            break;
+    }
+
+    if (raw_data == NULL)
+    {
+        return invalid_texture_handle();
+    }
+
+    // floor(log2(longest side)) extra levels bring the longest side down to 1.
+    uint8_t max_mip_level = mipmap ? (uint8_t)log2f(fmaxf((float)width, (float)height)) : 0;
+    mipmap_t* temp_texture_data = (mipmap_t*)calloc((size_t)max_mip_level + 1, sizeof(mipmap_t));
+    if (temp_texture_data == NULL)
+    {
+        stbi_image_free(raw_data);
+        return invalid_texture_handle();
+    }
+
+    generate_mipmap(raw_data, temp_texture_data, (uint32_t)width, (uint32_t)height, (uint8_t)channels, max_mip_level, stride);
+
+    texture_t texture = {0};
+
+    texture.texel_size = (vec2s){1.0f / (float)width, 1.0f / (float)height};
+    texture.width = (uint32_t)width;
+    texture.height = (uint32_t)height;
+
+    texture.channel_count = (uint8_t)channels;
+    texture.max_mip = max_mip_level;
+    texture.stride = stride;
+    texture.data = temp_texture_data;
+
+    texture.wrap_mode = WM_REPEAT;
+    texture.filter_mode = FM_LINEAR;
+
+    texture_handle_t entity = {.id = textures->count};
+
+    textures->buffer[textures->count] = (texture_asset_t){.full_name = string_copy(filename), .texture = texture};
+    textures->count++;
+
+    return entity;
+}
+
+// Maps `uv` into the unit square according to the texture wrap mode.
+//
+// WM_REPEAT keeps only the fractional part (u - floor(u)), so the texture
+// tiles seamlessly in both directions; e.g. u = -0.25 maps to 0.75. Taking
+// fmod of the absolute value instead would mirror the image across zero.
+// WM_CLAMP clamps each coordinate to [0, 1].
+// Any other mode leaves `uv` unchanged.
+static inline void warp_uv(wrap_mode_t mode, vec2s* uv)
+{
+    switch (mode)
+    {
+        case WM_REPEAT:
+            uv->x -= floorf(uv->x);
+            uv->y -= floorf(uv->y);
+            break;
+        case WM_CLAMP:
+            *uv = glms_vec2_clamp(*uv, 0.0f, 1.0f);
+            break;
+        default:
+            break;
+    }
+}
+
+// Reads pixel (x, y) from a tightly packed image that is `width` pixels wide
+// and converts up to four channels to floats.
+//
+// UINT_8 and UINT_16 channels are normalized by 255 and 65535 respectively;
+// FLOAT_32 channels are returned as stored. Channels the image does not have
+// keep the defaults (0, 0, 0, 1). A channel whose offset would fall beyond the
+// end of row `y` is skipped.
+static vec4s get_pixel_data_from_buffer(const char* data, uint32_t x, uint32_t y, uint32_t width, uint8_t channel_count, stride_t stride)
+{
+    // Widen before multiplying so large images cannot wrap the 32-bit index.
+    const size_t row_byte_size = (size_t)width * channel_count * stride;
+    const size_t row_end_offset = (size_t)y * row_byte_size + row_byte_size;
+    const size_t pixel_start_offset = ((size_t)y * width + x) * channel_count * stride;
+
+    vec4s out = {0.0f, 0.0f, 0.0f, 1.0f};
+
+    for (int c = 0; c < channel_count && c < 4; c++)
+    {
+        float value = 0.0f;
+        const size_t channel_offset = pixel_start_offset + (size_t)c * stride;
+
+        if (channel_offset >= row_end_offset)
+        {
+            continue;
+        }
+
+        // memcpy keeps the reads valid regardless of the buffer's alignment.
+        switch (stride)
+        {
+            case UINT_8:
+            {
+                uint8_t raw;
+                memcpy(&raw, data + channel_offset, sizeof(raw));
+                value = (float)raw / 255.0f;
+                break;
+            }
+            case UINT_16:
+            {
+                uint16_t raw;
+                memcpy(&raw, data + channel_offset, sizeof(raw));
+                value = (float)raw / 65535.0f;
+                break;
+            }
+            case FLOAT_32:
+                memcpy(&value, data + channel_offset, sizeof(value));
+                break;
+            default:
+                value = (c == 3) ? 1.0f : 0.0f;
+                break;
+        }
+        out.raw[c] = value;
+    }
+
+    return out;
+}
+
+// Returns the pixel nearest to `uv` in mip level `lod` (clamped to the
+// texture's highest level).
+//
+// `uv` is expected in [0, 1]; values outside that range (and NaN) are clamped
+// to it first, because converting a negative or NaN float to an unsigned
+// integer is undefined behavior. Returns opaque black when the texture is
+// NULL or the selected level has no data.
+vec4s texture_get_pixel(const texture_t* texture, vec2s uv, uint8_t lod)
+{
+    const vec4s opaque_black = {0.0f, 0.0f, 0.0f, 1.0f};
+    if (texture == NULL || texture->data == NULL)
+    {
+        return opaque_black;
+    }
+
+    const uint8_t mip_level = lod < texture->max_mip ? lod : texture->max_mip;
+    const mipmap_t* mipmap = &texture->data[mip_level];
+    if (mipmap->data == NULL)
+    {
+        return opaque_black;
+    }
+
+    // fmaxf returns the non-NaN operand, so NaN coordinates collapse to 0.
+    const float u = fminf(fmaxf(uv.x, 0.0f), 1.0f);
+    const float v = fminf(fmaxf(uv.y, 0.0f), 1.0f);
+
+    uint32_t x = (uint32_t)floorf(u * (mipmap->width - 1));
+    uint32_t y = (uint32_t)floorf(v * (mipmap->height - 1));
+
+    x = x < mipmap->width ? x : mipmap->width - 1;
+    y = y < mipmap->height ? y : mipmap->height - 1;
+
+    return get_pixel_data_from_buffer(mipmap->data, x, y, mipmap->width, texture->channel_count, texture->stride);
+}
+
+// Calculate LOD based on Ray Cones.
+//
+// Estimates how many texels the ray footprint covers on the hit surface and
+// converts that to a (fractional, unclamped) mip level. The result is negative
+// when the footprint is smaller than one texel. Returns 0 for degenerate input
+// (zero-area triangle, zero UV area or zero ray width), where the estimate
+// would otherwise be NaN or infinite.
+float texture_get_sample_lod(const texture_t* texture, const texture_sample_context_t* sample_context)
+{
+    // 1. Calculate the ray footprint on the surface
+    // If we hit the surface at an angle, the footprint elongates.
+    float cos_theta = fabsf(glms_vec3_dot(sample_context->normal, sample_context->view_direction));
+    float surface_width = sample_context->ray_width / fmaxf(cos_theta, 0.001f); // Project width onto surface
+
+    // 2. Estimate UV density (How much UV changes per unit of surface)
+    // This is an approximation. A more accurate way uses Triangle derivatives (Ray Differentials).
+    float uv_area = fabsf((sample_context->uv1.x * sample_context->uv2.y) - (sample_context->uv1.y * sample_context->uv2.x)); // Approximation of UV area
+    float geo_area = glms_vec3_norm(glms_vec3_cross(sample_context->edge1, sample_context->edge2));
+    if (!(geo_area > 0.0f))
+    {
+        // Degenerate triangle: the area ratio is undefined, fall back to the base level.
+        return 0.0f;
+    }
+
+    // Ratio of Texture Area to Geometric Area
+    float uv_density = sqrtf(uv_area / geo_area);
+
+    // 3. Calculate texture footprint
+    // How many texels does our ray cover?
+    float texels_covered = surface_width * uv_density * fmaxf((float)texture->width, (float)texture->height);
+    if (!(texels_covered > 0.0f))
+    {
+        // log2f(0) is -infinity; treat an empty footprint as the base level.
+        return 0.0f;
+    }
+
+    // 4. Convert to LOD
+    // The mip level is half of log2(texels_covered).
+    // NOTE(review): the 0.5 factor is kept from the original code; confirm it is intended.
+    return log2f(texels_covered) * 0.5f;
+}
+
+// Nearest-neighbour filtering: forwards to texture_get_pixel for the given
+// mip level and returns its result unchanged.
+static vec4s nearest_filter(const texture_t* texture, vec2s uv, uint8_t lod)
+{
+    const vec4s nearest_texel = texture_get_pixel(texture, uv, lod);
+    return nearest_texel;
+}
+
+// Bilinear filtering within a single mip level.
+//
+// `uv` is scaled to pixel coordinates of the level selected by `lod` (clamped
+// to the texture's highest level); the four surrounding pixels are fetched
+// with their coordinates clamped to the image and blended by the fractional
+// position. Returns opaque black when the level has no data.
+static vec4s linear_filter(const texture_t* texture, vec2s uv, uint8_t lod)
+{
+    const uint8_t level = (uint8_t)glm_clamp((float)lod, 0.0f, (float)texture->max_mip);
+    const mipmap_t* mip = &texture->data[level];
+
+    if (mip->data == NULL)
+    {
+        return (vec4s){0.0f, 0.0f, 0.0f, 1.0f};
+    }
+
+    // Continuous pixel-space position of the sample
+    const float px = uv.x * (float)(mip->width - 1);
+    const float py = uv.y * (float)(mip->height - 1);
+
+    // Integer corner of the cell and the fractional offset inside it
+    const uint32_t col = (uint32_t)floorf(px);
+    const uint32_t row = (uint32_t)floorf(py);
+    const float frac_x = px - (float)col;
+    const float frac_y = py - (float)row;
+
+    // Clamp both neighbours on each axis to the last valid pixel
+    const float last_col = (float)mip->width - 1.0f;
+    const float last_row = (float)mip->height - 1.0f;
+    const uint32_t cols[2] =
+    {
+        (uint32_t)glm_clamp((float)col, 0.0f, last_col),
+        (uint32_t)glm_clamp((float)(col + 1), 0.0f, last_col),
+    };
+    const uint32_t rows[2] =
+    {
+        (uint32_t)glm_clamp((float)row, 0.0f, last_row),
+        (uint32_t)glm_clamp((float)(row + 1), 0.0f, last_row),
+    };
+
+    // Blend horizontally within each of the two rows, then vertically between them
+    vec4s row_blend[2];
+    for (int r = 0; r < 2; ++r)
+    {
+        const vec4s left = get_pixel_data_from_buffer(mip->data, cols[0], rows[r], mip->width, texture->channel_count, texture->stride);
+        const vec4s right = get_pixel_data_from_buffer(mip->data, cols[1], rows[r], mip->width, texture->channel_count, texture->stride);
+        row_blend[r] = glms_vec4_lerp(left, right, frac_x);
+    }
+
+    return glms_vec4_lerp(row_blend[0], row_blend[1], frac_y);
+}
+
+// Dispatches to the filter selected by the texture's filter mode.
+//
+// `lod` is clamped to [0, max_mip] before it is truncated to an integer mip
+// level: converting a negative, NaN or too-large float to uint8_t is undefined
+// behavior, and callers may pass such values (e.g. a negative ray-cone LOD
+// under magnification). NaN selects level 0. Unknown filter modes yield
+// opaque black.
+static inline vec4s filter_texture(const texture_t* texture, vec2s uv, float lod)
+{
+    uint8_t mip_level = 0;
+    if (lod >= (float)texture->max_mip)
+    {
+        mip_level = texture->max_mip;
+    }
+    else if (lod > 0.0f)
+    {
+        mip_level = (uint8_t)lod;
+    }
+
+    switch (texture->filter_mode)
+    {
+        case FM_NEAREST:
+            return nearest_filter(texture, uv, mip_level);
+        case FM_LINEAR:
+            return linear_filter(texture, uv, mip_level);
+        default:
+            return (vec4s){0.0f, 0.0f, 0.0f, 1.0f};
+    }
+}
+
+// Samples the texture at `uv`, choosing the mip level from the ray-cone
+// estimate computed by texture_get_sample_lod.
+//
+// The estimated LOD is clamped to [0, max_mip] before filtering; it is
+// negative when the footprint is smaller than a texel and may be non-finite
+// for degenerate input, neither of which is a valid mip level. NaN maps to 0.
+vec4s texture_sample(const texture_t* texture, const texture_sample_context_t* sample_context, vec2s uv)
+{
+    warp_uv(texture->wrap_mode, &uv);
+
+    float lod = texture_get_sample_lod(texture, sample_context);
+    const float max_lod = (float)texture->max_mip;
+    if (!(lod > 0.0f))
+    {
+        lod = 0.0f;
+    }
+    else if (lod > max_lod)
+    {
+        lod = max_lod;
+    }
+
+    return filter_texture(texture, uv, lod);
+}
+
+// Samples the texture at `uv` using an explicit level of detail, which is
+// clamped to the range [0, max_mip] before filtering.
+vec4s texture_sample_lod(const texture_t* texture, vec2s uv, float lod)
+{
+    const float clamped_lod = glm_clamp(lod, 0.0f, texture->max_mip);
+
+    warp_uv(texture->wrap_mode, &uv);
+
+    return filter_texture(texture, uv, clamped_lod);
+}
+
+// Releases all pixel data owned by a texture created by texture_load.
+//
+// Level 0 was allocated by stb_image and is returned through
+// stbi_image_free; the generated levels and the mip array itself come from
+// malloc/calloc and are released with free. `texture->data` is set to NULL
+// afterwards, so calling the function again on the same texture is a no-op.
+// Safe to call with NULL.
+void texture_free(texture_t* texture)
+{
+    if (texture == NULL || texture->data == NULL)
+    {
+        return;
+    }
+
+    stbi_image_free(texture->data[0].data);
+
+    // A counter wider than uint8_t keeps `<=` from wrapping when max_mip is 255.
+    for (unsigned int level = 1; level <= texture->max_mip; level++)
+    {
+        free(texture->data[level].data);
+    }
+
+    free(texture->data);
+    texture->data = NULL;
+}