#include "Material/StandardLit.h"
#include "Algorithm/BSDF.h"
#include "Lighting/LightEvaluation.h"

// Trowbridge-Reitz GGX Normal Distribution Function
static float ggx_distribution(float n_dot_h, float roughness)
{
    float a = roughness * roughness;
    float a2 = a * a;
    float n_dot_h2 = n_dot_h * n_dot_h;

    float nom = a2;
    float denom = (n_dot_h2 * (a2 - 1.0f) + 1.0f);
    denom = PI * denom * denom;

    return nom / fmaxf(denom, 0.0001f); // Prevent divide by zero
}

static float ggx_g1(float n_dot_v, float roughness)
{
    if (n_dot_v <= 0.0f)
    {
        return 0.0f;
    }

    // Our GGX D() uses alpha = roughness^2, so keep the same convention here.
    float alpha = fmaxf(roughness * roughness, 1e-4f);
    float alpha2 = alpha * alpha;

    float n2 = n_dot_v * n_dot_v;
    float denom = n_dot_v + sqrtf(alpha2 + (1.0f - alpha2) * n2);
    return (2.0f * n_dot_v) / fmaxf(denom, 1e-6f);
}

static inline float ggx_g_smith(float n_dot_v, float n_dot_l, float roughness)
{
    return ggx_g1(n_dot_v, roughness) * ggx_g1(n_dot_l, roughness);
}

// GGX VNDF sampling (Heitz 2018) for isotropic GGX.
// Returns a Half-Vector (H) sampled from the distribution of visible normals.
static vec3s sample_ggx_vndf(vec3s n, vec3s v, float roughness, float u1, float u2)
{
    // Build local frame around n.
    vec3s tangent, bitangent;
    create_orthonormal_basis(n, &tangent, &bitangent);

    // View direction in local coordinates.
    vec3s v_local = (vec3s){glms_vec3_dot(v, tangent), glms_vec3_dot(v, bitangent), glms_vec3_dot(v, n)};
    v_local = glms_vec3_normalize(v_local);

    // Stretch view.
    float alpha = fmaxf(roughness * roughness, 1e-4f);
    vec3s v_h = (vec3s){alpha * v_local.x, alpha * v_local.y, v_local.z};
    v_h = glms_vec3_normalize(v_h);

    // Orthonormal basis around v_h.
    vec3s t1;
    if (v_h.z < 0.9999f)
    {
        t1 = glms_vec3_normalize(glms_vec3_cross((vec3s){0.0f, 0.0f, 1.0f}, v_h));
    }
    else
    {
        t1 = (vec3s){1.0f, 0.0f, 0.0f};
    }
    vec3s t2 = glms_vec3_cross(v_h, t1);

    // Sample a point on a disk.
    float r = sqrtf(u1);
    float phi = TWO_PI * u2;
    float t1p = r * cosf(phi);
    float t2p = r * sinf(phi);

    // Warp to the hemisphere.
    float s = 0.5f * (1.0f + v_h.z);
    t2p = (1.0f - s) * sqrtf(fmaxf(0.0f, 1.0f - t1p * t1p)) + s * t2p;

    vec3s n_h = glms_vec3_add(
        glms_vec3_add(glms_vec3_scale(t1, t1p), glms_vec3_scale(t2, t2p)),
        glms_vec3_scale(v_h, sqrtf(fmaxf(0.0f, 1.0f - t1p * t1p - t2p * t2p))));

    // Unstretch.
    vec3s h_local = (vec3s){alpha * n_h.x, alpha * n_h.y, fmaxf(0.0f, n_h.z)};
    h_local = glms_vec3_normalize(h_local);

    // Back to world.
    vec3s h_world = glms_vec3_add(
        glms_vec3_add(glms_vec3_scale(tangent, h_local.x), glms_vec3_scale(bitangent, h_local.y)),
        glms_vec3_scale(n, h_local.z));

    return glms_vec3_normalize(h_world);
}

static float oren_nayar_eval(vec3s l, vec3s v, vec3s n, float roughness, float n_dot_l, float n_dot_v)
{
    // Full Qualitative Oren-Nayar
    float sigma2 = roughness * roughness;
    float A = 1.0f - (sigma2 / (2.0f * (sigma2 + 0.33f)));
    float B = 0.45f * sigma2 / (sigma2 + 0.09f);

    // Cosine of azimuth difference (phi)
    // Project V and L onto the tangent plane
    vec3s v_plane = glms_vec3_normalize(glms_vec3_sub(v, glms_vec3_scale(n, n_dot_v)));
    vec3s l_plane = glms_vec3_normalize(glms_vec3_sub(l, glms_vec3_scale(n, n_dot_l)));
    float cos_phi_diff = fmaxf(0.0f, glms_vec3_dot(v_plane, l_plane));

    // Sine and Tangent terms
    // alpha = max(theta_l, theta_v), beta = min(theta_l, theta_v)
    // We use sin/tan relationships: sin(x) = sqrt(1-cos^2(x))
    float sin_theta_l = sqrtf(fmaxf(0.0f, 1.0f - n_dot_l * n_dot_l));
    float sin_theta_v = sqrtf(fmaxf(0.0f, 1.0f - n_dot_v * n_dot_v));

    float sin_alpha, tan_beta;
    if (n_dot_v > n_dot_l)
    {
        sin_alpha = sin_theta_l;
        tan_beta = sin_theta_v / fmaxf(n_dot_v, 0.0001f);
    }
    else
    {
        sin_alpha = sin_theta_v;
        tan_beta = sin_theta_l / fmaxf(n_dot_l, 0.0001f);
    }

    return (A + B * cos_phi_diff * sin_alpha * tan_beta) * INV_PI;
}

static void get_surface_data(const shading_context_t* context, const standard_lit_properties_t* properties, standard_lit_surface_data_t* data_out)
{
    // Use the ray cone width (footprint) instead of simple distance for mip selection
    float footprint = context->cone_width;
    float distance = glms_vec3_distance(context->camera_position, context->position);
    vec3s view = context->camera_direction;

    // Fetch geometry data for LOD calculation
    const triangle_t* triangle = &context->triangles->buffer[context->triangle_id];
    texture_sample_context_t sample_context = 
    {
        .view_direction = view,
        .normal = context->normal,
        .edge1 = glms_vec3_sub(triangle->vertices[1].position, triangle->vertices[0].position),
        .edge2 = glms_vec3_sub(triangle->vertices[2].position, triangle->vertices[0].position),
        .uv1 = glms_vec2_sub(triangle->vertices[1].uv, triangle->vertices[0].uv),
        .uv2 = glms_vec2_sub(triangle->vertices[2].uv, triangle->vertices[0].uv),
        .ray_width = footprint,
        .distance = distance
    };

    data_out->albedo = properties->albedo;
    const texture_t* albedo_texture = get_texture(context->textures, properties->albedo_texture);
    if (albedo_texture != NULL && albedo_texture->data != NULL)
    {
        data_out->albedo = glms_vec3_mul(data_out->albedo, glms_vec3(texture_sample(albedo_texture, &sample_context, context->uv)));
    }

    data_out->normal = context->normal;
    const texture_t* normal_texture = get_texture(context->textures, properties->normal_texture);
    if (normal_texture != NULL && normal_texture->data != NULL)
    {
        vec3s normal_sample = glms_vec3(texture_sample(normal_texture, &sample_context, context->uv));
        normal_sample = normal_unpack(normal_sample);
        data_out->normal = normal_ts_to_ws(normal_sample, context->normal, context->tangent);
    }

    data_out->diffuse_roughness = properties->diffuse_roughness;

    data_out->roughness = properties->roughness;
    const texture_t* roughness_texture = get_texture(context->textures, properties->roughness_texture);
    if (roughness_texture != NULL && roughness_texture->data != NULL)
    {
        data_out->roughness = data_out->roughness * texture_sample(roughness_texture, &sample_context, context->uv).x;
    }

    data_out->metallic = properties->metallic;
    const texture_t* metallic_texture = get_texture(context->textures, properties->metallic_texture);
    if (metallic_texture != NULL && metallic_texture->data != NULL)
    {
        data_out->metallic = data_out->metallic * texture_sample(metallic_texture, &sample_context, context->uv).x;
    }
}

static vec3s evaluate_bsdf_standard_lit(const shading_context_t* context, standard_lit_surface_data_t* surface_data, vec3s wi)
{
    vec3s n = surface_data->normal;
    vec3s v = glms_vec3_negate(context->wo);
    vec3s l = wi;

    float n_dot_l = fmaxf(glms_vec3_dot(n, l), 0.0f);
    float n_dot_v = fmaxf(glms_vec3_dot(n, v), 0.0f);
    if (n_dot_l <= 0.0f || n_dot_v <= 0.0f || glms_vec3_dot(context->normal, l) <= 0.0f)
    {
        return glms_vec3_zero();
    }
    
    vec3s h = glms_vec3_normalize(glms_vec3_add(v, l));
    float n_dot_h = fmaxf(glms_vec3_dot(n, h), 0.0f);
    float v_dot_h = fmaxf(glms_vec3_dot(v, h), 0.0f);

    // Fresnel
    vec3s f0 = glms_vec3_lerp(DIELECTRIC_REFLECTIVE, surface_data->albedo, surface_data->metallic);
    vec3s F = fresnel_schlick_vec3(f0, v_dot_h);

    // Specular (GGX)
    float D = ggx_distribution(n_dot_h, surface_data->roughness);
    float G = ggx_g_smith(n_dot_v, n_dot_l, surface_data->roughness);
    vec3s spec = glms_vec3_scale(glms_vec3_mul(F, (vec3s){D * G, D * G, D * G}), 1.0f / fmaxf(4.0f * n_dot_v * n_dot_l, 0.0001f));

    // Diffuse (Oren-Nayar)
    vec3s kD = glms_vec3_scale(glms_vec3_sub(glms_vec3_one(), F), 1.0f - surface_data->metallic);
    float on_val = oren_nayar_eval(l, v, n, surface_data->diffuse_roughness, n_dot_l, n_dot_v);
    vec3s diff = glms_vec3_scale(glms_vec3_mul(surface_data->albedo, kD), on_val);

    return glms_vec3_add(diff, spec);
}

static float sample_bsdf_pdf(const standard_lit_surface_data_t* surface_data, vec3s V, vec3s L)
{
    float n_dot_l = fmaxf(glms_vec3_dot(surface_data->normal, L), 0.0f);
    if (n_dot_l <= 0.0f)
    {
        return 0.0f;
    }

    // Lobe Probabilities
    vec3s f0 = glms_vec3_lerp((vec3s){0.04f, 0.04f, 0.04f}, surface_data->albedo, surface_data->metallic);
    float n_dot_v = fmaxf(glms_vec3_dot(surface_data->normal, V), 0.0001f);
    vec3s F_est = fresnel_schlick_vec3(f0, n_dot_v);

    // Use luminance-based lobe selection (more stable than max(F)).
    float lum_f = (F_est.x + F_est.y + F_est.z) / 3.0f;
    float prob_spec = glm_lerp(lum_f, 1.0f, surface_data->metallic);
    float prob_diff = (1.0f - surface_data->metallic) * (1.0f - lum_f);
    float sum_prob = prob_spec + prob_diff;
    if (sum_prob < FLT_EPSILON)
    {
        return 0.0f;
    }

    prob_spec /= sum_prob;
    prob_diff /= sum_prob;

    // Specular PDF (GGX VNDF reflection)
    // p_h(h) = D(h) * G1(v) * (N·H)/(N·V)
    // p_w(wi) = p_h(h) / (4 * (V·H))
    vec3s H = glms_vec3_normalize(glms_vec3_add(L, V));
    float v_dot_h = glms_vec3_dot(V, H);
    if (v_dot_h <= 1e-6f)
    {
        return 0.0f;
    }
    float n_dot_h = fmaxf(glms_vec3_dot(surface_data->normal, H), 0.0f);
    float D = ggx_distribution(n_dot_h, surface_data->roughness);
    float G1v = ggx_g1(n_dot_v, surface_data->roughness);
    
    // v_dot_h has been cancelled out in the derivation.
    float pdf_h = (D * G1v) / fmaxf(n_dot_v, 1e-6f); // (D * G1v * v_dot_h) / fmaxf(n_dot_v, 1e-6f)
    float pdf_spec = pdf_h / (4.0f); // pdf_h / (4.0f * v_dot_h)

    // Diffuse PDF (Cosine Weighted)
    float pdf_diff = n_dot_l * INV_PI;

    return prob_spec * pdf_spec + prob_diff * pdf_diff;
}

path_output standard_lit_render_loop(const standard_lit_properties_t* properties, const shading_context_t* context)
{
    standard_lit_surface_data_t surface_data; // Assuming you reuse your struct
    get_surface_data(context, properties, &surface_data);

    // Keep shading normal in the same hemisphere as the geometric normal to avoid invalid transport.
    if (glms_vec3_dot(surface_data.normal, context->normal) < 0.0f)
    {
        surface_data.normal = glms_vec3_negate(surface_data.normal);
    }

    path_output output = {0};
    vec3s V = glms_vec3_negate(context->wo);
    // Keep shading normal in the same hemisphere as the geometric normal.
    if (glms_vec3_dot(surface_data.normal, context->normal) < 0.0f)
    {
        surface_data.normal = glms_vec3_negate(surface_data.normal);
    }

    // If shading normal faces away from the view, fall back to geometric normal (prevents VNDF/pdf blow-ups).
    if (glms_vec3_dot(surface_data.normal, V) <= 0.0f)
    {
        surface_data.normal = context->normal;
    }

    float n_dot_v = fmaxf(glms_vec3_dot(surface_data.normal, V), 0.0001f);

    light_shading_context_t light_context = {
        .position = context->position,
        .normal = surface_data.normal,
        .geometric_normal = context->normal,
        .tangent = context->tangent,
        .uv = context->uv,
        .wo = context->wo,

        .bounce_depth = context->bounce_depth,

        .bvh_tree = context->bvh_tree,
        .textures = context->textures,
    };

    uint32_t scramble = hash_position(context->position);

    // Running the light loop.
    // TODO: Implementing other light types.
    for (uint32_t i = 0; i < context->lights->directional_light_count; i++)
    {
        path_output light_output = evaluate_bsdf_directional(context->lights->directional_lights[i], &light_context, context->throughput, context->sample_index);
        if (light_output.state == PS_SUCCESS)
        {
            vec3s bsdf_dir_light = evaluate_bsdf_standard_lit(context, &surface_data, light_output.wi);
            float pdf_bsdf = sample_bsdf_pdf(&surface_data, V, light_output.wi);
            vec3s light_contribute = weight_nee_light(bsdf_dir_light, light_output.direct_lighting, pdf_bsdf, light_output.pdf);
            output.direct_lighting = glms_vec3_add(output.direct_lighting, light_contribute);
        }
    }

    // Sky
    path_output sky_output = evaluate_bsdf_sky(context->lights, &light_context, context->throughput, context->sample_index);
    if (sky_output.state == PS_SUCCESS)
    {
        vec3s bsdf_sky_light = evaluate_bsdf_standard_lit(context, &surface_data, sky_output.wi);
        float pdf_bsdf = sample_bsdf_pdf(&surface_data, V, sky_output.wi);
        vec3s sky_light = weight_nee_light(bsdf_sky_light, sky_output.direct_lighting, pdf_bsdf, sky_output.pdf);
        output.direct_lighting = glms_vec3_add(output.direct_lighting, sky_light);
    }

    // ----------------------------------------------------
    // Indirect Lighting (Sampling Next Ray)
    // ----------------------------------------------------

    // 1. Choose Lobe (Diffuse or Specular)
    vec3s f0 = glms_vec3_lerp(DIELECTRIC_F0, surface_data.albedo, surface_data.metallic);
    vec3s F_est = fresnel_schlick_vec3(f0, n_dot_v);

    // Use luminance-based lobe selection (more stable than max(F)).
    float lum_f = (F_est.x + F_est.y + F_est.z) / 3.0f;
    float prob_spec = glm_lerp(lum_f, 1.0f, surface_data.metallic);
    float prob_diff = (1.0f - surface_data.metallic) * (1.0f - lum_f);

    // Normalize probabilities
    float sum_probs = prob_spec + prob_diff;
    if (sum_probs < FLT_EPSILON)
    {
        output.state = PS_TERMINATE;
        return output;
    }

    prob_spec /= sum_probs;
    prob_diff /= sum_probs;

    float pdf_gen = 0.0f;
    float r_lobe = sobol_sample_scrambled(context->sample_index, sobol_get_dimension(context->bounce_depth, PRNG_BSDF), scramble);
    bool is_specular = (r_lobe < prob_spec);

    if (is_specular)
    {
        // Sample GGX
        uint32_t d1 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_U);
        uint32_t d2 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_V);

        float u1 = sobol_sample_scrambled(context->sample_index, d1, scramble);
        float u2 = sobol_sample_scrambled(context->sample_index, d2, scramble);

        vec3s H = sample_ggx_vndf(surface_data.normal, V, surface_data.roughness, u1, u2);
        output.wi = glms_vec3_reflect(context->wo, H); // reflect(-V, H) -> V is wo inverted

        if (glms_vec3_dot(output.wi, surface_data.normal) <= 0.0f || glms_vec3_dot(output.wi, context->normal) <= 0.0f)
        {
            output.state = PS_TERMINATE;
            return output;
        }

        // Recalculate dots
        float n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), 0.0001f);
        vec3s H_new = glms_vec3_normalize(glms_vec3_add(output.wi, V));
        float n_dot_h = fmaxf(glms_vec3_dot(surface_data.normal, H_new), 0.0001f);
        float v_dot_h = fmaxf(glms_vec3_dot(V, H_new), 0.0001f);

        // Evaluate PDF (GGX VNDF reflection)
        // p_w(wi) = (D(h) * G1(v) * (V·H)/(N·V)) / (4 * (V·H))
        float D = ggx_distribution(n_dot_h, surface_data.roughness);
        float G1v = ggx_g1(n_dot_v, surface_data.roughness);

        // v_dot_h has been cancelled out in the derivation.
        float pdf_h = (D * G1v) / fmaxf(n_dot_v, 1e-6f); // (D * G1v * v_dot_h) / fmaxf(n_dot_v, 1e-6f)
        float pdf_spec_dir = pdf_h / (4.0f); // pdf_h / (4.0f * v_dot_h)
        pdf_gen = pdf_spec_dir * prob_spec;
        if (pdf_gen < 1e-12f)
        {
            output.state = PS_TERMINATE;
            return output;
        }

        // Evaluate BSDF
        float G = ggx_g_smith(n_dot_v, n_dot_l, surface_data.roughness);
        vec3s f0 = glms_vec3_lerp(DIELECTRIC_F0, surface_data.albedo, surface_data.metallic);
        vec3s F = fresnel_schlick_vec3(f0, v_dot_h);

        vec3s spec_f = glms_vec3_scale(glms_vec3_mul(F, (vec3s){D * G, D * G, D * G}),
                                       1.0f / fmaxf(4.0f * n_dot_v * n_dot_l, 1e-6f));

        // Throughput multiplier must be: (f * NoL) / pdf_total
        output.bsdf = glms_vec3_scale(spec_f, n_dot_l / pdf_gen);

        // Propagate spread angle for ray cones
        // Heuristic: spread increases with roughness
        output.spread_angle = context->spread_angle + surface_data.roughness * 0.2f;
    }
    else
    {
        // Sample Diffuse (Cosine Weighted)
        // Note: We use Cosine sampling for Oren-Nayar too, it's a "good enough" approximation for the PDF.
        uint32_t d1 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_U);
        uint32_t d2 = sobol_get_dimension(context->bounce_depth, PRNG_BSDF_V);
        output.wi = random_cosine_direction(surface_data.normal, context->sample_index, d1, d2, scramble);

        float n_dot_l = fmaxf(glms_vec3_dot(surface_data.normal, output.wi), 0.0001f);
        if (glms_vec3_dot(output.wi, context->normal) <= 0.0f)
        {
            output.state = PS_TERMINATE;
            return output;
        }

        // Evaluate Oren-Nayar
        // Note: Cosine PDF = n_dot_l / PI
        float pdf_diff_dir = n_dot_l * INV_PI;
        pdf_gen = pdf_diff_dir * prob_diff;
        if (pdf_gen < 1e-12f)
        {
            output.state = PS_TERMINATE;
            return output;
        }

        vec3s kD = glms_vec3_scale(glms_vec3_sub(glms_vec3_one(), F_est), 1.0f - surface_data.metallic);
        float on = oren_nayar_eval(output.wi, V, surface_data.normal, surface_data.diffuse_roughness, n_dot_l, n_dot_v);


        // Diffuse bounce significantly increases spread (effectively resets or becomes very wide)
        output.spread_angle = context->spread_angle + 0.5f;
        vec3s diff_f = glms_vec3_scale(glms_vec3_mul(surface_data.albedo, kD), on);

        // Throughput multiplier: (f * NoL) / pdf_total
        output.bsdf = glms_vec3_scale(diff_f, n_dot_l / pdf_gen);
    }

    output.pdf = sample_bsdf_pdf(&surface_data, V, output.wi);
    output.state = PS_SUCCESS;
    return output;
}