#include "Algorithm/GGXMultiScatter.h" #include "Algorithm/MicrofacetGGX.h" #define GGX_MS_LUT_NOV_SIZE 32 #define GGX_MS_LUT_ROUGH_SIZE 32 #define GGX_MS_LUT_SAMPLES 256 static float g_ggx_E_lut[GGX_MS_LUT_ROUGH_SIZE][GGX_MS_LUT_NOV_SIZE]; static float g_ggx_Eavg_lut[GGX_MS_LUT_ROUGH_SIZE]; static bool g_ggx_ms_lut_initialized = false; static inline float saturatef(float x) { return fminf(fmaxf(x, 0.0f), 1.0f); } static inline float radical_inverse_vdc(uint32_t bits) { bits = (bits << 16u) | (bits >> 16u); bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); return (float)bits * 2.3283064365386963e-10f; // / 2^32 } static inline vec3s vec3_div_safe(vec3s a, vec3s b, float eps) { return (vec3s){a.x / fmaxf(b.x, eps), a.y / fmaxf(b.y, eps), a.z / fmaxf(b.z, eps)}; } static inline vec3s fresnel_schlick_avg(vec3s f0) { // Cosine-weighted hemispherical average for Schlick Fresnel. // Approximation: <(1 - cos)^5>_cos = 1/21. const float t = 1.0f / 21.0f; return glms_vec3_add(f0, glms_vec3_scale(glms_vec3_sub(glms_vec3_one(), f0), t)); } static inline float ggx_ms_Eavg(float roughness) { ggx_ms_init_lut_once(); roughness = saturatef(roughness); float y = roughness * (float)(GGX_MS_LUT_ROUGH_SIZE - 1); uint32_t y0 = (uint32_t)floorf(y); uint32_t y1 = (y0 + 1u < GGX_MS_LUT_ROUGH_SIZE) ? (y0 + 1u) : y0; float ty = y - (float)y0; return saturatef(glm_lerp(g_ggx_Eavg_lut[y0], g_ggx_Eavg_lut[y1], ty)); } float ggx_ms_E(float NoV, float roughness) { ggx_ms_init_lut_once(); NoV = saturatef(NoV); roughness = saturatef(roughness); float x = NoV * (float)(GGX_MS_LUT_NOV_SIZE - 1); float y = roughness * (float)(GGX_MS_LUT_ROUGH_SIZE - 1); uint32_t x0 = (uint32_t)floorf(x); uint32_t y0 = (uint32_t)floorf(y); uint32_t x1 = (x0 + 1u < GGX_MS_LUT_NOV_SIZE) ? (x0 + 1u) : x0; uint32_t y1 = (y0 + 1u < GGX_MS_LUT_ROUGH_SIZE) ? (y0 + 1u) : y0; float tx = x - (float)x0; float ty = y - (float)y0; float e00 = g_ggx_E_lut[y0][x0]; float e10 = g_ggx_E_lut[y0][x1]; float e01 = g_ggx_E_lut[y1][x0]; float e11 = g_ggx_E_lut[y1][x1]; float e0 = glm_lerp(e00, e10, tx); float e1 = glm_lerp(e01, e11, tx); return saturatef(glm_lerp(e0, e1, ty)); } void ggx_ms_init_lut_once(void) { if (g_ggx_ms_lut_initialized) { return; } #ifdef _OPENMP #pragma omp critical(ggx_ms_lut_init) #endif { if (!g_ggx_ms_lut_initialized) { vec3s n = (vec3s){0.0f, 0.0f, 1.0f}; for (uint32_t ry = 0; ry < GGX_MS_LUT_ROUGH_SIZE; ry++) { float roughness = ((float)ry + 0.5f) / (float)GGX_MS_LUT_ROUGH_SIZE; roughness = fmaxf(roughness, 0.001f); float Eavg = 0.0f; for (uint32_t ix = 0; ix < GGX_MS_LUT_NOV_SIZE; ix++) { float NoV = ((float)ix + 0.5f) / (float)GGX_MS_LUT_NOV_SIZE; NoV = fmaxf(NoV, 1e-4f); float sin_theta = sqrtf(fmaxf(0.0f, 1.0f - NoV * NoV)); vec3s v = glms_vec3_normalize((vec3s){sin_theta, 0.0f, NoV}); float sum = 0.0f; uint32_t valid = 0; uint32_t scramble = hash_uint32((ry + 1u) * 0x9E3779B9u ^ (ix + 1u) * 0x7F4A7C15u); for (uint32_t s = 0; s < GGX_MS_LUT_SAMPLES; s++) { float u1 = ((float)s + 0.5f) / (float)GGX_MS_LUT_SAMPLES; float u2 = radical_inverse_vdc(s ^ scramble); vec3s h = ggx_sample_vndf(n, v, roughness, u1, u2); vec3s l = glms_vec3_reflect(glms_vec3_negate(v), h); float NoL = l.z; if (NoL <= 0.0f) { continue; } // For F=1, importance sampling with VNDF yields a simple estimator: // rho_ss(v) = E[ G1(NoL) ]. float alpha_lut = roughness * roughness; float alpha2_lut = alpha_lut * alpha_lut; float lambda_v = sqrtf(alpha2_lut + (1.0f - alpha2_lut) * NoV * NoV); float lambda_l = sqrtf(alpha2_lut + (1.0f - alpha2_lut) * NoL * NoL); sum += NoL * (NoV + lambda_v) / fmaxf(NoL * lambda_v + NoV * lambda_l, 1e-7f); valid++; } // float E = (valid > 0u) ? (sum / (float)valid) : 0.0f; float E = sum / (float)GGX_MS_LUT_SAMPLES; E = saturatef(E); g_ggx_E_lut[ry][ix] = E; // Accumulate hemispherical average with cosine weighting: // Eavg = 2 * \int_0^1 E(mu) * mu dmu float mu = NoV; Eavg += E * mu; } Eavg = 2.0f * Eavg * (1.0f / (float)GGX_MS_LUT_NOV_SIZE); g_ggx_Eavg_lut[ry] = saturatef(Eavg); } g_ggx_ms_lut_initialized = true; } } } vec3s ggx_multi_scatter_lambert(vec3s f0, float NoV, float NoL, float roughness) { float Eo = ggx_ms_E(NoV, roughness); float Ei = ggx_ms_E(NoL, roughness); float Eavg = ggx_ms_Eavg(roughness); vec3s Favg = fresnel_schlick_avg(f0); // Series factor for multiple bounces inside the microfacet layer. // C = Favg^2 / (1 - Favg * (1 - Eavg)) vec3s Favg2 = glms_vec3_mul(Favg, Favg); vec3s denom = glms_vec3_sub(glms_vec3_one(), glms_vec3_scale(Favg, (1.0f - Eavg))); vec3s C = vec3_div_safe(Favg2, denom, 1e-6f); float scale = (1.0f - Eo) * (1.0f - Ei); scale /= (PI * fmaxf(1.0f - Eavg, 1e-6f)); return glms_vec3_scale(C, scale); }