Refactor: simplify computations in function #ensure_valid_reflection
Both candidate roots actually lie in the interval [0, 1], so the function ended up checking both of them every time. The new implementation explains why only one of the roots is valid and evaluates only that one; this saves two square roots and a number of other computations.
This commit is contained in:
@@ -107,10 +107,13 @@ ccl_device_forceinline Spectrum interpolate_fresnel_color(float3 L,
|
||||
|
||||
ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
|
||||
{
|
||||
float3 R = 2 * dot(N, I) * N - I;
|
||||
const float3 R = 2 * dot(N, I) * N - I;
|
||||
|
||||
const float Iz = dot(I, Ng);
|
||||
kernel_assert(Iz > 0);
|
||||
|
||||
/* Reflection rays may always be at least as shallow as the incoming ray. */
|
||||
float threshold = min(0.9f * dot(Ng, I), 0.01f);
|
||||
const float threshold = min(0.9f * Iz, 0.01f);
|
||||
if (dot(Ng, R) >= threshold) {
|
||||
return N;
|
||||
}
|
||||
@@ -119,11 +122,8 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
|
||||
* The X axis is found by normalizing the component of N that's orthogonal to Ng.
|
||||
* The Y axis isn't actually needed.
|
||||
*/
|
||||
float NdotNg = dot(N, Ng);
|
||||
float3 X = normalize(N - NdotNg * Ng);
|
||||
const float3 X = normalize(N - dot(N, Ng) * Ng);
|
||||
|
||||
/* Keep math expressions. */
|
||||
/* clang-format off */
|
||||
/* Calculate N.z and N.x in the local coordinate system.
|
||||
*
|
||||
* The goal of this computation is to find a N' that is rotated towards Ng just enough
|
||||
@@ -141,74 +141,43 @@ ccl_device float3 ensure_valid_reflection(float3 Ng, float3 I, float3 N)
|
||||
*
|
||||
* Furthermore, we want N' to be normalized, so N'.x = sqrt(1 - N'.z^2).
|
||||
*
|
||||
* With these simplifications,
|
||||
* we get the final equation 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t.
|
||||
* With these simplifications, we get the equation
|
||||
* 2*(sqrt(1 - N'.z^2)*I.x + N'.z*I.z)*N'.z - I.z = t,
|
||||
* or
|
||||
* 2*sqrt(1 - N'.z^2)*I.x*N'.z = t + I.z * (1 - 2*N'.z^2),
|
||||
* after rearranging terms.
|
||||
* Raise both sides to the power of two and substitute terms with
|
||||
* a = I.x^2 + I.z^2,
|
||||
* b = 2*(a + I.z*t),
|
||||
* c = (I.z + t)^2,
|
||||
* we obtain
|
||||
* 4*a*N'.z^4 - 2*b*N'.z^2 + c = 0.
|
||||
*
|
||||
* The only unknown here is N'.z, so we can solve for that.
|
||||
*
|
||||
* The equation has four solutions in general:
|
||||
*
|
||||
* N'.z = +-sqrt(0.5*(+-sqrt(I.x^2*(I.x^2 + I.z^2 - t^2)) + t*I.z + I.x^2 + I.z^2)/(I.x^2 + I.z^2))
|
||||
* We can simplify this expression a bit by grouping terms:
|
||||
*
|
||||
* a = I.x^2 + I.z^2
|
||||
* b = sqrt(I.x^2 * (a - t^2))
|
||||
* c = I.z*t + a
|
||||
* N'.z = +-sqrt(0.5*(+-b + c)/a)
|
||||
*
|
||||
* Two solutions can immediately be discarded because they're negative so N' would lie in the
|
||||
* lower hemisphere.
|
||||
* The equation has four solutions in general: two can immediately be discarded because they're
|
||||
* negative so N' would lie in the lower hemisphere; of the remaining two, one solves
|
||||
* 2*sqrt(1 - N'.z^2)*I.x*N'.z = -(t + I.z * (1 - 2*N'.z^2))
|
||||
* instead of the original equation (before squaring both sides).
|
||||
* Therefore only one root is valid.
|
||||
*/
|
||||
/* clang-format on */
|
||||
|
||||
float Ix = dot(I, X), Iz = dot(I, Ng);
|
||||
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
|
||||
float a = Ix2 + Iz2;
|
||||
const float Ix = dot(I, X);
|
||||
|
||||
float b = safe_sqrtf(Ix2 * (a - sqr(threshold)));
|
||||
float c = Iz * threshold + a;
|
||||
const float a = sqr(Ix) + sqr(Iz);
|
||||
const float b = 2.0f * (a + Iz * threshold);
|
||||
const float c = sqr(threshold + Iz);
|
||||
|
||||
/* Evaluate both solutions.
|
||||
* In many cases one can be immediately discarded (if N'.z would be imaginary or larger than
|
||||
* one), so check for that first. If no option is viable (might happen in extreme cases like N
|
||||
* being in the wrong hemisphere), give up and return Ng. */
|
||||
float fac = 0.5f / a;
|
||||
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
|
||||
bool valid1 = (N1_z2 > 1e-5f) && (N1_z2 <= (1.0f + 1e-5f));
|
||||
bool valid2 = (N2_z2 > 1e-5f) && (N2_z2 <= (1.0f + 1e-5f));
|
||||
/* In order that the root formula solves 2*sqrt(1 - N'.z^2)*I.x*N'.z = t + I.z - 2*I.z*N'.z^2,
|
||||
* Ix and (t + I.z * (1 - 2*N'.z^2)) must have the same sign (the remaining terms are
|
||||
* non-negative by definition). */
|
||||
const float Nz2 = (Ix < 0) ? 0.25f * (b + safe_sqrtf(sqr(b) - 4.0f * a * c)) / a :
|
||||
0.25f * (b - safe_sqrtf(sqr(b) - 4.0f * a * c)) / a;
|
||||
|
||||
float2 N_new;
|
||||
if (valid1 && valid2) {
|
||||
/* If both are possible, do the expensive reflection-based check. */
|
||||
float2 N1 = make_float2(safe_sqrtf(1.0f - N1_z2), safe_sqrtf(N1_z2));
|
||||
float2 N2 = make_float2(safe_sqrtf(1.0f - N2_z2), safe_sqrtf(N2_z2));
|
||||
const float Nx = safe_sqrtf(1.0f - Nz2);
|
||||
const float Nz = safe_sqrtf(Nz2);
|
||||
|
||||
float R1 = 2 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
|
||||
float R2 = 2 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
|
||||
|
||||
valid1 = (R1 >= 1e-5f);
|
||||
valid2 = (R2 >= 1e-5f);
|
||||
if (valid1 && valid2) {
|
||||
/* If both solutions are valid, return the one with the shallower reflection since it will be
|
||||
* closer to the input (if the original reflection wasn't shallow, we would not be in this
|
||||
* part of the function). */
|
||||
N_new = (R1 < R2) ? N1 : N2;
|
||||
}
|
||||
else {
|
||||
/* If only one reflection is valid (= positive), pick that one. */
|
||||
N_new = (R1 > R2) ? N1 : N2;
|
||||
}
|
||||
}
|
||||
else if (valid1 || valid2) {
|
||||
/* Only one solution passes the N'.z criterion, so pick that one. */
|
||||
float Nz2 = valid1 ? N1_z2 : N2_z2;
|
||||
N_new = make_float2(safe_sqrtf(1.0f - Nz2), safe_sqrtf(Nz2));
|
||||
}
|
||||
else {
|
||||
return Ng;
|
||||
}
|
||||
|
||||
return N_new.x * X + N_new.y * Ng;
|
||||
return Nx * X + Nz * Ng;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -67,7 +67,7 @@ closure color absorption() BUILTIN;
|
||||
|
||||
normal ensure_valid_reflection(normal Ng, vector I, normal N)
|
||||
{
|
||||
/* The implementation here mirrors the one in kernel_montecarlo.h,
|
||||
/* The implementation here mirrors the one in bsdf_util.h,
|
||||
* check there for an explanation of the algorithm. */
|
||||
|
||||
float sqr(float x)
|
||||
@@ -77,55 +77,26 @@ normal ensure_valid_reflection(normal Ng, vector I, normal N)
|
||||
|
||||
vector R = 2 * dot(N, I) * N - I;
|
||||
|
||||
float threshold = min(0.9 * dot(Ng, I), 0.01);
|
||||
float Iz = dot(I, Ng);
|
||||
|
||||
float threshold = min(0.9 * Iz, 0.01);
|
||||
if (dot(Ng, R) >= threshold) {
|
||||
return N;
|
||||
}
|
||||
|
||||
float NdotNg = dot(N, Ng);
|
||||
vector X = normalize(N - NdotNg * Ng);
|
||||
vector X = normalize(N - dot(N, Ng) * Ng);
|
||||
float Ix = dot(I, X);
|
||||
|
||||
float Ix = dot(I, X), Iz = dot(I, Ng);
|
||||
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
|
||||
float a = Ix2 + Iz2;
|
||||
float a = sqr(Ix) + sqr(Iz);
|
||||
float b = 2.0 * (a + Iz * threshold);
|
||||
float c = sqr(threshold + Iz);
|
||||
|
||||
float b = sqrt(Ix2 * (a - sqr(threshold)));
|
||||
float c = Iz * threshold + a;
|
||||
float Nz2 = (Ix < 0) ? 0.25 * (b + sqrt(sqr(b) - 4.0 * a * c)) / a :
|
||||
0.25 * (b - sqrt(sqr(b) - 4.0 * a * c)) / a;
|
||||
float Nx = sqrt(1.0 - Nz2);
|
||||
float Nz = sqrt(Nz2);
|
||||
|
||||
float fac = 0.5 / a;
|
||||
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
|
||||
int valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
|
||||
int valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
|
||||
|
||||
float N_new_x, N_new_z;
|
||||
if (valid1 && valid2) {
|
||||
float N1_x = sqrt(1.0 - N1_z2), N1_z = sqrt(N1_z2);
|
||||
float N2_x = sqrt(1.0 - N2_z2), N2_z = sqrt(N2_z2);
|
||||
|
||||
float R1 = 2 * (N1_x * Ix + N1_z * Iz) * N1_z - Iz;
|
||||
float R2 = 2 * (N2_x * Ix + N2_z * Iz) * N2_z - Iz;
|
||||
|
||||
valid1 = (R1 >= 1e-5);
|
||||
valid2 = (R2 >= 1e-5);
|
||||
if (valid1 && valid2) {
|
||||
N_new_x = (R1 < R2) ? N1_x : N2_x;
|
||||
N_new_z = (R1 < R2) ? N1_z : N2_z;
|
||||
}
|
||||
else {
|
||||
N_new_x = (R1 > R2) ? N1_x : N2_x;
|
||||
N_new_z = (R1 > R2) ? N1_z : N2_z;
|
||||
}
|
||||
}
|
||||
else if (valid1 || valid2) {
|
||||
float Nz2 = valid1 ? N1_z2 : N2_z2;
|
||||
N_new_x = sqrt(1.0 - Nz2);
|
||||
N_new_z = sqrt(Nz2);
|
||||
}
|
||||
else {
|
||||
return Ng;
|
||||
}
|
||||
|
||||
return N_new_x * X + N_new_z * Ng;
|
||||
return Nx * X + Nz * Ng;
|
||||
}
|
||||
|
||||
#endif /* CCL_STDOSL_H */
|
||||
|
||||
@@ -140,53 +140,28 @@ vec3 ensure_valid_reflection(vec3 Ng, vec3 I, vec3 N)
|
||||
{
|
||||
vec3 R = -reflect(I, N);
|
||||
|
||||
float Iz = dot(I, Ng);
|
||||
|
||||
/* Reflection rays may always be at least as shallow as the incoming ray. */
|
||||
float threshold = min(0.9 * dot(Ng, I), 0.025);
|
||||
float threshold = min(0.9 * Iz, 0.025);
|
||||
if (dot(Ng, R) >= threshold) {
|
||||
return N;
|
||||
}
|
||||
|
||||
float NdotNg = dot(N, Ng);
|
||||
vec3 X = normalize(N - NdotNg * Ng);
|
||||
vec3 X = normalize(N - dot(N, Ng) * Ng);
|
||||
float Ix = dot(I, X);
|
||||
|
||||
float Ix = dot(I, X), Iz = dot(I, Ng);
|
||||
float Ix2 = sqr(Ix), Iz2 = sqr(Iz);
|
||||
float a = Ix2 + Iz2;
|
||||
float a = sqr(Ix) + sqr(Iz);
|
||||
float b = 2.0 * (a + Iz * threshold);
|
||||
float c = sqr(threshold + Iz);
|
||||
|
||||
float b = sqrt(Ix2 * (a - sqr(threshold)));
|
||||
float c = Iz * threshold + a;
|
||||
float Nz2 = (Ix < 0.0) ? 0.25 * (b + safe_sqrt(sqr(b) - 4.0 * a * c)) / a :
|
||||
0.25 * (b - safe_sqrt(sqr(b) - 4.0 * a * c)) / a;
|
||||
|
||||
float fac = 0.5 / a;
|
||||
float N1_z2 = fac * (b + c), N2_z2 = fac * (-b + c);
|
||||
bool valid1 = (N1_z2 > 1e-5) && (N1_z2 <= (1.0 + 1e-5));
|
||||
bool valid2 = (N2_z2 > 1e-5) && (N2_z2 <= (1.0 + 1e-5));
|
||||
float Nx = safe_sqrt(1.0 - Nz2);
|
||||
float Nz = safe_sqrt(Nz2);
|
||||
|
||||
vec2 N_new;
|
||||
if (valid1 && valid2) {
|
||||
/* If both are possible, do the expensive reflection-based check. */
|
||||
vec2 N1 = vec2(safe_sqrt(1.0 - N1_z2), safe_sqrt(N1_z2));
|
||||
vec2 N2 = vec2(safe_sqrt(1.0 - N2_z2), safe_sqrt(N2_z2));
|
||||
|
||||
float R1 = 2.0 * (N1.x * Ix + N1.y * Iz) * N1.y - Iz;
|
||||
float R2 = 2.0 * (N2.x * Ix + N2.y * Iz) * N2.y - Iz;
|
||||
|
||||
valid1 = (R1 >= 1e-5);
|
||||
valid2 = (R2 >= 1e-5);
|
||||
if (valid1 && valid2) {
|
||||
N_new = (R1 < R2) ? N1 : N2;
|
||||
}
|
||||
else {
|
||||
N_new = (R1 > R2) ? N1 : N2;
|
||||
}
|
||||
}
|
||||
else if (valid1 || valid2) {
|
||||
float Nz2 = valid1 ? N1_z2 : N2_z2;
|
||||
N_new = vec2(safe_sqrt(1.0 - Nz2), safe_sqrt(Nz2));
|
||||
}
|
||||
else {
|
||||
return Ng;
|
||||
}
|
||||
return N_new.x * X + N_new.y * Ng;
|
||||
return Nx * X + Nz * Ng;
|
||||
}
|
||||
|
||||
/* ----------- Cone angle Approximation --------- */
|
||||
|
||||
Reference in New Issue
Block a user