diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index d94a29b7f49..594396d987e 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -64,6 +64,11 @@ typedef struct VolumeShaderCoefficients { Spectrum emission; } VolumeShaderCoefficients; +typedef struct EquiangularCoefficients { + float3 P; + float2 t_range; +} EquiangularCoefficients; + /* Evaluate shader to get extinction coefficient at P. */ ccl_device_inline bool shadow_volume_shader_sample(KernelGlobals kg, IntegratorShadowState state, @@ -264,18 +269,18 @@ ccl_device void volume_shadow_heterogeneous(KernelGlobals kg, # define VOLUME_SAMPLE_PDF_CUTOFF 1e-8f ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, + ccl_private const EquiangularCoefficients &coeffs, const float xi, ccl_private float *pdf) { - const float tmin = ray->tmin; - const float tmax = ray->tmax; - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + const float delta = dot((coeffs.P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { *pdf = 0.0f; return 0.0f; } + const float tmin = coeffs.t_range.x; + const float tmax = coeffs.t_range.y; const float theta_a = atan2f(tmin - delta, D); const float theta_b = atan2f(tmax - delta, D); const float t_ = D * tanf((xi * theta_b) + (1 - xi) * theta_a); @@ -289,17 +294,17 @@ ccl_device float volume_equiangular_sample(ccl_private const Ray *ccl_restrict r } ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, - const float3 light_P, + ccl_private const EquiangularCoefficients &coeffs, const float sample_t) { - const float delta = dot((light_P - ray->P), ray->D); - const float D = safe_sqrtf(len_squared(light_P - ray->P) - delta * delta); + const float delta = dot((coeffs.P - ray->P), ray->D); + const float D = safe_sqrtf(len_squared(coeffs.P - ray->P) - delta * delta); if (UNLIKELY(D == 0.0f)) { return 0.0f; } - const float tmin = ray->tmin; - const float tmax = ray->tmax; + const float tmin = coeffs.t_range.x; + const float tmax = coeffs.t_range.y; const float t_ = sample_t - delta; const float theta_a = atan2f(tmin - delta, D); @@ -313,6 +318,36 @@ ccl_device float volume_equiangular_pdf(ccl_private const Ray *ccl_restrict ray, return pdf; } +ccl_device_inline bool volume_equiangular_valid_ray_segment(KernelGlobals kg, + const float3 ray_P, + const float3 ray_D, + ccl_private float2 *t_range, + const ccl_private LightSample *ls) +{ +# ifdef __LIGHT_TREE__ + /* Do not compute ray segment until #119389 is landed. */ + if (kernel_data.integrator.use_light_tree) { + return true; + } +# endif + + if (ls->type == LIGHT_SPOT) { + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); + return spot_light_valid_ray_segment(klight, ray_P, ray_D, t_range); + } + if (ls->type == LIGHT_AREA) { + ccl_global const KernelLight *klight = &kernel_data_fetch(lights, ls->lamp); + return area_light_valid_ray_segment(&klight->area, ray_P - klight->co, ray_D, t_range); + } + if (ls->type == LIGHT_TRIANGLE) { + return triangle_light_valid_ray_segment(kg, ray_P - ls->P, ray_D, t_range, ls); + } + + /* Point light, the whole range of the ray is visible. */ + kernel_assert(ls->type == LIGHT_POINT); + return true; +} + /* Distance sampling */ ccl_device float volume_distance_sample(float max_t, @@ -403,7 +438,7 @@ typedef struct VolumeIntegrateState { ccl_device_forceinline void volume_integrate_step_scattering( ccl_private const ShaderData *sd, ccl_private const Ray *ray, - const float3 equiangular_light_P, + ccl_private const EquiangularCoefficients &equiangular_coeffs, ccl_private const VolumeShaderCoefficients &ccl_restrict coeff, const Spectrum transmittance, ccl_private VolumeIntegrateState &ccl_restrict vstate, @@ -474,7 +509,7 @@ ccl_device_forceinline void volume_integrate_step_scattering( /* Multiple importance sampling. */ if (vstate.use_mis) { - const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_light_P, new_t); + const float equiangular_pdf = volume_equiangular_pdf(ray, equiangular_coeffs, new_t); const float mis_weight = power_heuristic(vstate.distance_pdf * distance_pdf, equiangular_pdf); result.direct_throughput *= 2.0f * mis_weight; @@ -509,7 +544,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( ccl_global float *ccl_restrict render_buffer, const float object_step_size, const VolumeSampleMethod direct_sample_method, - const float3 equiangular_light_P, + ccl_private const EquiangularCoefficients &equiangular_coeffs, ccl_private VolumeIntegrateResult &result) { PROFILING_INIT(kg, PROFILING_SHADE_VOLUME_INTEGRATE); @@ -560,7 +595,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Equiangular sampling: compute distance and PDF in advance. */ if (vstate.direct_sample_method == VOLUME_SAMPLE_EQUIANGULAR) { result.direct_t = volume_equiangular_sample( - ray, equiangular_light_P, vstate.rscatter, &vstate.equiangular_pdf); + ray, equiangular_coeffs, vstate.rscatter, &vstate.equiangular_pdf); } # ifdef __PATH_GUIDING__ result.direct_sample_method = vstate.direct_sample_method; @@ -614,7 +649,7 @@ ccl_device_forceinline void volume_integrate_heterogeneous( /* Scattering and absorption. */ volume_integrate_step_scattering( - sd, ray, equiangular_light_P, coeff, transmittance, vstate, result); + sd, ray, equiangular_coeffs, coeff, transmittance, vstate, result); } else { /* Absorption only. */ @@ -673,7 +708,7 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light( ccl_private const Ray *ccl_restrict ray, ccl_private const ShaderData *ccl_restrict sd, ccl_private const RNGState *ccl_restrict rng_state, - ccl_private float3 *ccl_restrict P) + ccl_private EquiangularCoefficients *ccl_restrict equiangular_coeffs) { /* Test if there is a light or BSDF that needs direct light. */ if (!kernel_data.integrator.use_direct_light) { @@ -708,9 +743,10 @@ ccl_device_forceinline bool integrate_volume_equiangular_sample_light( return false; } - *P = ls.P; + equiangular_coeffs->P = ls.P; - return true; + return volume_equiangular_valid_ray_segment( + kg, ray->P, ray->D, &equiangular_coeffs->t_range, &ls); } /* Path tracing: sample point on light and evaluate light shader, then @@ -990,10 +1026,12 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, /* Sample light ahead of volume stepping, for equiangular sampling. */ /* TODO: distant lights are ignored now, but could instead use even distribution. */ const bool need_light_sample = !(INTEGRATOR_STATE(state, path, flag) & PATH_RAY_TERMINATE); - float3 equiangular_P = zero_float3(); + + EquiangularCoefficients equiangular_coeffs = {zero_float3(), make_float2(ray->tmin, ray->tmax)}; + const bool have_equiangular_sample = need_light_sample && integrate_volume_equiangular_sample_light( - kg, state, ray, &sd, &rng_state, &equiangular_P); + kg, state, ray, &sd, &rng_state, &equiangular_coeffs); VolumeSampleMethod direct_sample_method = (have_equiangular_sample) ? volume_stack_sample_method(kg, state) : @@ -1023,7 +1061,7 @@ ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg, render_buffer, step_size, direct_sample_method, - equiangular_P, + equiangular_coeffs, result); /* Perform path termination. The intersect_closest will have already marked this path diff --git a/intern/cycles/kernel/light/area.h b/intern/cycles/kernel/light/area.h index eb03ca866ef..51a66265546 100644 --- a/intern/cycles/kernel/light/area.h +++ b/intern/cycles/kernel/light/area.h @@ -233,6 +233,11 @@ ccl_device bool area_light_spread_clamp_light(const float3 P, return true; } +ccl_device_forceinline bool area_light_is_ellipse(const ccl_global KernelAreaLight *light) +{ + return light->invarea < 0.0f; +} + /* Common API. */ /* Compute `eval_fac` and `pdf`. Also sample a new position on the light if `sample_coord`. */ template @@ -338,7 +343,7 @@ ccl_device_inline bool area_light_sample(const ccl_global KernelLight *klight, const float light_v = dot(inplane, klight->area.axis_v) / klight->area.len_v; if (!in_volume_segment) { - const bool is_ellipse = (klight->area.invarea < 0.0f); + const bool is_ellipse = area_light_is_ellipse(&klight->area); /* Sampled point lies outside of the area light. */ if (is_ellipse && (sqr(light_u) + sqr(light_v) > 0.25f)) { @@ -380,7 +385,7 @@ ccl_device_inline bool area_light_intersect(const ccl_global KernelLight *klight { /* Area light. */ const float invarea = fabsf(klight->area.invarea); - const bool is_ellipse = (klight->area.invarea < 0.0f); + const bool is_ellipse = area_light_is_ellipse(&klight->area); if (invarea == 0.0f) { return false; } @@ -428,6 +433,55 @@ ccl_device_inline bool area_light_sample_from_intersection( return area_light_eval(klight, ray_P, &light_P, ls, zero_float2(), false); } +/* Returns the maximal distance between the light center and the boundary. */ +ccl_device_forceinline float area_light_max_extent(const ccl_global KernelAreaLight *light) +{ + return 0.5f * (area_light_is_ellipse(light) ? fmaxf(light->len_u, light->len_v) : + len(make_float2(light->len_u, light->len_v))); +} + +/* Find the ray segment lit by the area light. */ +ccl_device_inline bool area_light_valid_ray_segment(const ccl_global KernelAreaLight *light, + float3 P, + float3 D, + ccl_private float2 *t_range) +{ + bool valid; + const float tan_half_spread = light->tan_half_spread; + float3 axis = light->dir; + + const bool angle_almost_zero = (tan_half_spread < 1e-5f); + if (angle_almost_zero) { + /* Map to local coordinate of the light. Do not use `itfm` in `KernelLight` as there might be + * additional scaling in the light size. */ + const Transform tfm = make_transform(light->axis_u, light->axis_v, axis); + P = transform_point(&tfm, P); + D = transform_direction(&tfm, D); + axis = make_float3(0.0f, 0.0f, 1.0f); + + const float half_len_u = 0.5f * light->len_u; + const float half_len_v = 0.5f * light->len_v; + if (area_light_is_ellipse(light)) { + valid = ray_infinite_cylinder_intersect(P, D, half_len_u, half_len_v, t_range); + } + else { + const float3 bbox_min = make_float3(-half_len_u, -half_len_v, 0.0f); + const float3 bbox_max = make_float3(half_len_u, half_len_v, FLT_MAX); + valid = ray_aabb_intersect(bbox_min, bbox_max, P, D, t_range); + } + } + else { + /* Conservative estimation with the smallest possible cone covering the whole spread. */ + const float3 apex_to_point = P + area_light_max_extent(light) / tan_half_spread * axis; + const float cos_angle_sq = 1.0f / (1.0f + sqr(tan_half_spread)); + + valid = ray_cone_intersect(axis, apex_to_point, D, cos_angle_sq, t_range); + } + + /* Limit the range to the positive side of the area light. */ + return valid && ray_plane_intersect(axis, P, D, t_range); +} + template ccl_device_forceinline bool area_light_tree_parameters(const ccl_global KernelLight *klight, const float3 centroid, diff --git a/intern/cycles/kernel/light/spot.h b/intern/cycles/kernel/light/spot.h index c5090573d4d..56989933ded 100644 --- a/intern/cycles/kernel/light/spot.h +++ b/intern/cycles/kernel/light/spot.h @@ -265,6 +265,24 @@ ccl_device_inline bool spot_light_sample_from_intersection( return true; } +/* Find the ray segment lit by the spot light. */ +ccl_device_inline bool spot_light_valid_ray_segment(const ccl_global KernelLight *klight, + const float3 P, + const float3 D, + ccl_private float2 *t_range) +{ + /* Convert to local space of the spot light. */ + const Transform itfm = klight->itfm; + float3 local_P = P + klight->spot.dir * klight->spot.ray_segment_dp; + local_P = transform_point(&itfm, local_P); + const float3 local_D = transform_direction(&itfm, D); + const float3 axis = make_float3(0.0f, 0.0f, -1.0f); + + /* Intersect the ray with the smallest enclosing cone of the light spread. */ + return ray_cone_intersect( + axis, local_P, local_D, sqr(klight->spot.cos_half_spot_angle), t_range); +} + template ccl_device_forceinline bool spot_light_tree_parameters(const ccl_global KernelLight *klight, const float3 centroid, diff --git a/intern/cycles/kernel/light/triangle.h b/intern/cycles/kernel/light/triangle.h index 58fc8ea1d92..16834555f1a 100644 --- a/intern/cycles/kernel/light/triangle.h +++ b/intern/cycles/kernel/light/triangle.h @@ -269,6 +269,25 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg, return (ls->pdf > 0.0f); } +/* Find the ray segment lit by the triangle light. */ +ccl_device_inline bool triangle_light_valid_ray_segment(KernelGlobals kg, + const float3 P, + const float3 D, + ccl_private float2 *t_range, + const ccl_private LightSample *ls) +{ + const int shader_flag = kernel_data_fetch(shaders, ls->shader & SHADER_MASK).flags; + const int SD_MIS_BOTH = SD_MIS_BACK | SD_MIS_FRONT; + if ((shader_flag & SD_MIS_BOTH) == SD_MIS_BOTH) { + /* Both sides are sampled, the complete ray segment is visible. */ + return true; + } + + /* Only one side is sampled, intersect the ray and the triangle light plane to find the visible + * ray segment. Flip normal if Emission Sampling is set to back. */ + return ray_plane_intersect((shader_flag & SD_MIS_BACK) ? -ls->Ng : ls->Ng, P, D, t_range); +} + template ccl_device_forceinline bool triangle_light_tree_parameters( KernelGlobals kg, diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 2ed5a790199..b4978744cc7 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1376,6 +1376,8 @@ typedef struct KernelSpotLight { int is_sphere; /* For non-uniform object scaling, the actual spread might be different. */ float cos_half_larger_spread; + /* Distance from the apex of the smallest enclosing cone of the light spread to light center. */ + float ray_segment_dp; } KernelSpotLight; /* PointLight is SpotLight with only radius and invarea being used. */ diff --git a/intern/cycles/scene/light.cpp b/intern/cycles/scene/light.cpp index 78d237bcd8c..fb424a8fadf 100644 --- a/intern/cycles/scene/light.cpp +++ b/intern/cycles/scene/light.cpp @@ -1362,6 +1362,9 @@ void LightManager::device_update_lights(Device *device, DeviceScene *dscene, Sce /* Choose the angle which spans a larger cone. */ klights[light_index].spot.cos_half_larger_spread = inversesqrtf( 1.0f + tan_sq * fmaxf(len_u_sq, len_v_sq) / len_w_sq); + /* radius / sin(half_angle_small) */ + klights[light_index].spot.ray_segment_dp = + light->size * sqrtf(1.0f + len_w_sq / (tan_sq * fminf(len_u_sq, len_v_sq))); } klights[light_index].shader_id = shader_id; diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index 7d5cab7e30c..cdea258c916 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -1030,6 +1030,46 @@ ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) #endif } +/* Check if intervals (first->x, first->y) and (second.x, second.y) intersect, and replace the + * first interval with their intersection. */ +ccl_device_inline bool intervals_intersect(ccl_private float2 *first, const float2 second) +{ + first->x = fmaxf(first->x, second.x); + first->y = fminf(first->y, second.y); + + return first->x < first->y; +} + +/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3 + * The solution is ordered so that x1 <= x2. + * Returns true if at least one solution is found. */ +ccl_device_inline bool solve_quadratic( + const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2) +{ + /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */ + const bool valid_linear = (a == 0.0f) && (b != 0.0f); + x1 = x2 = -c / b; + + const float discriminant = sqr(b) - 4.0f * a * c; + /* Allow slightly negative discriminant in case of numerical precision issues. */ + const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f); + + if (valid_quadratic) { + /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic + * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of + * greater magnitude which does not suffer from loss of precision, then using the identity + * x1 * x2 = c / a. */ + const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b)); + const float r1 = temp / a; + const float r2 = c / temp; + + x1 = fminf(r1, r2); + x2 = fmaxf(r1, r2); + } + + return (valid_linear || valid_quadratic); +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h index b09cf2a4b1b..2e4b9c979f7 100644 --- a/intern/cycles/util/math_intersect.h +++ b/intern/cycles/util/math_intersect.h @@ -302,6 +302,140 @@ ccl_device bool ray_quad_intersect(float3 ray_P, return true; } +/* Find the ray segment that lies in the same side as the normal `N` of the plane. + * `P` is the vector pointing from any point on the plane to the ray origin. */ +ccl_device bool ray_plane_intersect(const float3 N, + const float3 P, + const float3 ray_D, + ccl_private float2 *t_range) +{ + const float DN = dot(ray_D, N); + + /* Distance from P to the plane. */ + const float t = -dot(P, N) / DN; + + /* Limit the range to the positive side. */ + if (DN > 0.0f) { + t_range->x = fmaxf(t_range->x, t); + } + else { + t_range->y = fminf(t_range->y, t); + } + + return t_range->x < t_range->y; +} + +/* Find the ray segment inside an axis-aligned bounding box. */ +ccl_device bool ray_aabb_intersect(const float3 bbox_min, + const float3 bbox_max, + const float3 ray_P, + const float3 ray_D, + ccl_private float2 *t_range) +{ + const float3 inv_ray_D = rcp(ray_D); + + /* Absolute distances to lower and upper box coordinates; */ + const float3 t_lower = (bbox_min - ray_P) * inv_ray_D; + const float3 t_upper = (bbox_max - ray_P) * inv_ray_D; + + /* The four t-intervals (for x-/y-/z-slabs, and ray p(t)). */ + const float4 tmins = float3_to_float4(min(t_lower, t_upper), t_range->x); + const float4 tmaxes = float3_to_float4(max(t_lower, t_upper), t_range->y); + + /* Max of mins and min of maxes. */ + const float tmin = reduce_max(tmins); + const float tmax = reduce_min(tmaxes); + + *t_range = make_float2(tmin, tmax); + + return tmin < tmax; +} + +/* Find the segment of a ray defined by P + D * t that lies inside a cylinder defined by + * (x / len_u)^2 + (y / len_v)^2 = 1. */ +ccl_device_inline bool ray_infinite_cylinder_intersect(const float3 P, + const float3 D, + const float len_u, + const float len_v, + ccl_private float2 *t_range) +{ + /* Convert to a 2D problem. */ + const float2 inv_len = 1.0f / make_float2(len_u, len_v); + float2 P_proj = float3_to_float2(P) * inv_len; + const float2 D_proj = float3_to_float2(D) * inv_len; + + /* Solve quadratic equation a*t^2 + 2b*t + c = 0. */ + const float a = dot(D_proj, D_proj); + float b = dot(P_proj, D_proj); + + /* Move ray origin closer to the cylinder to prevent precision issue when the ray is far away. */ + const float t_mid = -b / a; + P_proj += D_proj * t_mid; + + /* Recompute b from the shifted origin. */ + b = dot(P_proj, D_proj); + const float c = dot(P_proj, P_proj) - 1.0f; + + float tmin, tmax; + const bool valid = solve_quadratic(a, 2.0f * b, c, tmin, tmax); + + return valid && intervals_intersect(t_range, make_float2(tmin, tmax) + t_mid); +} + +/* * + * Find the ray segment inside a single-sided cone. + * + * \param axis: a unit-length direction around which the cone has a circular symmetry + * \param P: the vector pointing from the cone apex to the ray origin + * \param D: the direction of the ray, does not need to have unit-length + * \param cos_angle_sq: `sqr(cos(half_aperture_of_the_cone))` + * \param t_range: the lower and upper bounds between which the ray lies inside the cone + * \return whether the intersection exists and is in the provided range + * + * See https://www.geometrictools.com/Documentation/IntersectionLineCone.pdf for illustration + */ +ccl_device_inline bool ray_cone_intersect(const float3 axis, + const float3 P, + float3 D, + const float cos_angle_sq, + ccl_private float2 *t_range) +{ + if (cos_angle_sq < 1e-4f) { + /* The cone is nearly a plane. */ + return ray_plane_intersect(axis, P, D, t_range); + } + + const float inv_len = inversesqrtf(len_squared(D)); + D *= inv_len; + + const float AD = dot(axis, D); + const float AP = dot(axis, P); + + const float a = sqr(AD) - cos_angle_sq; + const float b = 2.0f * (AD * AP - cos_angle_sq * dot(D, P)); + const float c = sqr(AP) - cos_angle_sq * dot(P, P); + + float tmin = 0.0f, tmax = FLT_MAX; + bool valid = solve_quadratic(a, b, c, tmin, tmax); + + /* Check if the intersections are in the same hemisphere as the cone. */ + const bool tmin_valid = AP + tmin * AD > 0.0f; + const bool tmax_valid = AP + tmax * AD > 0.0f; + + valid &= (tmin_valid || tmax_valid); + + if (!tmax_valid) { + tmax = tmin; + tmin = 0.0f; + } + else if (!tmin_valid) { + tmin = tmax; + tmax = FLT_MAX; + } + + return valid && intervals_intersect(t_range, make_float2(tmin, tmax) * inv_len); +} + CCL_NAMESPACE_END #endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 208c68dc5a1..0263be7c841 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -161,6 +161,17 @@ ccl_device_inline Transform make_transform(float a, return t; } +ccl_device_inline Transform make_transform(const float3 x, const float3 y, const float3 z) +{ + Transform t; + + t.x = float3_to_float4(x, 0.0f); + t.y = float3_to_float4(y, 0.0f); + t.z = float3_to_float4(z, 0.0f); + + return t; +} + ccl_device_inline Transform euler_to_transform(const float3 euler) { float cx = cosf(euler.x); diff --git a/tests/data b/tests/data index 5038ad7165f..00af9c65712 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 5038ad7165fd1a77e61e0d2d6efdadd6ea7c0dfb +Subproject commit 00af9c65712b6aa78ce6eb0c62c5aafb7a867f18