Merge branch 'blender-v4.4-release'

This commit is contained in:
Bastien Montagne
2025-03-14 18:20:26 +01:00
4 changed files with 108 additions and 118 deletions

View File

@@ -84,10 +84,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
ccl_private uint *lcg_state,
const int max_hits)
{
if (local_isect != nullptr) {
local_isect->num_hits = 0;
}
if (!scene_intersect_valid(ray)) {
if (local_isect) {
local_isect->num_hits = 0;
}
return false;
}
const int primitive_type = kernel_data_fetch(objects, local_object).primitive_type;
if (!(primitive_type & PRIMITIVE_TRIANGLE)) {
/* Local intersection functions only consider triangle and motion triangle primitives.
* If the local intersection is requested from other primitives (curve or point cloud) perform
* an early return to avoid tree traversal with no primitive intersection. */
return false;
}
@@ -95,13 +104,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
if (local_isect != nullptr) {
local_isect->num_hits = 0;
}
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
# else
@@ -127,20 +131,30 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
GET_TRAVERSAL_STACK()
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
// we don't need custom intersection functions for SSR
# ifdef HIPRT_SHARED_STACK
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
# else
hiprtGeomTraversalAnyHit traversal(
local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload);
# endif
hiprtHit hit = traversal.getNextHit();
hiprtHit hit;
if (primitive_type == PRIMITIVE_MOTION_TRIANGLE) {
/* Motion triangle BVH uses custom primitives which requires custom traversal. */
hiprtGeomCustomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
hit = traversal.getNextHit();
}
else {
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
hit = traversal.getNextHit();
}
return hit.hasHit();
}
#endif //__BVH_LOCAL__

View File

@@ -28,7 +28,6 @@ struct ShadowPayload {
struct LocalPayload {
KernelGlobals kg;
RaySelfPrimitives self;
int prim_type;
float ray_time;
int local_object;
uint max_hits;
@@ -42,56 +41,35 @@ struct LocalPayload {
RAY_RT.maxT = RAY->tmax; \
RAY_RT.minT = RAY->tmin;
# if defined(HIPRT_SHARED_STACK)
# define GET_TRAVERSAL_STACK() \
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
Instance_Stack instance_stack;
# else
# define GET_TRAVERSAL_STACK()
# endif
# define GET_TRAVERSAL_STACK() \
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
Instance_Stack instance_stack;
# ifdef HIPRT_SHARED_STACK
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
# else
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \
ray_hip, \
visibility, \
FUNCTION_TABLE, \
hiprtTraversalHintDefault, \
&payload);
# endif
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
ccl_device_inline void set_intersect_point(KernelGlobals kg,
hiprtHit &hit,
@@ -243,23 +221,19 @@ ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ra
LocalIntersection *local_isect = local_payload->local_isect;
bool b_hit = motion_triangle_intersect_local(kg,
local_isect,
ray.origin,
ray.direction,
local_payload->ray_time,
object_id,
prim_id_global,
prim_id_local,
ray.minT,
ray.maxT,
local_payload->lcg_state,
local_payload->max_hits);
return motion_triangle_intersect_local(kg,
local_isect,
ray.origin,
ray.direction,
local_payload->ray_time,
object_id,
prim_id_global,
prim_id_local,
ray.minT,
ray.maxT,
local_payload->lcg_state,
local_payload->max_hits);
if (b_hit) {
local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE;
}
return b_hit;
# else
return false;
# endif
@@ -585,18 +559,33 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
# ifdef __BVH_LOCAL__
LocalPayload *payload = (LocalPayload *)user_data;
KernelGlobals kg = payload->kg;
int object_id = payload->local_object;
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
int prim = hit.primID + prim_offset;
const int object_id = payload->local_object;
const uint max_hits = payload->max_hits;
/* Triangle primitive uses hardware intersection, other primitives do custom intersection
* which does reservoir sampling for intersections. For the custom primitives only check
* whether we can stop traversal early on. The rest of the checks here are only done for the
* regular triangles. */
const int primitive_type = kernel_data_fetch(objects, object_id).primitive_type;
if (primitive_type != PRIMITIVE_TRIANGLE) {
if (max_hits == 0) {
return false;
}
return true;
}
const int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
const int prim = hit.primID + prim_offset;
# ifndef __RAY_OFFSET__
if (intersection_skip_self_local(payload->self, prim)) {
return true; // continue search
}
# endif
uint max_hits = payload->max_hits;
if (max_hits == 0) {
return false; // stop search
}
int hit_index = 0;
if (payload->lcg_state) {
for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) {
@@ -618,19 +607,20 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
}
payload->local_isect->num_hits = 1;
}
Intersection *isect = &payload->local_isect->hits[hit_index];
isect->t = hit.t;
isect->prim = prim;
isect->object = object_id;
isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type;
isect->u = hit.uv.x;
isect->v = hit.uv.y;
isect->prim = prim;
isect->object = object_id;
isect->type = primitive_type;
payload->local_isect->Ng[hit_index] = hit.normal;
return true;
# else
return false;
# endif
}

View File

@@ -14,8 +14,6 @@
#include "util/color.h" // IWYU pragma: export
#include "util/texture.h" // IWYU pragma: export
#define HIPRT_SHARED_STACK
/* The size of global stack available to each thread (memory reserved for each thread in
* global_stack_buffer). */
#define HIPRT_THREAD_STACK_SIZE 64
@@ -36,29 +34,20 @@ CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
hiprtGlobalStackBuffer global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
hiprtSharedStackBuffer shared_stack;
#endif
};
using KernelGlobals = ccl_global KernelGlobalsGPU *ccl_restrict;
#if defined(HIPRT_SHARED_STACK)
/* This macro allocates shared memory. KernelGlobals is used to pass the shared memory down to
* the intersection functions. */
# define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
#else
# define HIPRT_INIT_KERNEL_GLOBAL() \
KernelGlobals kg = nullptr; \
kg->global_stack_buffer = stack_buffer;
#endif
#define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
struct KernelParamsHIPRT {
KernelData data;
@@ -149,11 +138,8 @@ enum Filter_Function_Table_Index {
#ifdef __KERNEL_GPU__
__constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
typedef hiprtEmptyInstanceStack Instance_Stack;
# endif
#endif
/* Abstraction macros */

View File

@@ -588,7 +588,7 @@ static void write_jpeg(jpeg_compress_struct *cinfo, ImBuf *ibuf)
* which isn't needed by JPEG but #BLI_snprintf_rlen requires it. */
const size_t text_length_required = 7 + 2 + strlen(prop->name) + strlen(IDP_String(prop)) +
1;
if (text_length_required <= static_text_size) {
if (text_length_required > static_text_size) {
text = MEM_malloc_arrayN<char>(text_length_required, "jpeg metadata field");
text_size = text_length_required;
}