diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h index 519cbf00294..5448a3ae41d 100644 --- a/intern/cycles/device/metal/bvh.h +++ b/intern/cycles/device/metal/bvh.h @@ -21,6 +21,7 @@ class BVHMetal : public BVH { API_AVAILABLE(macos(11.0)) vector> blas_array; + vector blas_lookup; bool motion_blur = false; diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm index a7fd64d3c98..c692b762d86 100644 --- a/intern/cycles/device/metal/bvh.mm +++ b/intern/cycles/device/metal/bvh.mm @@ -816,6 +816,11 @@ bool BVHMetal::build_TLAS(Progress &progress, uint32_t instance_index = 0; uint32_t motion_transform_index = 0; + + // allocate look up buffer for wost case scenario + uint64_t count = objects.size(); + blas_lookup.resize(count); + for (Object *ob : objects) { /* Skip non-traceable objects */ if (!ob->is_traceable()) @@ -843,12 +848,15 @@ bool BVHMetal::build_TLAS(Progress &progress, /* Set user instance ID to object index */ int object_index = ob->get_device_index(); uint32_t user_id = uint32_t(object_index); + int currIndex = instance_index++; + assert(user_id < blas_lookup.size()); + blas_lookup[user_id] = accel_struct_index; /* Bake into the appropriate descriptor */ if (motion_blur) { MTLAccelerationStructureMotionInstanceDescriptor *instances = (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents]; - MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++]; + MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex]; desc.accelerationStructureIndex = accel_struct_index; desc.userID = user_id; @@ -894,7 +902,7 @@ bool BVHMetal::build_TLAS(Progress &progress, else { MTLAccelerationStructureUserIDInstanceDescriptor *instances = (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents]; - MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++]; + MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex]; desc.accelerationStructureIndex = accel_struct_index; desc.userID = user_id; diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index a10962b4e45..2b89ebf19c9 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -74,6 +74,11 @@ class MetalDevice : public Device { id texture_bindings_3d = nil; std::vector> texture_slot_map; + /* BLAS encoding & lookup */ + id mtlBlasArgEncoder = nil; + id blas_buffer = nil; + id blas_lookup_buffer = nil; + bool use_metalrt = false; MetalPipelineType kernel_specialization_level = PSO_GENERIC; diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 35298822e41..aadf5e02934 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -192,6 +192,10 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile arg_desc_as.dataType = MTLDataTypeInstanceAccelerationStructure; arg_desc_as.access = MTLArgumentAccessReadOnly; + MTLArgumentDescriptor *arg_desc_ptrs = [[MTLArgumentDescriptor alloc] init]; + arg_desc_ptrs.dataType = MTLDataTypePointer; + arg_desc_ptrs.access = MTLArgumentAccessReadOnly; + MTLArgumentDescriptor *arg_desc_ift = [[MTLArgumentDescriptor alloc] init]; arg_desc_ift.dataType = MTLDataTypeIntersectionFunctionTable; arg_desc_ift.access = MTLArgumentAccessReadOnly; @@ -204,14 +208,28 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_shadow */ arg_desc_ift.index = index++; [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local */ + arg_desc_ift.index = index++; + [ancillary_desc addObject:[arg_desc_ift copy]]; /* ift_local_prim */ + arg_desc_ptrs.index = index++; + [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* blas array */ + arg_desc_ptrs.index = index++; + [ancillary_desc addObject:[arg_desc_ptrs copy]]; /* look up table for blas */ [arg_desc_ift release]; [arg_desc_as release]; + [arg_desc_ptrs release]; } } mtlAncillaryArgEncoder = [mtlDevice newArgumentEncoderWithArguments:ancillary_desc]; + // preparing the blas arg encoder + MTLArgumentDescriptor *arg_desc_blas = [[MTLArgumentDescriptor alloc] init]; + arg_desc_blas.dataType = MTLDataTypeInstanceAccelerationStructure; + arg_desc_blas.access = MTLArgumentAccessReadOnly; + mtlBlasArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_blas ]]; + [arg_desc_blas release]; + for (int i = 0; i < ancillary_desc.count; i++) { [ancillary_desc[i] release]; } @@ -1240,6 +1258,33 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) if (@available(macos 11.0, *)) { if (bvh->params.top_level) { bvhMetalRT = bvh_metal; + + // allocate required buffers for BLAS array + uint64_t count = bvhMetalRT->blas_array.size(); + uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count; + blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode]; + stats.mem_alloc(blas_buffer.allocatedSize); + + for (uint64_t i = 0; i < count; ++i) { + [mtlBlasArgEncoder setArgumentBuffer:blas_buffer + offset:i * mtlBlasArgEncoder.encodedLength]; + [mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0]; + } + + count = bvhMetalRT->blas_lookup.size(); + bufferSize = sizeof(uint32_t) * count; + blas_lookup_buffer = [mtlDevice newBufferWithLength:bufferSize + options:default_storage_mode]; + stats.mem_alloc(blas_lookup_buffer.allocatedSize); + + memcpy([blas_lookup_buffer contents], + bvhMetalRT -> blas_lookup.data(), + blas_lookup_buffer.allocatedSize); + + if (default_storage_mode == MTLResourceStorageModeManaged) { + [blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)]; + [blas_lookup_buffer didModifyRange:NSMakeRange(0, blas_lookup_buffer.length)]; + } } } } diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h index 212671f52a0..0225c5c4947 100644 --- a/intern/cycles/device/metal/kernel.h +++ b/intern/cycles/device/metal/kernel.h @@ -19,6 +19,8 @@ enum { METALRT_FUNC_SHADOW_BOX, METALRT_FUNC_LOCAL_TRI, METALRT_FUNC_LOCAL_BOX, + METALRT_FUNC_LOCAL_TRI_PRIM, + METALRT_FUNC_LOCAL_BOX_PRIM, METALRT_FUNC_CURVE_RIBBON, METALRT_FUNC_CURVE_RIBBON_SHADOW, METALRT_FUNC_CURVE_ALL, @@ -28,7 +30,13 @@ enum { METALRT_FUNC_NUM }; -enum { METALRT_TABLE_DEFAULT, METALRT_TABLE_SHADOW, METALRT_TABLE_LOCAL, METALRT_TABLE_NUM }; +enum { + METALRT_TABLE_DEFAULT, + METALRT_TABLE_SHADOW, + METALRT_TABLE_LOCAL, + METALRT_TABLE_LOCAL_PRIM, + METALRT_TABLE_NUM +}; /* Pipeline State Object types */ enum MetalPipelineType { diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 2ed230ee657..d9e977f1ed6 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -524,6 +524,8 @@ void MetalKernelPipeline::compile() "__anyhit__cycles_metalrt_shadow_all_hit_box", "__anyhit__cycles_metalrt_local_hit_tri", "__anyhit__cycles_metalrt_local_hit_box", + "__anyhit__cycles_metalrt_local_hit_tri_prim", + "__anyhit__cycles_metalrt_local_hit_box_prim", "__intersection__curve_ribbon", "__intersection__curve_ribbon_shadow", "__intersection__curve_all", @@ -614,11 +616,17 @@ void MetalKernelPipeline::compile() rt_intersection_function[METALRT_FUNC_LOCAL_BOX], rt_intersection_function[METALRT_FUNC_LOCAL_BOX], nil]; + table_functions[METALRT_TABLE_LOCAL_PRIM] = [NSArray + arrayWithObjects:rt_intersection_function[METALRT_FUNC_LOCAL_TRI_PRIM], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM], + rt_intersection_function[METALRT_FUNC_LOCAL_BOX_PRIM], + nil]; NSMutableSet *unique_functions = [NSMutableSet setWithArray:table_functions[METALRT_TABLE_DEFAULT]]; [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_SHADOW]]; [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL]]; + [unique_functions addObjectsFromArray:table_functions[METALRT_TABLE_LOCAL_PRIM]]; if (kernel_has_intersection(device_kernel)) { linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]] diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index 9137e9b1fb0..b824b75ccf4 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -482,6 +482,12 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, if (metal_device_->bvhMetalRT) { id accel_struct = metal_device_->bvhMetalRT->accel_struct; [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2]; + [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer + offset:0 + atIndex:7]; + [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_lookup_buffer + offset:0 + atIndex:8]; } for (int table = 0; table < METALRT_TABLE_NUM; table++) { @@ -532,6 +538,10 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, if (bvhMetalRT) { /* Mark all Accelerations resources as used */ [mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead]; + [mtlComputeCommandEncoder useResource:metal_device_->blas_buffer + usage:MTLResourceUsageRead]; + [mtlComputeCommandEncoder useResource:metal_device_->blas_lookup_buffer + usage:MTLResourceUsageRead]; [mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data() count:bvhMetalRT->blas_array.size() usage:MTLResourceUsageRead]; diff --git a/intern/cycles/kernel/device/metal/bvh.h b/intern/cycles/kernel/device/metal/bvh.h index 2ea2d9c2601..f363b02ff1c 100644 --- a/intern/cycles/kernel/device/metal/bvh.h +++ b/intern/cycles/kernel/device/metal/bvh.h @@ -172,17 +172,14 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, kernel_assert(!"Invalid ift_local"); return false; } -# endif - - metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); - metalrt_intersector_type metalrt_intersect; - - metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); - - bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points; - if (triangle_only) { - metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + if (is_null_intersection_function_table(metal_ancillaries->ift_local_prim)) { + if (local_isect) { + local_isect->num_hits = 0; + } + kernel_assert(!"Invalid ift_local_prim"); + return false; } +# endif MetalRTIntersectionLocalPayload payload; payload.self = ray->self; @@ -195,14 +192,48 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, } payload.result = false; - typename metalrt_intersector_type::result_type intersection; + metal::raytracing::ray r(ray->P, ray->D, ray->tmin, ray->tmax); # if defined(__METALRT_MOTION__) + metalrt_intersector_type metalrt_intersect; + typename metalrt_intersector_type::result_type intersection; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points; + if (triangle_only) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + intersection = metalrt_intersect.intersect( r, metal_ancillaries->accel_struct, 0xFF, ray->time, metal_ancillaries->ift_local, payload); # else + + metalrt_blas_intersector_type metalrt_intersect; + typename metalrt_blas_intersector_type::result_type intersection; + + metalrt_intersect.force_opacity(metal::raytracing::forced_opacity::non_opaque); + bool triangle_only = !kernel_data.bvh.have_curves && !kernel_data.bvh.have_points; + if (triangle_only) { + metalrt_intersect.assume_geometry_type(metal::raytracing::geometry_type::triangle); + } + + // if we know we are going to get max one hit, like for random-sss-walk we can + // optimize and accept the first hit + if (max_hits == 1) { + metalrt_intersect.accept_any_intersection(true); + } + + int blas_index = metal_ancillaries->blas_userID_to_index_lookUp[local_object]; + // transform the ray into object's local space + Transform itfm = kernel_data_fetch(objects, local_object).itfm; + r.origin = transform_point(&itfm, r.origin); + r.direction = transform_direction(&itfm, r.direction); + intersection = metalrt_intersect.intersect( - r, metal_ancillaries->accel_struct, 0xFF, metal_ancillaries->ift_local, payload); + r, + metal_ancillaries->blas_accel_structs[blas_index].blas, + metal_ancillaries->ift_local_prim, + payload); # endif if (lcg_state) { diff --git a/intern/cycles/kernel/device/metal/compat.h b/intern/cycles/kernel/device/metal/compat.h index 9f2a393f9fe..317bdc2eaae 100644 --- a/intern/cycles/kernel/device/metal/compat.h +++ b/intern/cycles/kernel/device/metal/compat.h @@ -266,13 +266,25 @@ ccl_device_forceinline uchar4 make_uchar4(const uchar x, # if defined(__METALRT_MOTION__) # define METALRT_TAGS instancing, instance_motion, primitive_motion +# define METALRT_BLAS_TAGS , primitive_motion # else # define METALRT_TAGS instancing +# define METALRT_BLAS_TAGS # endif /* __METALRT_MOTION__ */ typedef acceleration_structure metalrt_as_type; typedef intersection_function_table metalrt_ift_type; typedef metal::raytracing::intersector metalrt_intersector_type; +# if defined(__METALRT_MOTION__) +typedef acceleration_structure metalrt_blas_as_type; +typedef intersection_function_table metalrt_blas_ift_type; +typedef metal::raytracing::intersector + metalrt_blas_intersector_type; +# else +typedef acceleration_structure<> metalrt_blas_as_type; +typedef intersection_function_table metalrt_blas_ift_type; +typedef metal::raytracing::intersector metalrt_blas_intersector_type; +# endif #endif /* __METALRT__ */ @@ -285,6 +297,12 @@ struct Texture3DParamsMetal { texture3d tex; }; +#ifdef __METALRT__ +struct MetalRTBlasWrapper { + metalrt_blas_as_type blas; +}; +#endif + struct MetalAncillaries { device Texture2DParamsMetal *textures_2d; device Texture3DParamsMetal *textures_3d; @@ -294,6 +312,9 @@ struct MetalAncillaries { metalrt_ift_type ift_default; metalrt_ift_type ift_shadow; metalrt_ift_type ift_local; + metalrt_blas_ift_type ift_local_prim; + constant MetalRTBlasWrapper *blas_accel_structs; + constant int *blas_userID_to_index_lookUp; #endif }; diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal index 9424e3506bc..497d4ecac37 100644 --- a/intern/cycles/kernel/device/metal/kernel.metal +++ b/intern/cycles/kernel/device/metal/kernel.metal @@ -139,6 +139,20 @@ TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal, #endif } +[[intersection(triangle, triangle_data )]] TriangleIntersectionResult +__anyhit__cycles_metalrt_local_hit_tri_prim( + constant KernelParamsMetal &launch_params_metal [[buffer(1)]], + ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]], + uint primitive_id [[primitive_id]], + float2 barycentrics [[barycentric_coord]], + float ray_tmax [[distance]]) +{ + //instance_id, aka the user_id has been removed. If we take this function we optimized the + //SSS for starting traversal from a primitive acceleration structure instead of the root of the global AS. + //this means we will always be intersecting the correct object no need for the userid to check + return metalrt_local_hit( + launch_params_metal, payload, payload.local_object, primitive_id, barycentrics, ray_tmax); +} [[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult __anyhit__cycles_metalrt_local_hit_tri( constant KernelParamsMetal &launch_params_metal [[buffer(1)]], @@ -163,6 +177,17 @@ __anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]]) return result; } +[[intersection(bounding_box, triangle_data )]] BoundingBoxIntersectionResult +__anyhit__cycles_metalrt_local_hit_box_prim(const float ray_tmax [[max_distance]]) +{ + /* unused function */ + BoundingBoxIntersectionResult result; + result.distance = ray_tmax; + result.accept = false; + result.continue_search = false; + return result; +} + template bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal, ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,