Cycles: Fix hanging unit tests when MetalRT is enabled
This patch fixes unit tests that hang when MetalRT is enabled. It simplifies and fixes the kernel selection logic by baking the MetalRT-specific options into `kernels_md5`, rather than expanding out and testing the MetalRT bit flags explicitly. Pull Request: #105270
This commit is contained in:
committed by
Michael Jones (Apple)
parent
9fccd2dc24
commit
7842347ec8
@@ -182,7 +182,7 @@ class Device {
|
||||
{
|
||||
}
|
||||
|
||||
/* Return true if device is ready for rendering, or report status if not. */
|
||||
/* Report status and return true if device is ready for rendering. */
|
||||
virtual bool is_ready(string & /*status*/) const
|
||||
{
|
||||
return true;
|
||||
|
||||
@@ -476,6 +476,9 @@ bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_
|
||||
MD5Hash md5;
|
||||
md5.append(constant_values);
|
||||
md5.append(source[pso_type]);
|
||||
if (use_metalrt) {
|
||||
md5.append(string_printf("metalrt_features=%d", kernel_features & METALRT_FEATURE_MASK));
|
||||
}
|
||||
kernels_md5[pso_type] = md5.get_hex();
|
||||
|
||||
return MetalDeviceKernels::should_load_kernels(this, pso_type);
|
||||
@@ -917,6 +920,17 @@ bool MetalDevice::is_ready(string &status) const
|
||||
DEVICE_KERNEL_NUM);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (int num_requests = MetalDeviceKernels::num_incomplete_specialization_requests()) {
|
||||
status = string_printf("%d kernels to optimize", num_requests);
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_INTERSECT) {
|
||||
status = "Using optimized intersection kernels";
|
||||
}
|
||||
else if (kernel_specialization_level == PSO_SPECIALIZED_SHADE) {
|
||||
status = "Using optimized kernels";
|
||||
}
|
||||
|
||||
metal_printf("MetalDevice::is_ready(...) --> true\n");
|
||||
return true;
|
||||
}
|
||||
@@ -953,7 +967,7 @@ void MetalDevice::optimize_for_scene(Scene *scene)
|
||||
}
|
||||
|
||||
if (specialize_in_background) {
|
||||
if (!MetalDeviceKernels::any_specialization_happening_now()) {
|
||||
if (MetalDeviceKernels::num_incomplete_specialization_requests() == 0) {
|
||||
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
|
||||
specialize_kernels_fn);
|
||||
}
|
||||
|
||||
@@ -63,8 +63,7 @@ enum MetalPipelineType {
|
||||
};
|
||||
|
||||
# define METALRT_FEATURE_MASK \
|
||||
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD | \
|
||||
KERNEL_FEATURE_OBJECT_MOTION)
|
||||
(KERNEL_FEATURE_HAIR | KERNEL_FEATURE_HAIR_THICK | KERNEL_FEATURE_POINTCLOUD)
|
||||
|
||||
const char *kernel_type_as_string(MetalPipelineType pso_type);
|
||||
|
||||
@@ -81,7 +80,7 @@ struct MetalKernelPipeline {
|
||||
|
||||
KernelData kernel_data_;
|
||||
bool use_metalrt;
|
||||
uint32_t metalrt_features = 0;
|
||||
uint32_t kernel_features = 0;
|
||||
|
||||
int threads_per_threadgroup;
|
||||
|
||||
@@ -104,7 +103,7 @@ struct MetalKernelPipeline {
|
||||
/* Cache of Metal kernels for each DeviceKernel. */
|
||||
namespace MetalDeviceKernels {
|
||||
|
||||
bool any_specialization_happening_now();
|
||||
int num_incomplete_specialization_requests();
|
||||
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
|
||||
bool load(MetalDevice *device, MetalPipelineType pso_type);
|
||||
|
||||
@@ -344,9 +344,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
/* metalrt options */
|
||||
pipeline->use_metalrt = device->use_metalrt;
|
||||
pipeline->metalrt_features = device->use_metalrt ?
|
||||
(device->kernel_features & METALRT_FEATURE_MASK) :
|
||||
0;
|
||||
pipeline->kernel_features = device->kernel_features;
|
||||
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
@@ -357,65 +355,36 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
|
||||
|
||||
MetalKernelPipeline *ShaderCache::get_best_pipeline(DeviceKernel kernel, const MetalDevice *device)
|
||||
{
|
||||
/* metalrt options */
|
||||
bool use_metalrt = device->use_metalrt;
|
||||
bool device_metalrt_hair = use_metalrt && device->kernel_features & KERNEL_FEATURE_HAIR;
|
||||
bool device_metalrt_hair_thick = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_HAIR_THICK;
|
||||
bool device_metalrt_pointcloud = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_POINTCLOUD;
|
||||
bool device_metalrt_motion = use_metalrt &&
|
||||
device->kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
MetalKernelPipeline *best_pipeline = nullptr;
|
||||
while (!best_pipeline) {
|
||||
while (running) {
|
||||
/* Search all loaded pipelines with matching kernels_md5 checksums. */
|
||||
MetalKernelPipeline *best_match = nullptr;
|
||||
{
|
||||
thread_scoped_lock lock(cache_mutex);
|
||||
for (auto &pipeline : pipelines[kernel]) {
|
||||
if (!pipeline->loaded) {
|
||||
/* still loading - ignore */
|
||||
continue;
|
||||
}
|
||||
|
||||
bool pipeline_metalrt_hair = pipeline->metalrt_features & KERNEL_FEATURE_HAIR;
|
||||
bool pipeline_metalrt_hair_thick = pipeline->metalrt_features & KERNEL_FEATURE_HAIR_THICK;
|
||||
bool pipeline_metalrt_pointcloud = pipeline->metalrt_features & KERNEL_FEATURE_POINTCLOUD;
|
||||
bool pipeline_metalrt_motion = use_metalrt &&
|
||||
pipeline->metalrt_features & KERNEL_FEATURE_OBJECT_MOTION;
|
||||
|
||||
if (pipeline->use_metalrt != use_metalrt || pipeline_metalrt_hair != device_metalrt_hair ||
|
||||
pipeline_metalrt_hair_thick != device_metalrt_hair_thick ||
|
||||
pipeline_metalrt_pointcloud != device_metalrt_pointcloud ||
|
||||
pipeline_metalrt_motion != device_metalrt_motion) {
|
||||
/* wrong combination of metalrt options */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (pipeline->pso_type != PSO_GENERIC) {
|
||||
if (pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_INTERSECT] ||
|
||||
pipeline->kernels_md5 == device->kernels_md5[PSO_SPECIALIZED_SHADE]) {
|
||||
best_pipeline = pipeline.get();
|
||||
for (auto &candidate : pipelines[kernel]) {
|
||||
if (candidate->loaded &&
|
||||
candidate->kernels_md5 == device->kernels_md5[candidate->pso_type]) {
|
||||
/* Replace existing match if candidate is more specialized. */
|
||||
if (!best_match || candidate->pso_type > best_match->pso_type) {
|
||||
best_match = candidate.get();
|
||||
}
|
||||
}
|
||||
else if (!best_pipeline) {
|
||||
best_pipeline = pipeline.get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!best_pipeline) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
if (best_match) {
|
||||
if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_match->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
}
|
||||
best_match->usage_count += 1;
|
||||
return best_match;
|
||||
}
|
||||
}
|
||||
|
||||
if (best_pipeline->usage_count == 0 && best_pipeline->pso_type != PSO_GENERIC) {
|
||||
metal_printf("Swapping in %s version of %s\n",
|
||||
kernel_type_as_string(best_pipeline->pso_type),
|
||||
device_kernel_as_string(kernel));
|
||||
/* Spin until a matching kernel is loaded, or we're shutting down. */
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
}
|
||||
best_pipeline->usage_count += 1;
|
||||
|
||||
return best_pipeline;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool MetalKernelPipeline::should_use_binary_archive() const
|
||||
@@ -570,18 +539,14 @@ void MetalKernelPipeline::compile()
|
||||
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
|
||||
NSArray *linked_functions = nil;
|
||||
|
||||
bool metalrt_hair = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR);
|
||||
bool metalrt_hair_thick = use_metalrt && (metalrt_features & KERNEL_FEATURE_HAIR_THICK);
|
||||
bool metalrt_pointcloud = use_metalrt && (metalrt_features & KERNEL_FEATURE_POINTCLOUD);
|
||||
|
||||
if (use_metalrt) {
|
||||
id<MTLFunction> curve_intersect_default = nil;
|
||||
id<MTLFunction> curve_intersect_shadow = nil;
|
||||
id<MTLFunction> point_intersect_default = nil;
|
||||
id<MTLFunction> point_intersect_shadow = nil;
|
||||
if (metalrt_hair) {
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
/* Add curve intersection programs. */
|
||||
if (metalrt_hair_thick) {
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
|
||||
/* Slower programs for thick hair since that also slows down ribbons.
|
||||
* Ideally this should not be needed. */
|
||||
curve_intersect_default = rt_intersection_function[METALRT_FUNC_CURVE_ALL];
|
||||
@@ -592,7 +557,7 @@ void MetalKernelPipeline::compile()
|
||||
curve_intersect_shadow = rt_intersection_function[METALRT_FUNC_CURVE_RIBBON_SHADOW];
|
||||
}
|
||||
}
|
||||
if (metalrt_pointcloud) {
|
||||
if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
|
||||
point_intersect_default = rt_intersection_function[METALRT_FUNC_POINT];
|
||||
point_intersect_shadow = rt_intersection_function[METALRT_FUNC_POINT_SHADOW];
|
||||
}
|
||||
@@ -682,15 +647,6 @@ void MetalKernelPipeline::compile()
|
||||
local_md5.append((uint8_t *)&this->threads_per_threadgroup,
|
||||
sizeof(this->threads_per_threadgroup));
|
||||
|
||||
string options;
|
||||
if (use_metalrt && kernel_has_intersection(device_kernel)) {
|
||||
/* incorporate any MetalRT specializations into the archive name */
|
||||
options += string_printf(".hair_%d.hair_thick_%d.pointcloud_%d",
|
||||
metalrt_hair ? 1 : 0,
|
||||
metalrt_hair_thick ? 1 : 0,
|
||||
metalrt_pointcloud ? 1 : 0);
|
||||
}
|
||||
|
||||
/* Replace non-alphanumerical characters with underscores. */
|
||||
string device_name = [mtlDevice.name UTF8String];
|
||||
for (char &c : device_name) {
|
||||
@@ -702,7 +658,7 @@ void MetalKernelPipeline::compile()
|
||||
metalbin_name = device_name;
|
||||
metalbin_name = path_join(metalbin_name, device_kernel_as_string(device_kernel));
|
||||
metalbin_name = path_join(metalbin_name, kernel_type_as_string(pso_type));
|
||||
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + options + ".bin");
|
||||
metalbin_name = path_join(metalbin_name, local_md5.get_hex() + ".bin");
|
||||
|
||||
metalbin_path = path_cache_get(path_join("kernels", metalbin_name));
|
||||
path_create_directories(metalbin_path);
|
||||
@@ -860,16 +816,15 @@ void MetalDeviceKernels::wait_for_all()
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceKernels::any_specialization_happening_now()
|
||||
int MetalDeviceKernels::num_incomplete_specialization_requests()
|
||||
{
|
||||
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
|
||||
* only 1). */
|
||||
int total = 0;
|
||||
for (int i = 0; i < g_shaderCacheCount; i++) {
|
||||
if (g_shaderCache[i].second->incomplete_specialization_requests > 0) {
|
||||
return true;
|
||||
}
|
||||
total += g_shaderCache[i].second->incomplete_specialization_requests;
|
||||
}
|
||||
return false;
|
||||
return total;
|
||||
}
|
||||
|
||||
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
|
||||
|
||||
@@ -706,6 +706,12 @@ void Session::update_status_time(bool show_pause, bool show_done)
|
||||
string_printf("Sample %d/%d", current_sample, num_samples));
|
||||
}
|
||||
|
||||
/* Append any device-specific status (such as background kernel optimization) */
|
||||
string device_status;
|
||||
if (device->is_ready(device_status) && !device_status.empty()) {
|
||||
substatus += string_printf(" (%s)", device_status.c_str());
|
||||
}
|
||||
|
||||
/* TODO(sergey): Denoising status from the path trace. */
|
||||
|
||||
if (show_pause) {
|
||||
|
||||
Reference in New Issue
Block a user