Cycles: Add optional per-kernel performance statistics
When verbose level 4 is enabled, Blender prints kernel performance data for Cycles on GPU backends (except Metal, which doesn't use the debug_enqueue_* methods) for groups of kernels. These changes introduce a new CYCLES_DEBUG_PER_KERNEL_PERFORMANCE environment variable that allows getting timings for each kernel separately rather than grouped with others. This is done by adding explicit synchronization after each kernel execution. Differential Revision: https://developer.blender.org/D15971
This commit is contained in:
@@ -77,7 +77,7 @@ bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
|
||||
void **args = const_cast<void **>(_args.values);
|
||||
|
||||
debug_enqueue(kernel, signed_kernel_work_size);
|
||||
debug_enqueue_begin(kernel, signed_kernel_work_size);
|
||||
assert(signed_kernel_work_size >= 0);
|
||||
size_t kernel_work_size = (size_t)signed_kernel_work_size;
|
||||
|
||||
@@ -97,6 +97,8 @@ bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
oneapi_device_->oneapi_error_message() + "\"");
|
||||
}
|
||||
|
||||
debug_enqueue_end();
|
||||
|
||||
return is_finished_ok;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user