Merge branch 'blender-v4.5-release'
This commit is contained in:
@@ -27,7 +27,7 @@ class MetalDeviceQueue : public DeviceQueue {
|
||||
|
||||
int num_concurrent_states(const size_t /*state_size*/) const override;
|
||||
int num_concurrent_busy_states(const size_t /*state_size*/) const override;
|
||||
/* Sort-partition query declared as an `override` in the Metal queue class. This hunk lists the
 * parameterless `num_sort_partition_elements()` form next to the `num_sort_partitions()` form,
 * which takes the path count and the scene shader count.
 * NOTE(review): presumably these are the removed and added sides of the hunk -- confirm against
 * the raw patch. */
int num_sort_partition_elements() const override;
|
||||
int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const override;
|
||||
bool supports_local_atomic_sort() const override;
|
||||
|
||||
void init_execution() override;
|
||||
|
||||
@@ -297,9 +297,19 @@ int MetalDeviceQueue::num_concurrent_busy_states(const size_t state_size) const
|
||||
return num_concurrent_states(state_size) / 4;
|
||||
}
|
||||
|
||||
/* Diff view of the Metal sort-partition query. The `num_sort_partition_elements()` signature and
 * its single `return` are listed next to the `num_sort_partitions()` signature and body; both sets
 * of lines share the braces below.
 * NOTE(review): presumably the former is the removed side and the latter the added side of the
 * hunk -- confirm against the raw patch.
 *
 * `num_sort_partitions()` takes the partition size from
 * `MetalInfo::optimal_sort_partition_elements()`. It returns
 * `max(max_num_paths / sort_partition_elements, 1)` when `max_scene_shaders < 300` and the
 * partition size is positive, and 1 in every other case. */
int MetalDeviceQueue::num_sort_partition_elements() const
|
||||
int MetalDeviceQueue::num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
|
||||
{
|
||||
return MetalInfo::optimal_sort_partition_elements();
|
||||
int sort_partition_elements = MetalInfo::optimal_sort_partition_elements();
|
||||
/* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of
|
||||
* a more sophisticated heuristic we simply disable sort partitioning if the shader count is
|
||||
* high.
|
||||
*/
|
||||
/* The `sort_partition_elements > 0` test keeps the division below from dividing by zero. */
if (max_scene_shaders < 300 && sort_partition_elements > 0) {
|
||||
/* Integer division; the `max(..., 1)` keeps the result at one partition or more. */
return max(max_num_paths / sort_partition_elements, 1);
|
||||
}
|
||||
else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
bool MetalDeviceQueue::supports_local_atomic_sort() const
|
||||
|
||||
@@ -42,9 +42,14 @@ int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) c
|
||||
return 4 * max(8 * max_num_threads, 65536);
|
||||
}
|
||||
|
||||
/* Diff view of the oneAPI sort-partition query. The `num_sort_partition_elements()` signature and
 * its single `return` are listed next to the `num_sort_partitions()` signature and body; both sets
 * of lines share the braces below.
 * NOTE(review): presumably the former is the removed side and the latter the added side of the
 * hunk -- confirm against the raw patch.
 *
 * `num_sort_partitions()` uses a partition size of 65536 when
 * `oneapi_device_->get_max_num_threads_per_multiprocessor()` is at least 128 and 8192 otherwise,
 * then returns `max(max_num_paths / sort_partition_elements, 1)`. The shader-count parameter is
 * left unnamed and is not read. */
int OneapiDeviceQueue::num_sort_partition_elements() const
|
||||
int OneapiDeviceQueue::num_sort_partitions(int max_num_paths, uint /*max_scene_shaders*/) const
|
||||
{
|
||||
return (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ? 65536 : 8192;
|
||||
int sort_partition_elements = (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ?
|
||||
65536 :
|
||||
8192;
|
||||
/* Sort partitioning with local sorting on Intel GPUs is currently the most effective solution no
|
||||
* matter the number of shaders. */
|
||||
/* Both candidate partition sizes are non-zero constants, so the division needs no zero guard;
 * the `max(..., 1)` keeps the result at one partition or more. */
return max(max_num_paths / sort_partition_elements, 1);
|
||||
}
|
||||
|
||||
void OneapiDeviceQueue::init_execution()
|
||||
|
||||
@@ -27,7 +27,7 @@ class OneapiDeviceQueue : public DeviceQueue {
|
||||
|
||||
int num_concurrent_busy_states(const size_t state_size) const override;
|
||||
|
||||
/* Sort-partition query declared as an `override` in the oneAPI queue class. This hunk lists the
 * parameterless `num_sort_partition_elements()` form next to the `num_sort_partitions()` form,
 * which takes the path count and the scene shader count.
 * NOTE(review): presumably these are the removed and added sides of the hunk -- confirm against
 * the raw patch. */
int num_sort_partition_elements() const override;
|
||||
int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const override;
|
||||
|
||||
void init_execution() override;
|
||||
|
||||
|
||||
@@ -99,11 +99,20 @@ class DeviceQueue {
|
||||
* value. */
|
||||
virtual int num_concurrent_busy_states(const size_t state_size) const = 0;
|
||||
|
||||
/* Diff view of the base-class default for the sort-partition query. The
 * `num_sort_partition_elements()` declaration, its comment opener and its `return 65536;` are
 * listed next to the `num_sort_partitions()` declaration, comment opener and body; both sets of
 * lines share the braces below.
 * NOTE(review): presumably the former is the removed side and the latter the added side of the
 * hunk -- confirm against the raw patch.
 *
 * The function is `virtual` with a body, so it acts as the default for queues that do not
 * override it. `num_sort_partitions()` returns `max(max_num_paths / 65536, 1)` when
 * `max_scene_shaders < 300`, and 1 otherwise. */
/* Number of elements in a partition of sorted shaders, that improves memory locality of
|
||||
/* Number of partitions of sorted shaders, that improves memory locality of
|
||||
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
|
||||
virtual int num_sort_partition_elements() const
|
||||
virtual int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const
|
||||
{
|
||||
return 65536;
|
||||
/* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of
|
||||
* a more sophisticated heuristic we simply disable sort partitioning if the shader count is
|
||||
* high.
|
||||
*/
|
||||
if (max_scene_shaders < 300) {
|
||||
/* Integer division by the fixed partition size of 65536; the `max(..., 1)` keeps the result at
 * one partition or more. */
return max(max_num_paths / 65536, 1);
|
||||
}
|
||||
else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Does device support local atomic sorting kernels (INTEGRATOR_SORT_BUCKET_PASS and
|
||||
|
||||
@@ -241,17 +241,8 @@ void PathTraceWorkGPU::alloc_integrator_queue()
|
||||
|
||||
void PathTraceWorkGPU::alloc_integrator_sorting()
|
||||
{
|
||||
/* Compute sort partitions, to balance between memory locality and coherence.
|
||||
* Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
|
||||
* more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
|
||||
*/
|
||||
num_sort_partitions_ = 1;
|
||||
if (device_scene_->data.max_shaders < 300) {
|
||||
const int num_elements = queue_->num_sort_partition_elements();
|
||||
if (num_elements) {
|
||||
num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
|
||||
}
|
||||
}
|
||||
num_sort_partitions_ = queue_->num_sort_partitions(max_num_paths_,
|
||||
device_scene_->data.max_shaders);
|
||||
|
||||
integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
|
||||
num_sort_partitions_);
|
||||
|
||||
Reference in New Issue
Block a user