Files
test/intern/cycles/device/oneapi/queue.h
Xavier Hallade 2df163a648 Fix: Cycles low performance with scenes with many shaders on Arc B570
The performance of the sorted_paths_array kernel on B570 is problematic.
Relying on local sorting+partitioning instead gives a 25% overall rendering
speedup and no regression in shade_surface when rendering Agent 327 Barbershop scene.
On Arc A770, it still gives a 2% speedup when rendering Barbershop.

Pull Request: https://projects.blender.org/blender/blender/pulls/140308
2025-06-18 08:21:19 +02:00

57 lines
1.3 KiB
C++

/* SPDX-FileCopyrightText: 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
#ifdef WITH_ONEAPI
# include "device/memory.h"
# include "device/queue.h"
# include "kernel/device/oneapi/kernel.h"
# include "util/unique_ptr.h"
CCL_NAMESPACE_BEGIN
class OneapiDevice;
class device_memory;
/* Base class for OneAPI queues. */
class OneapiDeviceQueue : public DeviceQueue {
public:
explicit OneapiDeviceQueue(OneapiDevice *device);
int num_concurrent_states(const size_t state_size) const override;
int num_concurrent_busy_states(const size_t state_size) const override;
int num_sort_partitions(int max_num_paths, uint max_scene_shaders) const override;
void init_execution() override;
bool enqueue(DeviceKernel kernel,
const int kernel_work_size,
const DeviceKernelArguments &args) override;
bool synchronize() override;
void zero_to_device(device_memory &mem) override;
void copy_to_device(device_memory &mem) override;
void copy_from_device(device_memory &mem) override;
bool supports_local_atomic_sort() const override
{
return true;
}
protected:
OneapiDevice *oneapi_device_;
unique_ptr<KernelContext> kernel_context_;
};
CCL_NAMESPACE_END
#endif /* WITH_ONEAPI */