This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more. Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development. Release notes and code docs: https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles https://wiki.blender.org/wiki/Source/Render/Cycles Credits: * Sergey Sharybin * Brecht Van Lommel * Patrick Mours (OptiX backend) * Christophe Hery (subsurface scattering anisotropy) * William Leeson (PMJ sampling pattern) * Alaska (various fixes and tweaks) * Thomas Dinges (various fixes) For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests. Ref T87839, T87837, T87836 Fixes T90734, T89353, T80267, T80267, T77185, T69800
347 lines
8.4 KiB
C++
347 lines
8.4 KiB
C++
/*
|
|
* Copyright 2011-2013 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "bvh/bvh2.h"
|
|
|
|
#include "device/device.h"
|
|
#include "device/device_queue.h"
|
|
|
|
#include "device/cpu/device.h"
|
|
#include "device/cuda/device.h"
|
|
#include "device/dummy/device.h"
|
|
#include "device/multi/device.h"
|
|
#include "device/optix/device.h"
|
|
|
|
#include "util/util_foreach.h"
|
|
#include "util/util_half.h"
|
|
#include "util/util_logging.h"
|
|
#include "util/util_math.h"
|
|
#include "util/util_opengl.h"
|
|
#include "util/util_string.h"
|
|
#include "util/util_system.h"
|
|
#include "util/util_time.h"
|
|
#include "util/util_types.h"
|
|
#include "util/util_vector.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
bool Device::need_types_update = true;
|
|
bool Device::need_devices_update = true;
|
|
thread_mutex Device::device_mutex;
|
|
vector<DeviceInfo> Device::cuda_devices;
|
|
vector<DeviceInfo> Device::optix_devices;
|
|
vector<DeviceInfo> Device::cpu_devices;
|
|
uint Device::devices_initialized_mask = 0;
|
|
|
|
/* Device */
|
|
|
|
Device::~Device() noexcept(false)
|
|
{
|
|
}
|
|
|
|
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
|
{
|
|
assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
|
|
|
|
BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
|
|
if (refit) {
|
|
bvh2->refit(progress);
|
|
}
|
|
else {
|
|
bvh2->build(progress, &stats);
|
|
}
|
|
}
|
|
|
|
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
|
{
|
|
#ifdef WITH_MULTI
|
|
if (!info.multi_devices.empty()) {
|
|
/* Always create a multi device when info contains multiple devices.
|
|
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
|
|
* that it is a homogeneous collection of devices, which simplifies checks. */
|
|
return device_multi_create(info, stats, profiler);
|
|
}
|
|
#endif
|
|
|
|
Device *device = NULL;
|
|
|
|
switch (info.type) {
|
|
case DEVICE_CPU:
|
|
device = device_cpu_create(info, stats, profiler);
|
|
break;
|
|
#ifdef WITH_CUDA
|
|
case DEVICE_CUDA:
|
|
if (device_cuda_init())
|
|
device = device_cuda_create(info, stats, profiler);
|
|
break;
|
|
#endif
|
|
#ifdef WITH_OPTIX
|
|
case DEVICE_OPTIX:
|
|
if (device_optix_init())
|
|
device = device_optix_create(info, stats, profiler);
|
|
break;
|
|
#endif
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (device == NULL) {
|
|
device = device_dummy_create(info, stats, profiler);
|
|
}
|
|
|
|
return device;
|
|
}
|
|
|
|
DeviceType Device::type_from_string(const char *name)
|
|
{
|
|
if (strcmp(name, "CPU") == 0)
|
|
return DEVICE_CPU;
|
|
else if (strcmp(name, "CUDA") == 0)
|
|
return DEVICE_CUDA;
|
|
else if (strcmp(name, "OPTIX") == 0)
|
|
return DEVICE_OPTIX;
|
|
else if (strcmp(name, "MULTI") == 0)
|
|
return DEVICE_MULTI;
|
|
|
|
return DEVICE_NONE;
|
|
}
|
|
|
|
string Device::string_from_type(DeviceType type)
|
|
{
|
|
if (type == DEVICE_CPU)
|
|
return "CPU";
|
|
else if (type == DEVICE_CUDA)
|
|
return "CUDA";
|
|
else if (type == DEVICE_OPTIX)
|
|
return "OPTIX";
|
|
else if (type == DEVICE_MULTI)
|
|
return "MULTI";
|
|
|
|
return "";
|
|
}
|
|
|
|
vector<DeviceType> Device::available_types()
|
|
{
|
|
vector<DeviceType> types;
|
|
types.push_back(DEVICE_CPU);
|
|
#ifdef WITH_CUDA
|
|
types.push_back(DEVICE_CUDA);
|
|
#endif
|
|
#ifdef WITH_OPTIX
|
|
types.push_back(DEVICE_OPTIX);
|
|
#endif
|
|
return types;
|
|
}
|
|
|
|
vector<DeviceInfo> Device::available_devices(uint mask)
|
|
{
|
|
/* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
|
|
* be broken and cause crashes when only trying to get device info, so
|
|
* we don't want to do any initialization until the user chooses to. */
|
|
thread_scoped_lock lock(device_mutex);
|
|
vector<DeviceInfo> devices;
|
|
|
|
#if defined(WITH_CUDA) || defined(WITH_OPTIX)
|
|
if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
|
|
if (device_cuda_init()) {
|
|
device_cuda_info(cuda_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_CUDA;
|
|
}
|
|
if (mask & DEVICE_MASK_CUDA) {
|
|
foreach (DeviceInfo &info, cuda_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef WITH_OPTIX
|
|
if (mask & DEVICE_MASK_OPTIX) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
|
|
if (device_optix_init()) {
|
|
device_optix_info(cuda_devices, optix_devices);
|
|
}
|
|
devices_initialized_mask |= DEVICE_MASK_OPTIX;
|
|
}
|
|
foreach (DeviceInfo &info, optix_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
if (mask & DEVICE_MASK_CPU) {
|
|
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
|
|
device_cpu_info(cpu_devices);
|
|
devices_initialized_mask |= DEVICE_MASK_CPU;
|
|
}
|
|
foreach (DeviceInfo &info, cpu_devices) {
|
|
devices.push_back(info);
|
|
}
|
|
}
|
|
|
|
return devices;
|
|
}
|
|
|
|
DeviceInfo Device::dummy_device(const string &error_msg)
|
|
{
|
|
DeviceInfo info;
|
|
info.type = DEVICE_DUMMY;
|
|
info.error_msg = error_msg;
|
|
return info;
|
|
}
|
|
|
|
string Device::device_capabilities(uint mask)
|
|
{
|
|
thread_scoped_lock lock(device_mutex);
|
|
string capabilities = "";
|
|
|
|
if (mask & DEVICE_MASK_CPU) {
|
|
capabilities += "\nCPU device capabilities: ";
|
|
capabilities += device_cpu_capabilities() + "\n";
|
|
}
|
|
|
|
#ifdef WITH_CUDA
|
|
if (mask & DEVICE_MASK_CUDA) {
|
|
if (device_cuda_init()) {
|
|
capabilities += "\nCUDA device capabilities:\n";
|
|
capabilities += device_cuda_capabilities();
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return capabilities;
|
|
}
|
|
|
|
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
|
|
int threads,
|
|
bool background)
|
|
{
|
|
assert(subdevices.size() > 0);
|
|
|
|
if (subdevices.size() == 1) {
|
|
/* No multi device needed. */
|
|
return subdevices.front();
|
|
}
|
|
|
|
DeviceInfo info;
|
|
info.type = DEVICE_NONE;
|
|
info.id = "MULTI";
|
|
info.description = "Multi Device";
|
|
info.num = 0;
|
|
|
|
info.has_half_images = true;
|
|
info.has_nanovdb = true;
|
|
info.has_osl = true;
|
|
info.has_profiling = true;
|
|
info.has_peer_memory = false;
|
|
info.denoisers = DENOISER_ALL;
|
|
|
|
foreach (const DeviceInfo &device, subdevices) {
|
|
/* Ensure CPU device does not slow down GPU. */
|
|
if (device.type == DEVICE_CPU && subdevices.size() > 1) {
|
|
if (background) {
|
|
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
|
|
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
|
|
|
|
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
|
|
<< ", to dedicate to GPU.";
|
|
|
|
if (cpu_threads >= 1) {
|
|
DeviceInfo cpu_device = device;
|
|
cpu_device.cpu_threads = cpu_threads;
|
|
info.multi_devices.push_back(cpu_device);
|
|
}
|
|
else {
|
|
continue;
|
|
}
|
|
}
|
|
else {
|
|
VLOG(1) << "CPU render threads disabled for interactive render.";
|
|
continue;
|
|
}
|
|
}
|
|
else {
|
|
info.multi_devices.push_back(device);
|
|
}
|
|
|
|
/* Create unique ID for this combination of devices. */
|
|
info.id += device.id;
|
|
|
|
/* Set device type to MULTI if subdevices are not of a common type. */
|
|
if (info.type == DEVICE_NONE) {
|
|
info.type = device.type;
|
|
}
|
|
else if (device.type != info.type) {
|
|
info.type = DEVICE_MULTI;
|
|
}
|
|
|
|
/* Accumulate device info. */
|
|
info.has_half_images &= device.has_half_images;
|
|
info.has_nanovdb &= device.has_nanovdb;
|
|
info.has_osl &= device.has_osl;
|
|
info.has_profiling &= device.has_profiling;
|
|
info.has_peer_memory |= device.has_peer_memory;
|
|
info.denoisers &= device.denoisers;
|
|
}
|
|
|
|
return info;
|
|
}
|
|
|
|
void Device::tag_update()
|
|
{
|
|
free_memory();
|
|
}
|
|
|
|
void Device::free_memory()
|
|
{
|
|
devices_initialized_mask = 0;
|
|
cuda_devices.free_memory();
|
|
optix_devices.free_memory();
|
|
cpu_devices.free_memory();
|
|
}
|
|
|
|
unique_ptr<DeviceQueue> Device::gpu_queue_create()
|
|
{
|
|
LOG(FATAL) << "Device does not support queues.";
|
|
return nullptr;
|
|
}
|
|
|
|
const CPUKernels *Device::get_cpu_kernels() const
|
|
{
|
|
LOG(FATAL) << "Device does not support CPU kernels.";
|
|
return nullptr;
|
|
}
|
|
|
|
void Device::get_cpu_kernel_thread_globals(
|
|
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
|
|
{
|
|
LOG(FATAL) << "Device does not support CPU kernels.";
|
|
}
|
|
|
|
void *Device::get_cpu_osl_memory()
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
/* DeviceInfo */
|
|
|
|
CCL_NAMESPACE_END
|