Files
test2/intern/cycles/device/device.cpp
Brecht Van Lommel 0803119725 Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.

Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.

Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles

Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)

For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.

Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-21 14:55:54 +02:00

347 lines
8.4 KiB
C++

/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#include <string.h>
#include "bvh/bvh2.h"
#include "device/device.h"
#include "device/device_queue.h"
#include "device/cpu/device.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/multi/device.h"
#include "device/optix/device.h"
#include "util/util_foreach.h"
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_string.h"
#include "util/util_system.h"
#include "util/util_time.h"
#include "util/util_types.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
uint Device::devices_initialized_mask = 0;
/* Device */
Device::~Device() noexcept(false)
{
}
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
assert(bvh->params.bvh_layout == BVH_LAYOUT_BVH2);
BVH2 *const bvh2 = static_cast<BVH2 *>(bvh);
if (refit) {
bvh2->refit(progress);
}
else {
bvh2->build(progress, &stats);
}
}
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
#ifdef WITH_MULTI
if (!info.multi_devices.empty()) {
/* Always create a multi device when info contains multiple devices.
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
* that it is a homogeneous collection of devices, which simplifies checks. */
return device_multi_create(info, stats, profiler);
}
#endif
Device *device = NULL;
switch (info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, stats, profiler);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if (device_cuda_init())
device = device_cuda_create(info, stats, profiler);
break;
#endif
#ifdef WITH_OPTIX
case DEVICE_OPTIX:
if (device_optix_init())
device = device_optix_create(info, stats, profiler);
break;
#endif
default:
break;
}
if (device == NULL) {
device = device_dummy_create(info, stats, profiler);
}
return device;
}
DeviceType Device::type_from_string(const char *name)
{
if (strcmp(name, "CPU") == 0)
return DEVICE_CPU;
else if (strcmp(name, "CUDA") == 0)
return DEVICE_CUDA;
else if (strcmp(name, "OPTIX") == 0)
return DEVICE_OPTIX;
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
return DEVICE_NONE;
}
string Device::string_from_type(DeviceType type)
{
if (type == DEVICE_CPU)
return "CPU";
else if (type == DEVICE_CUDA)
return "CUDA";
else if (type == DEVICE_OPTIX)
return "OPTIX";
else if (type == DEVICE_MULTI)
return "MULTI";
return "";
}
vector<DeviceType> Device::available_types()
{
vector<DeviceType> types;
types.push_back(DEVICE_CPU);
#ifdef WITH_CUDA
types.push_back(DEVICE_CUDA);
#endif
#ifdef WITH_OPTIX
types.push_back(DEVICE_OPTIX);
#endif
return types;
}
vector<DeviceInfo> Device::available_devices(uint mask)
{
/* Lazy initialize devices. On some platforms OpenCL or CUDA drivers can
* be broken and cause crashes when only trying to get device info, so
* we don't want to do any initialization until the user chooses to. */
thread_scoped_lock lock(device_mutex);
vector<DeviceInfo> devices;
#if defined(WITH_CUDA) || defined(WITH_OPTIX)
if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
if (device_cuda_init()) {
device_cuda_info(cuda_devices);
}
devices_initialized_mask |= DEVICE_MASK_CUDA;
}
if (mask & DEVICE_MASK_CUDA) {
foreach (DeviceInfo &info, cuda_devices) {
devices.push_back(info);
}
}
}
#endif
#ifdef WITH_OPTIX
if (mask & DEVICE_MASK_OPTIX) {
if (!(devices_initialized_mask & DEVICE_MASK_OPTIX)) {
if (device_optix_init()) {
device_optix_info(cuda_devices, optix_devices);
}
devices_initialized_mask |= DEVICE_MASK_OPTIX;
}
foreach (DeviceInfo &info, optix_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
devices_initialized_mask |= DEVICE_MASK_CPU;
}
foreach (DeviceInfo &info, cpu_devices) {
devices.push_back(info);
}
}
return devices;
}
DeviceInfo Device::dummy_device(const string &error_msg)
{
DeviceInfo info;
info.type = DEVICE_DUMMY;
info.error_msg = error_msg;
return info;
}
string Device::device_capabilities(uint mask)
{
thread_scoped_lock lock(device_mutex);
string capabilities = "";
if (mask & DEVICE_MASK_CPU) {
capabilities += "\nCPU device capabilities: ";
capabilities += device_cpu_capabilities() + "\n";
}
#ifdef WITH_CUDA
if (mask & DEVICE_MASK_CUDA) {
if (device_cuda_init()) {
capabilities += "\nCUDA device capabilities:\n";
capabilities += device_cuda_capabilities();
}
}
#endif
return capabilities;
}
DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
int threads,
bool background)
{
assert(subdevices.size() > 0);
if (subdevices.size() == 1) {
/* No multi device needed. */
return subdevices.front();
}
DeviceInfo info;
info.type = DEVICE_NONE;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_osl = true;
info.has_profiling = true;
info.has_peer_memory = false;
info.denoisers = DENOISER_ALL;
foreach (const DeviceInfo &device, subdevices) {
/* Ensure CPU device does not slow down GPU. */
if (device.type == DEVICE_CPU && subdevices.size() > 1) {
if (background) {
int orig_cpu_threads = (threads) ? threads : system_cpu_thread_count();
int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
VLOG(1) << "CPU render threads reduced from " << orig_cpu_threads << " to " << cpu_threads
<< ", to dedicate to GPU.";
if (cpu_threads >= 1) {
DeviceInfo cpu_device = device;
cpu_device.cpu_threads = cpu_threads;
info.multi_devices.push_back(cpu_device);
}
else {
continue;
}
}
else {
VLOG(1) << "CPU render threads disabled for interactive render.";
continue;
}
}
else {
info.multi_devices.push_back(device);
}
/* Create unique ID for this combination of devices. */
info.id += device.id;
/* Set device type to MULTI if subdevices are not of a common type. */
if (info.type == DEVICE_NONE) {
info.type = device.type;
}
else if (device.type != info.type) {
info.type = DEVICE_MULTI;
}
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_nanovdb &= device.has_nanovdb;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
info.denoisers &= device.denoisers;
}
return info;
}
void Device::tag_update()
{
free_memory();
}
void Device::free_memory()
{
devices_initialized_mask = 0;
cuda_devices.free_memory();
optix_devices.free_memory();
cpu_devices.free_memory();
}
unique_ptr<DeviceQueue> Device::gpu_queue_create()
{
LOG(FATAL) << "Device does not support queues.";
return nullptr;
}
const CPUKernels *Device::get_cpu_kernels() const
{
LOG(FATAL) << "Device does not support CPU kernels.";
return nullptr;
}
void Device::get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
{
LOG(FATAL) << "Device does not support CPU kernels.";
}
void *Device::get_cpu_osl_memory()
{
return nullptr;
}
/* DeviceInfo */
CCL_NAMESPACE_END