From a8654a1dbea218e8e072b651f0987fbc584f693e Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 29 Dec 2024 23:13:45 +0100 Subject: [PATCH] Refactor: Cycles: Make CPU kernel globals storage more sane Pull Request: https://projects.blender.org/blender/blender/pulls/132361 --- intern/cycles/app/cycles_precompute.cpp | 12 +-- intern/cycles/device/CMakeLists.txt | 2 - intern/cycles/device/cpu/device_impl.cpp | 12 +-- intern/cycles/device/cpu/device_impl.h | 4 +- intern/cycles/device/cpu/kernel.h | 21 +++-- .../device/cpu/kernel_thread_globals.cpp | 91 ------------------ .../cycles/device/cpu/kernel_thread_globals.h | 45 --------- intern/cycles/device/device.cpp | 4 +- intern/cycles/device/device.h | 8 +- intern/cycles/device/multi/device.cpp | 2 +- intern/cycles/device/optix/device_impl.cpp | 2 +- intern/cycles/device/optix/device_impl.h | 2 +- .../cycles/integrator/path_trace_work_cpu.cpp | 36 ++++---- .../cycles/integrator/path_trace_work_cpu.h | 10 +- intern/cycles/integrator/shader_eval.cpp | 7 +- intern/cycles/kernel/CMakeLists.txt | 1 + intern/cycles/kernel/device/cpu/bvh.h | 10 +- intern/cycles/kernel/device/cpu/globals.cpp | 43 +++++++++ intern/cycles/kernel/device/cpu/globals.h | 61 ++++++++---- intern/cycles/kernel/device/cpu/kernel.cpp | 2 + intern/cycles/kernel/device/cpu/kernel_arch.h | 34 +++---- .../kernel/device/cpu/kernel_arch_impl.h | 30 +++--- .../cycles/kernel/device/cpu/kernel_avx2.cpp | 1 + .../cycles/kernel/device/cpu/kernel_sse42.cpp | 1 + intern/cycles/kernel/osl/closures.cpp | 92 +++++++++---------- intern/cycles/kernel/osl/globals.cpp | 68 +++++++------- intern/cycles/kernel/osl/globals.h | 33 ++++--- intern/cycles/kernel/osl/osl.h | 2 +- intern/cycles/kernel/osl/services.cpp | 53 +++++------ intern/cycles/kernel/osl/services.h | 6 +- intern/cycles/kernel/types.h | 2 +- intern/cycles/scene/osl.cpp | 8 +- 32 files changed, 319 insertions(+), 386 deletions(-) delete mode 100644 intern/cycles/device/cpu/kernel_thread_globals.cpp delete 
mode 100644 intern/cycles/device/cpu/kernel_thread_globals.h create mode 100644 intern/cycles/kernel/device/cpu/globals.cpp diff --git a/intern/cycles/app/cycles_precompute.cpp b/intern/cycles/app/cycles_precompute.cpp index e5b31771c4f..8e393db309a 100644 --- a/intern/cycles/app/cycles_precompute.cpp +++ b/intern/cycles/app/cycles_precompute.cpp @@ -19,8 +19,6 @@ CCL_NAMESPACE_BEGIN static float precompute_ggx_E(const float rough, const float mu, const float3 rand) { - KernelGlobalsCPU kg; - MicrofacetBsdf bsdf; bsdf.weight = one_float3(); bsdf.sample_weight = 1.0f; @@ -36,7 +34,7 @@ static float precompute_ggx_E(const float rough, const float mu, const float3 ra float pdf = 0.0f; float sampled_eta; float2 sampled_roughness; - bsdf_microfacet_ggx_sample(&kg, + bsdf_microfacet_ggx_sample(nullptr, (ShaderClosure *)&bsdf, make_float3(0.0f, 0.0f, 1.0f), make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu), @@ -57,8 +55,6 @@ static float precompute_ggx_glass_E(const float rough, const float eta, const float3 rand) { - KernelGlobalsCPU kg; - MicrofacetBsdf bsdf; bsdf.weight = one_float3(); bsdf.sample_weight = 1.0f; @@ -74,7 +70,7 @@ static float precompute_ggx_glass_E(const float rough, float pdf = 0.0f; float sampled_eta; float2 sampled_roughness; - bsdf_microfacet_ggx_sample(&kg, + bsdf_microfacet_ggx_sample(nullptr, (ShaderClosure *)&bsdf, make_float3(0.0f, 0.0f, 1.0f), make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu), @@ -93,8 +89,6 @@ static float precompute_ggx_glass_E(const float rough, static float precompute_ggx_gen_schlick_s( const float rough, const float mu, const float eta, const float exponent, const float3 rand) { - KernelGlobalsCPU kg; - MicrofacetBsdf bsdf; bsdf.weight = one_float3(); bsdf.sample_weight = 1.0f; @@ -120,7 +114,7 @@ static float precompute_ggx_gen_schlick_s( float pdf = 0.0f; float sampled_eta; float2 sampled_roughness; - bsdf_microfacet_ggx_sample(&kg, + bsdf_microfacet_ggx_sample(nullptr, (ShaderClosure *)&bsdf, make_float3(0.0f, 0.0f, 1.0f), 
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu), diff --git a/intern/cycles/device/CMakeLists.txt b/intern/cycles/device/CMakeLists.txt index e2939ef08da..431ae7a80d4 100644 --- a/intern/cycles/device/CMakeLists.txt +++ b/intern/cycles/device/CMakeLists.txt @@ -33,8 +33,6 @@ set(SRC_CPU cpu/kernel.cpp cpu/kernel.h cpu/kernel_function.h - cpu/kernel_thread_globals.cpp - cpu/kernel_thread_globals.h ) set(SRC_CUDA diff --git a/intern/cycles/device/cpu/device_impl.cpp b/intern/cycles/device/cpu/device_impl.cpp index fef9dd27296..c912eeca5b7 100644 --- a/intern/cycles/device/cpu/device_impl.cpp +++ b/intern/cycles/device/cpu/device_impl.cpp @@ -25,7 +25,6 @@ #endif #include "device/cpu/kernel.h" -#include "device/cpu/kernel_thread_globals.h" #include "device/device.h" @@ -56,9 +55,6 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_ info.cpu_threads = TaskScheduler::max_concurrency(); } -#ifdef WITH_OSL - kernel_globals.osl = &osl_globals; -#endif #ifdef WITH_EMBREE embree_device = rtcNewDevice("verbose=0"); #endif @@ -296,19 +292,19 @@ void *CPUDevice::get_guiding_device() const } void CPUDevice::get_cpu_kernel_thread_globals( - vector &kernel_thread_globals) + vector &kernel_thread_globals) { /* Ensure latest texture info is loaded into kernel globals before returning. 
*/ load_texture_info(); kernel_thread_globals.clear(); - void *osl_memory = get_cpu_osl_memory(); + OSLGlobals *osl_globals = get_cpu_osl_memory(); for (int i = 0; i < info.cpu_threads; i++) { - kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler, i); + kernel_thread_globals.emplace_back(kernel_globals, osl_globals, profiler, i); } } -void *CPUDevice::get_cpu_osl_memory() +OSLGlobals *CPUDevice::get_cpu_osl_memory() { #ifdef WITH_OSL return &osl_globals; diff --git a/intern/cycles/device/cpu/device_impl.h b/intern/cycles/device/cpu/device_impl.h index f0e242af237..e6008c8e28d 100644 --- a/intern/cycles/device/cpu/device_impl.h +++ b/intern/cycles/device/cpu/device_impl.h @@ -85,8 +85,8 @@ class CPUDevice : public Device { void *get_guiding_device() const override; void get_cpu_kernel_thread_globals( - vector &kernel_thread_globals) override; - void *get_cpu_osl_memory() override; + vector &kernel_thread_globals) override; + OSLGlobals *get_cpu_osl_memory() override; protected: bool load_kernels(uint /*kernel_features*/) override; diff --git a/intern/cycles/device/cpu/kernel.h b/intern/cycles/device/cpu/kernel.h index 6edca4eb724..bab7e898acf 100644 --- a/intern/cycles/device/cpu/kernel.h +++ b/intern/cycles/device/cpu/kernel.h @@ -9,7 +9,7 @@ CCL_NAMESPACE_BEGIN -struct KernelGlobalsCPU; +struct ThreadKernelGlobalsCPU; struct KernelFilmConvert; struct IntegratorStateCPU; struct TileInfo; @@ -19,10 +19,11 @@ class CPUKernels { /* Integrator. */ using IntegratorFunction = - CPUKernelFunction; - using IntegratorShadeFunction = CPUKernelFunction; - using IntegratorInitFunction = CPUKernelFunction; + using IntegratorShadeFunction = CPUKernelFunction; + using IntegratorInitFunction = CPUKernelFunction; @@ -45,7 +46,7 @@ class CPUKernels { /* Shader evaluation. 
*/ using ShaderEvalFunction = CPUKernelFunction; + const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *, float *, const int)>; ShaderEvalFunction shader_eval_displace; ShaderEvalFunction shader_eval_background; @@ -54,7 +55,7 @@ class CPUKernels { /* Adaptive stopping. */ using AdaptiveSamplingConvergenceCheckFunction = - CPUKernelFunction; using AdaptiveSamplingFilterXFunction = - CPUKernelFunction; using AdaptiveSamplingFilterYFunction = - CPUKernelFunction; + const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int pixel_index)>; CryptomattePostprocessFunction cryptomatte_postprocess; diff --git a/intern/cycles/device/cpu/kernel_thread_globals.cpp b/intern/cycles/device/cpu/kernel_thread_globals.cpp deleted file mode 100644 index 998a63aa334..00000000000 --- a/intern/cycles/device/cpu/kernel_thread_globals.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#include "device/cpu/kernel_thread_globals.h" - -#include "kernel/osl/globals.h" - -#include "util/profiling.h" - -CCL_NAMESPACE_BEGIN - -CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals, - void *osl_globals_memory, - Profiler &cpu_profiler, - const int thread_index) - : KernelGlobalsCPU(kernel_globals), cpu_profiler_(cpu_profiler) -{ - clear_runtime_pointers(); - -#ifdef WITH_OSL - OSLGlobals::thread_init(this, static_cast(osl_globals_memory), thread_index); -#else - (void)thread_index; - (void)osl_globals_memory; -#endif - -#ifdef WITH_PATH_GUIDING - opgl_path_segment_storage = new openpgl::cpp::PathSegmentStorage(); -#endif -} - -CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept - : KernelGlobalsCPU(std::move(other)), cpu_profiler_(other.cpu_profiler_) -{ - other.clear_runtime_pointers(); -} - -CPUKernelThreadGlobals::~CPUKernelThreadGlobals() -{ -#ifdef WITH_OSL - OSLGlobals::thread_free(this); -#endif - -#ifdef 
WITH_PATH_GUIDING - delete opgl_path_segment_storage; - delete opgl_surface_sampling_distribution; - delete opgl_volume_sampling_distribution; -#endif -} - -CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other) -{ - if (this == &other) { - return *this; - } - - *static_cast(this) = *static_cast(&other); - - other.clear_runtime_pointers(); - - return *this; -} - -void CPUKernelThreadGlobals::clear_runtime_pointers() -{ -#ifdef WITH_OSL - osl = nullptr; -#endif - -#ifdef WITH_PATH_GUIDING - opgl_sample_data_storage = nullptr; - opgl_guiding_field = nullptr; - - opgl_path_segment_storage = nullptr; - opgl_surface_sampling_distribution = nullptr; - opgl_volume_sampling_distribution = nullptr; -#endif -} - -void CPUKernelThreadGlobals::start_profiling() -{ - cpu_profiler_.add_state(&profiler); -} - -void CPUKernelThreadGlobals::stop_profiling() -{ - cpu_profiler_.remove_state(&profiler); -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/cpu/kernel_thread_globals.h b/intern/cycles/device/cpu/kernel_thread_globals.h deleted file mode 100644 index 64225d6d554..00000000000 --- a/intern/cycles/device/cpu/kernel_thread_globals.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#include "kernel/globals.h" - -CCL_NAMESPACE_BEGIN - -class Profiler; - -/* A special class which extends memory ownership of the `KernelGlobalsCPU` decoupling any resource - * which is not thread-safe for access. Every worker thread which needs to operate on - * `KernelGlobalsCPU` needs to initialize its own copy of this object. - * - * NOTE: Only minimal subset of objects are copied: `KernelData` is never copied. This means that - * there is no unnecessary data duplication happening when using this object. 
*/ -class CPUKernelThreadGlobals : public KernelGlobalsCPU { - public: - /* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building - * without OSL support. Will avoid need to those unnamed pointers and casts. */ - CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals, - void *osl_globals_memory, - Profiler &cpu_profiler, - const int thread_index); - - ~CPUKernelThreadGlobals(); - - CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete; - CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept; - - CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete; - CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other); - - void start_profiling(); - void stop_profiling(); - - protected: - void clear_runtime_pointers(); - - Profiler &cpu_profiler_; -}; - -CCL_NAMESPACE_END diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index 7a00aa83048..96ba33e3195 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -483,12 +483,12 @@ const CPUKernels &Device::get_cpu_kernels() } void Device::get_cpu_kernel_thread_globals( - vector & /*kernel_thread_globals*/) + vector & /*kernel_thread_globals*/) { LOG(FATAL) << "Device does not support CPU kernels."; } -void *Device::get_cpu_osl_memory() +OSLGlobals *Device::get_cpu_osl_memory() { return nullptr; } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 85c0b48c392..8f9a2fca146 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -28,9 +28,11 @@ class BVH; class DeviceQueue; class Progress; class CPUKernels; -class CPUKernelThreadGlobals; class Scene; +struct OSLGlobals; +struct ThreadKernelGlobalsCPU; + /* Device Types */ enum DeviceType { @@ -216,9 +218,9 @@ class Device { static const CPUKernels &get_cpu_kernels(); /* Get kernel globals to pass to kernels. 
*/ virtual void get_cpu_kernel_thread_globals( - vector & /*kernel_thread_globals*/); + vector & /*kernel_thread_globals*/); /* Get OpenShadingLanguage memory buffer. */ - virtual void *get_cpu_osl_memory(); + virtual OSLGlobals *get_cpu_osl_memory(); /* Acceleration structure building. */ virtual void build_bvh(BVH *bvh, Progress &progress, bool refit); diff --git a/intern/cycles/device/multi/device.cpp b/intern/cycles/device/multi/device.cpp index b72424cad89..0c6c62536ed 100644 --- a/intern/cycles/device/multi/device.cpp +++ b/intern/cycles/device/multi/device.cpp @@ -257,7 +257,7 @@ class MultiDevice : public Device { } } - void *get_cpu_osl_memory() override + OSLGlobals *get_cpu_osl_memory() override { /* Always return the OSL memory of the CPU device (this works since the constructor above * guarantees that CPU devices are always added to the back). */ diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index 0a21ff20b00..1a9a81b2d6a 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -1006,7 +1006,7 @@ bool OptiXDevice::load_osl_kernels() # endif } -void *OptiXDevice::get_cpu_osl_memory() +OSLGlobals *OptiXDevice::get_cpu_osl_memory() { # ifdef WITH_OSL return &osl_globals; diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h index 9de13c0e0b9..5fe4bea3895 100644 --- a/intern/cycles/device/optix/device_impl.h +++ b/intern/cycles/device/optix/device_impl.h @@ -115,7 +115,7 @@ class OptiXDevice : public CUDADevice { unique_ptr gpu_queue_create() override; - void *get_cpu_osl_memory() override; + OSLGlobals *get_cpu_osl_memory() override; }; CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 30dd8adc81d..8c323770fef 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ 
b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -28,9 +28,9 @@ static inline tbb::task_arena local_tbb_arena_create(const Device *device) return tbb::task_arena(device->info.cpu_threads); } -/* Get CPUKernelThreadGlobals for the current thread. */ -static inline CPUKernelThreadGlobals *kernel_thread_globals_get( - vector &kernel_thread_globals) +/* Get ThreadKernelGlobalsCPU for the current thread. */ +static inline ThreadKernelGlobalsCPU *kernel_thread_globals_get( + vector &kernel_thread_globals) { const int thread_index = tbb::this_task_arena::current_thread_index(); DCHECK_GE(thread_index, 0); @@ -65,7 +65,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics, const int64_t total_pixels_num = image_width * image_height; if (device_->profiler.active()) { - for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) { + for (ThreadKernelGlobalsCPU &kernel_globals : kernel_thread_globals_) { kernel_globals.start_profiling(); } } @@ -91,13 +91,13 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics, work_tile.offset = effective_buffer_params_.offset; work_tile.stride = effective_buffer_params_.stride; - CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_get(kernel_thread_globals_); + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_get(kernel_thread_globals_); render_samples_full_pipeline(kernel_globals, work_tile, samples_num); }); }); if (device_->profiler.active()) { - for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) { + for (ThreadKernelGlobalsCPU &kernel_globals : kernel_thread_globals_) { kernel_globals.stop_profiling(); } } @@ -105,7 +105,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics, statistics.occupancy = 1.0f; } -void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobalsCPU *kernel_globals, +void PathTraceWorkCPU::render_samples_full_pipeline(ThreadKernelGlobalsCPU *kernel_globals, const KernelWorkTile &work_tile, const int 
samples_num) { @@ -230,7 +230,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(const float /* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */ local_arena.execute([&]() { parallel_for(full_y, full_y + height, [&](int y) { - CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data(); + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data(); bool row_converged = true; uint num_row_pixels_active = 0; @@ -255,7 +255,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(const float if (num_active_pixels) { local_arena.execute([&]() { parallel_for(full_x, full_x + width, [&](int x) { - CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data(); + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data(); kernels_.adaptive_sampling_filter_y( kernel_globals, render_buffer, x, full_y, height, offset, stride); }); @@ -277,7 +277,7 @@ void PathTraceWorkCPU::cryptomatte_postproces() /* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */ local_arena.execute([&]() { parallel_for(0, height, [&](int y) { - CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data(); + ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data(); int pixel_index = y * width; for (int x = 0; x < width; ++x, ++pixel_index) { @@ -297,7 +297,7 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field, /* Linking the global guiding structures (e.g., Field and SampleStorage) to the per-thread * kernel globals. */ for (int thread_index = 0; thread_index < kernel_thread_globals_.size(); thread_index++) { - CPUKernelThreadGlobals &kg = kernel_thread_globals_[thread_index]; + ThreadKernelGlobalsCPU &kg = kernel_thread_globals_[thread_index]; openpgl::cpp::Field *field = (openpgl::cpp::Field *)guiding_field; /* Allocate sampling distributions. 
*/ @@ -305,17 +305,17 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field, # if PATH_GUIDING_LEVEL >= 4 if (kg.opgl_surface_sampling_distribution) { - delete kg.opgl_surface_sampling_distribution; - kg.opgl_surface_sampling_distribution = nullptr; + kg.opgl_surface_sampling_distribution.reset(); } if (kg.opgl_volume_sampling_distribution) { - delete kg.opgl_volume_sampling_distribution; - kg.opgl_volume_sampling_distribution = nullptr; + kg.opgl_volume_sampling_distribution.reset(); } if (field) { - kg.opgl_surface_sampling_distribution = new openpgl::cpp::SurfaceSamplingDistribution(field); - kg.opgl_volume_sampling_distribution = new openpgl::cpp::VolumeSamplingDistribution(field); + kg.opgl_surface_sampling_distribution = + make_unique(field); + kg.opgl_volume_sampling_distribution = make_unique( + field); } # endif @@ -332,7 +332,7 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field, } void PathTraceWorkCPU::guiding_push_sample_data_to_global_storage( - KernelGlobalsCPU *kg, + ThreadKernelGlobalsCPU *kg, IntegratorStateCPU *state, const ccl_global float *ccl_restrict render_buffer) { diff --git a/intern/cycles/integrator/path_trace_work_cpu.h b/intern/cycles/integrator/path_trace_work_cpu.h index 6f35be8cab4..cffea461e9f 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.h +++ b/intern/cycles/integrator/path_trace_work_cpu.h @@ -4,9 +4,9 @@ #pragma once +#include "kernel/device/cpu/globals.h" #include "kernel/integrator/state.h" -#include "device/cpu/kernel_thread_globals.h" #include "device/queue.h" #include "integrator/path_trace_work.h" @@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN struct KernelWorkTile; -struct KernelGlobalsCPU; +struct ThreadKernelGlobalsCPU; struct IntegratorStateCPU; class CPUKernels; @@ -63,7 +63,7 @@ class PathTraceWorkCPU : public PathTraceWork { /* Pushes the collected training data/samples of a path to the global sample storage. 
* This function is called at the end of a random walk/path generation. */ - void guiding_push_sample_data_to_global_storage(KernelGlobalsCPU *kg, + void guiding_push_sample_data_to_global_storage(ThreadKernelGlobalsCPU *kg, IntegratorStateCPU *state, const ccl_global float *ccl_restrict render_buffer); @@ -71,7 +71,7 @@ class PathTraceWorkCPU : public PathTraceWork { protected: /* Core path tracing routine. Renders given work time on the given queue. */ - void render_samples_full_pipeline(KernelGlobalsCPU *kernel_globals, + void render_samples_full_pipeline(ThreadKernelGlobalsCPU *kernel_globals, const KernelWorkTile &work_tile, const int samples_num); @@ -83,7 +83,7 @@ class PathTraceWorkCPU : public PathTraceWork { * More specifically, the `kernel_globals_` is local to each threads and nobody else is * accessing it, but some "localization" is required to decouple from kernel globals stored * on the device level. */ - vector kernel_thread_globals_; + vector kernel_thread_globals_; }; CCL_NAMESPACE_END diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index 1019f48d104..15d2bc75c13 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -2,13 +2,14 @@ * * SPDX-License-Identifier: Apache-2.0 */ +#include "kernel/device/cpu/globals.h" + #include "integrator/shader_eval.h" #include "device/device.h" #include "device/queue.h" #include "device/cpu/kernel.h" -#include "device/cpu/kernel_thread_globals.h" #include "util/log.h" #include "util/progress.h" @@ -80,7 +81,7 @@ bool ShaderEval::eval_cpu(Device *device, device_vector &output, const int64_t work_size) { - vector kernel_thread_globals; + vector kernel_thread_globals; device->get_cpu_kernel_thread_globals(kernel_thread_globals); /* Find required kernel function. 
*/ @@ -101,7 +102,7 @@ bool ShaderEval::eval_cpu(Device *device, } const int thread_index = tbb::this_task_arena::current_thread_index(); - const KernelGlobalsCPU *kg = &kernel_thread_globals[thread_index]; + const ThreadKernelGlobalsCPU *kg = &kernel_thread_globals[thread_index]; switch (type) { case SHADER_EVAL_DISPLACE: diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 3057ec82bc4..5f56257b4b6 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -13,6 +13,7 @@ set(INC_SYS ) set(SRC_KERNEL_DEVICE_CPU + device/cpu/globals.cpp device/cpu/kernel.cpp device/cpu/kernel_sse42.cpp device/cpu/kernel_avx2.cpp diff --git a/intern/cycles/kernel/device/cpu/bvh.h b/intern/cycles/kernel/device/cpu/bvh.h index 1b8c2c5422d..070e222e4cb 100644 --- a/intern/cycles/kernel/device/cpu/bvh.h +++ b/intern/cycles/kernel/device/cpu/bvh.h @@ -284,7 +284,7 @@ ccl_device_forceinline void kernel_embree_filter_intersection_func_impl( #ifdef __KERNEL_ONEAPI__ KernelGlobalsGPU *kg = nullptr; #else - const KernelGlobalsCPU *kg = ctx->kg; + const ThreadKernelGlobalsCPU *kg = ctx->kg; #endif const Ray *cray = ctx->ray; @@ -324,7 +324,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl( #ifdef __KERNEL_ONEAPI__ KernelGlobalsGPU *kg = nullptr; #else - const KernelGlobalsCPU *kg = ctx->kg; + const ThreadKernelGlobalsCPU *kg = ctx->kg; #endif const Ray *cray = ctx->ray; @@ -438,7 +438,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl( #ifdef __KERNEL_ONEAPI__ KernelGlobalsGPU *kg = nullptr; #else - const KernelGlobalsCPU *kg = ctx->kg; + const ThreadKernelGlobalsCPU *kg = ctx->kg; #endif const Ray *cray = ctx->ray; @@ -541,7 +541,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl( #ifdef __KERNEL_ONEAPI__ KernelGlobalsGPU *kg = nullptr; #else - const KernelGlobalsCPU *kg = ctx->kg; + const ThreadKernelGlobalsCPU *kg = ctx->kg; 
#endif const Ray *cray = ctx->ray; @@ -622,7 +622,7 @@ ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionN } CCLIntersectContext *ctx = ((CCLIntersectContext *)args->context); - const KernelGlobalsCPU *kg = ctx->kg; + const ThreadKernelGlobalsCPU *kg = ctx->kg; const Ray *cray = ctx->ray; if (kernel_embree_is_self_intersection( diff --git a/intern/cycles/kernel/device/cpu/globals.cpp b/intern/cycles/kernel/device/cpu/globals.cpp new file mode 100644 index 00000000000..825233e47c4 --- /dev/null +++ b/intern/cycles/kernel/device/cpu/globals.cpp @@ -0,0 +1,43 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#include "kernel/device/cpu/globals.h" +#include "kernel/osl/globals.h" + +#include "util/guiding.h" // IWYU pragma: keep +#include "util/profiling.h" + +CCL_NAMESPACE_BEGIN + +ThreadKernelGlobalsCPU::ThreadKernelGlobalsCPU(const KernelGlobalsCPU &kernel_globals, + OSLGlobals *osl_globals, + Profiler &cpu_profiler, + const int thread_index) + : KernelGlobalsCPU(kernel_globals), +#ifdef WITH_OSL + osl(osl_globals, thread_index), +#endif + cpu_profiler_(cpu_profiler) +{ +#ifndef WITH_OSL + (void)thread_index; + (void)osl_globals; +#endif + +#ifdef WITH_PATH_GUIDING + opgl_path_segment_storage = make_unique<openpgl::cpp::PathSegmentStorage>(); +#endif +} + +void ThreadKernelGlobalsCPU::start_profiling() +{ + cpu_profiler_.add_state(&profiler); +} + +void ThreadKernelGlobalsCPU::stop_profiling() +{ + cpu_profiler_.remove_state(&profiler); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/device/cpu/globals.h b/intern/cycles/kernel/device/cpu/globals.h index 62bd989054b..2c6d5ad29d2 100644 --- a/intern/cycles/kernel/device/cpu/globals.h +++ b/intern/cycles/kernel/device/cpu/globals.h @@ -9,22 +9,23 @@ #include "kernel/types.h" #include "kernel/util/profiler.h" +#ifdef __OSL__ +# include "kernel/osl/globals.h" +#endif + #include "util/guiding.h" // IWYU pragma: keep #include "util/texture.h" // IWYU pragma:
keep +#include "util/unique_ptr.h" CCL_NAMESPACE_BEGIN +struct OSLGlobals; + /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in * the kernel, to access constant data. These are all stored as flat arrays. * these are really just standard arrays. We can't use actually globals because * multiple renders may be running inside the same process. */ -#ifdef __OSL__ -struct OSLGlobals; -struct OSLThreadData; -struct OSLShadingSystem; -#endif - /* Array for kernel data, with size to be able to assert on invalid data access. */ template struct kernel_array { const ccl_always_inline T &fetch(const int index) const @@ -37,38 +38,58 @@ template struct kernel_array { int width = 0; }; +/* Constant globals shared between all threads. */ struct KernelGlobalsCPU { #define KERNEL_DATA_ARRAY(type, name) kernel_array name; #include "kernel/data_arrays.h" KernelData data = {}; + ProfilingState profiler; +}; + +/* Per-thread global state. + * + * To avoid pointer indirection, the constant globals are copied to each thread. + * + * This may not be ideal for cache pressure. Alternative would be to pass an + * additional thread index to every function, and potentially to make the shared + * part an actual global variable. That would match the GPU more closely, but + * also require mutex locks for multiple Cycles instances. */ +struct ThreadKernelGlobalsCPU : public KernelGlobalsCPU { + ThreadKernelGlobalsCPU(const KernelGlobalsCPU &kernel_globals, + OSLGlobals *osl_globals_memory, + Profiler &cpu_profiler, + const int thread_index); + + ThreadKernelGlobalsCPU(const ThreadKernelGlobalsCPU &other) = delete; + ThreadKernelGlobalsCPU(ThreadKernelGlobalsCPU &&other) noexcept = default; + ThreadKernelGlobalsCPU &operator=(const ThreadKernelGlobalsCPU &other) = delete; + ThreadKernelGlobalsCPU &operator=(ThreadKernelGlobalsCPU &&other) = delete; + + void start_profiling(); + void stop_profiling(); + #ifdef __OSL__ - /* On the CPU, we also have the OSL globals here.
Most data structures are shared - * with SVM, the difference is in the shaders and object/mesh attributes. */ - OSLGlobals *osl = nullptr; - OSLShadingSystem *osl_ss = nullptr; - OSLThreadData *osl_tdata = nullptr; - int osl_thread_index = 0; + OSLThreadData osl; #endif #ifdef __PATH_GUIDING__ - /* Pointers to global data structures. */ + /* Pointers to shared global data structures. */ openpgl::cpp::SampleStorage *opgl_sample_data_storage = nullptr; openpgl::cpp::Field *opgl_guiding_field = nullptr; /* Local data structures owned by the thread. */ - openpgl::cpp::PathSegmentStorage *opgl_path_segment_storage = nullptr; - openpgl::cpp::SurfaceSamplingDistribution *opgl_surface_sampling_distribution = nullptr; - openpgl::cpp::VolumeSamplingDistribution *opgl_volume_sampling_distribution = nullptr; + unique_ptr<openpgl::cpp::PathSegmentStorage> opgl_path_segment_storage; + unique_ptr<openpgl::cpp::SurfaceSamplingDistribution> opgl_surface_sampling_distribution; + unique_ptr<openpgl::cpp::VolumeSamplingDistribution> opgl_volume_sampling_distribution; #endif - /* **** Run-time data **** */ - - ProfilingState profiler; + protected: + Profiler &cpu_profiler_; }; -using KernelGlobals = const KernelGlobalsCPU *; +using KernelGlobals = const ThreadKernelGlobalsCPU *; /* Abstraction macros */ #define kernel_data_fetch(name, index) (kg->name.fetch(index)) diff --git a/intern/cycles/kernel/device/cpu/kernel.cpp b/intern/cycles/kernel/device/cpu/kernel.cpp index a5a025c8997..f686867db2f 100644 --- a/intern/cycles/kernel/device/cpu/kernel.cpp +++ b/intern/cycles/kernel/device/cpu/kernel.cpp @@ -43,6 +43,8 @@ /* do nothing */ #endif +#include "kernel/device/cpu/globals.h" + #include "kernel/device/cpu/kernel.h" #define KERNEL_ARCH cpu #include "kernel/device/cpu/kernel_arch_impl.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 700fab9f988..c5b9ed9afa9 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -9,19 +9,21 @@ */ #define KERNEL_INTEGRATOR_FUNCTION(name) \ - void
KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ - IntegratorStateCPU *state) + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ + const ThreadKernelGlobalsCPU *ccl_restrict kg, IntegratorStateCPU *state) #define KERNEL_INTEGRATOR_SHADE_FUNCTION(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ - IntegratorStateCPU *state, \ - ccl_global float *render_buffer) + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ + const ThreadKernelGlobalsCPU *ccl_restrict kg, \ + IntegratorStateCPU *state, \ + ccl_global float *render_buffer) #define KERNEL_INTEGRATOR_INIT_FUNCTION(name) \ - bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \ - IntegratorStateCPU *state, \ - KernelWorkTile *tile, \ - ccl_global float *render_buffer) + bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ + const ThreadKernelGlobalsCPU *ccl_restrict kg, \ + IntegratorStateCPU *state, \ + KernelWorkTile *tile, \ + ccl_global float *render_buffer) KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera); KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake); @@ -77,16 +79,16 @@ KERNEL_FILM_CONVERT_FUNCTION(float4) * Shader evaluation. 
*/ -void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset); -void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset); void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( - const KernelGlobalsCPU *kg, + const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset); @@ -96,7 +98,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( */ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( - const KernelGlobalsCPU *kg, + const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int y, @@ -105,14 +107,14 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( const int offset, int stride); -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int y, const int start_x, const int width, const int offset, int stride); -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int start_y, @@ -124,7 +126,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP * Cryptomatte. 
*/ -void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index); diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 80dd94f8d74..aa593a41dba 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -62,7 +62,7 @@ CCL_NAMESPACE_BEGIN /* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so * that it does not contain unused fields. */ #define DEFINE_INTEGRATOR_INIT_KERNEL(name) \ - bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ + bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \ IntegratorStateCPU *state, \ KernelWorkTile *tile, \ ccl_global float *render_buffer) \ @@ -72,29 +72,31 @@ CCL_NAMESPACE_BEGIN } #define DEFINE_INTEGRATOR_KERNEL(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \ IntegratorStateCPU *state) \ { \ KERNEL_INVOKE(name, kg, state); \ } #define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ - const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \ + IntegratorStateCPU *state, \ + ccl_global float *render_buffer) \ { \ KERNEL_INVOKE(name, kg, state, render_buffer); \ } #define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \ IntegratorStateCPU *state) \ { \ KERNEL_INVOKE(name, kg, &state->shadow); 
\ } #define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \ - void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \ - const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \ + void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \ + IntegratorStateCPU *state, \ + ccl_global float *render_buffer) \ { \ KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \ } @@ -118,7 +120,7 @@ DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(shade_shadow) * Shader evaluation. */ -void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset) @@ -130,7 +132,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg, #endif } -void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset) @@ -143,7 +145,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *k } void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( - const KernelGlobalsCPU *kg, + const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset) @@ -160,7 +162,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)( */ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( - const KernelGlobalsCPU *kg, + const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int y, @@ -178,7 +180,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)( #endif } -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const ThreadKernelGlobalsCPU *kg, 
ccl_global float *render_buffer, const int y, const int start_x, @@ -193,7 +195,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP #endif } -void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int start_y, @@ -212,7 +214,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP * Cryptomatte. */ -void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg, +void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int pixel_index) { diff --git a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp index 097601e1950..0d0894c7607 100644 --- a/intern/cycles/kernel/device/cpu/kernel_avx2.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_avx2.cpp @@ -23,6 +23,7 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */ +#include "kernel/device/cpu/globals.h" #include "kernel/device/cpu/kernel.h" #define KERNEL_ARCH cpu_avx2 #include "kernel/device/cpu/kernel_arch_impl.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_sse42.cpp b/intern/cycles/kernel/device/cpu/kernel_sse42.cpp index d9a12e8a224..2970d84b27a 100644 --- a/intern/cycles/kernel/device/cpu/kernel_sse42.cpp +++ b/intern/cycles/kernel/device/cpu/kernel_sse42.cpp @@ -21,6 +21,7 @@ # endif #endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */ +#include "kernel/device/cpu/globals.h" #include "kernel/device/cpu/kernel.h" #define KERNEL_ARCH cpu_sse42 #include "kernel/device/cpu/kernel_arch_impl.h" diff --git a/intern/cycles/kernel/osl/closures.cpp b/intern/cycles/kernel/osl/closures.cpp index 4463280d38a..0300151088f 100644 --- a/intern/cycles/kernel/osl/closures.cpp +++ b/intern/cycles/kernel/osl/closures.cpp @@ -76,18 +76,17 @@ void 
OSLRenderServices::register_closures(OSL::ShadingSystem *ss) /* Surface & Background */ template<> -void osl_eval_nodes(const KernelGlobalsCPU *kg, +void osl_eval_nodes(const ThreadKernelGlobalsCPU *kg, const void *state, ShaderData *sd, const uint32_t path_flag) { /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; shaderdata_to_shaderglobals( - kg, sd, path_flag, reinterpret_cast(&tdata->globals)); + kg, sd, path_flag, reinterpret_cast(&kg->osl.shader_globals)); /* clear trace data */ - tdata->tracedata.init = false; + kg->osl.tracedata.init = false; /* Used by render-services. */ sd->osl_globals = kg; @@ -101,30 +100,30 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, } /* execute shader for this point */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss; + OSL::ShaderGlobals *globals = &kg->osl.shader_globals; + OSL::ShadingContext *octx = kg->osl.context; const int shader = sd->shader & SHADER_MASK; if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) { /* background */ - if (kg->osl->background_state) { + if (kg->osl.globals->background_state) { #if OSL_LIBRARY_VERSION_CODE >= 11304 ss->execute(*octx, - *(kg->osl->background_state), - kg->osl_thread_index, + *(kg->osl.globals->background_state), + kg->osl.thread_index, 0, *globals, nullptr, nullptr); #else - ss->execute(octx, *(kg->osl->background_state), *globals); + ss->execute(octx, *(kg->osl.globals->background_state), *globals); #endif } } else { /* automatic bump shader */ - if (kg->osl->bump_state[shader]) { + if (kg->osl.globals->bump_state[shader]) { /* save state */ const float3 P = sd->P; const float dP = sd->dP; @@ -134,12 +133,13 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, /* set state as if undisplaced */ if (sd->flag & SD_HAS_DISPLACEMENT) { float data[9]; - const bool found = 
kg->osl->services->get_attribute(sd, - true, - OSLRenderServices::u_empty, - TypeVector, - OSLRenderServices::u_geom_undisplaced, - data); + const bool found = kg->osl.globals->services->get_attribute( + sd, + true, + OSLRenderServices::u_empty, + TypeVector, + OSLRenderServices::u_geom_undisplaced, + data); (void)found; assert(found); @@ -162,14 +162,14 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, /* execute bump shader */ #if OSL_LIBRARY_VERSION_CODE >= 11304 ss->execute(*octx, - *(kg->osl->bump_state[shader]), - kg->osl_thread_index, + *(kg->osl.globals->bump_state[shader]), + kg->osl.thread_index, 0, *globals, nullptr, nullptr); #else - ss->execute(octx, *(kg->osl->bump_state[shader]), *globals); + ss->execute(octx, *(kg->osl.globals->bump_state[shader]), *globals); #endif /* reset state */ @@ -182,17 +182,17 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, } /* surface shader */ - if (kg->osl->surface_state[shader]) { + if (kg->osl.globals->surface_state[shader]) { #if OSL_LIBRARY_VERSION_CODE >= 11304 ss->execute(*octx, - *(kg->osl->surface_state[shader]), - kg->osl_thread_index, + *(kg->osl.globals->surface_state[shader]), + kg->osl.thread_index, 0, *globals, nullptr, nullptr); #else - ss->execute(octx, *(kg->osl->surface_state[shader]), *globals); + ss->execute(octx, *(kg->osl.globals->surface_state[shader]), *globals); #endif } } @@ -206,18 +206,17 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, /* Volume */ template<> -void osl_eval_nodes(const KernelGlobalsCPU *kg, +void osl_eval_nodes(const ThreadKernelGlobalsCPU *kg, const void *state, ShaderData *sd, const uint32_t path_flag) { /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; shaderdata_to_shaderglobals( - kg, sd, path_flag, reinterpret_cast(&tdata->globals)); + kg, sd, path_flag, reinterpret_cast(&kg->osl.shader_globals)); /* clear trace data */ - tdata->tracedata.init = false; + kg->osl.tracedata.init = false; /* Used by render-services. 
*/ sd->osl_globals = kg; @@ -231,22 +230,22 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, } /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss; + OSL::ShaderGlobals *globals = &kg->osl.shader_globals; + OSL::ShadingContext *octx = kg->osl.context; const int shader = sd->shader & SHADER_MASK; - if (kg->osl->volume_state[shader]) { + if (kg->osl.globals->volume_state[shader]) { #if OSL_LIBRARY_VERSION_CODE >= 11304 ss->execute(*octx, - *(kg->osl->volume_state[shader]), - kg->osl_thread_index, + *(kg->osl.globals->volume_state[shader]), + kg->osl.thread_index, 0, *globals, nullptr, nullptr); #else - ss->execute(octx, *(kg->osl->volume_state[shader]), *globals); + ss->execute(octx, *(kg->osl.globals->volume_state[shader]), *globals); #endif } @@ -259,18 +258,17 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, /* Displacement */ template<> -void osl_eval_nodes(const KernelGlobalsCPU *kg, +void osl_eval_nodes(const ThreadKernelGlobalsCPU *kg, const void *state, ShaderData *sd, const uint32_t path_flag) { /* setup shader globals from shader data */ - OSLThreadData *tdata = kg->osl_tdata; shaderdata_to_shaderglobals( - kg, sd, path_flag, reinterpret_cast(&tdata->globals)); + kg, sd, path_flag, reinterpret_cast(&kg->osl.shader_globals)); /* clear trace data */ - tdata->tracedata.init = false; + kg->osl.tracedata.init = false; /* Used by render-services. 
*/ sd->osl_globals = kg; @@ -278,22 +276,22 @@ void osl_eval_nodes(const KernelGlobalsCPU *kg, sd->osl_shadow_path_state = nullptr; /* execute shader */ - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSL::ShaderGlobals *globals = &tdata->globals; - OSL::ShadingContext *octx = tdata->context; + OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss; + OSL::ShaderGlobals *globals = &kg->osl.shader_globals; + OSL::ShadingContext *octx = kg->osl.context; const int shader = sd->shader & SHADER_MASK; - if (kg->osl->displacement_state[shader]) { + if (kg->osl.globals->displacement_state[shader]) { #if OSL_LIBRARY_VERSION_CODE >= 11304 ss->execute(*octx, - *(kg->osl->displacement_state[shader]), - kg->osl_thread_index, + *(kg->osl.globals->displacement_state[shader]), + kg->osl.thread_index, 0, *globals, nullptr, nullptr); #else - ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals); + ss->execute(octx, *(kg->osl.globals->displacement_state[shader]), *globals); #endif } diff --git a/intern/cycles/kernel/osl/globals.cpp b/intern/cycles/kernel/osl/globals.cpp index 626036eb27d..52b408b1d69 100644 --- a/intern/cycles/kernel/osl/globals.cpp +++ b/intern/cycles/kernel/osl/globals.cpp @@ -4,57 +4,55 @@ #include -#include "kernel/globals.h" -#include "kernel/types.h" - #include "kernel/osl/globals.h" -#include "kernel/osl/services.h" CCL_NAMESPACE_BEGIN -void OSLGlobals::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals, const int thread_index) +OSLThreadData::OSLThreadData(OSLGlobals *osl_globals, const int thread_index) + : globals(osl_globals), thread_index(thread_index) { - /* no osl used? */ - if (!osl_globals->use) { - kg->osl = nullptr; + if (globals == nullptr || globals->use == false) { return; } - /* Per thread kernel data init. 
*/ - kg->osl = osl_globals; + ss = globals->ss; - OSL::ShadingSystem *ss = kg->osl->ss; - OSLThreadData *tdata = new OSLThreadData(); + memset((void *)&shader_globals, 0, sizeof(shader_globals)); + shader_globals.tracedata = &tracedata; - memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals)); - tdata->globals.tracedata = &tdata->tracedata; - tdata->osl_thread_info = ss->create_thread_info(); - tdata->context = ss->get_context(tdata->osl_thread_info); - - tdata->oiio_thread_info = osl_globals->ts->get_perthread_info(); - - kg->osl_ss = (OSLShadingSystem *)ss; - kg->osl_tdata = tdata; - kg->osl_thread_index = thread_index; + osl_thread_info = ss->create_thread_info(); + context = ss->get_context(osl_thread_info); + oiio_thread_info = globals->ts->get_perthread_info(); } -void OSLGlobals::thread_free(KernelGlobalsCPU *kg) +OSLThreadData::~OSLThreadData() { - if (!kg->osl) { - return; + if (context) { + ss->release_context(context); } + if (osl_thread_info) { + ss->destroy_thread_info(osl_thread_info); + } +} - OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss; - OSLThreadData *tdata = kg->osl_tdata; - ss->release_context(tdata->context); +OSLThreadData::OSLThreadData(OSLThreadData &&other) noexcept + : globals(other.globals), + ss(other.ss), + thread_index(other.thread_index), + shader_globals(other.shader_globals), + tracedata(other.tracedata), + osl_thread_info(other.osl_thread_info), + context(other.context), + oiio_thread_info(other.oiio_thread_info) +{ + shader_globals.tracedata = &tracedata; - ss->destroy_thread_info(tdata->osl_thread_info); - - delete tdata; - - kg->osl = nullptr; - kg->osl_ss = nullptr; - kg->osl_tdata = nullptr; + memset((void *)&other.shader_globals, 0, sizeof(other.shader_globals)); + memset((void *)&other.tracedata, 0, sizeof(other.tracedata)); + other.thread_index = -1; + other.context = nullptr; + other.osl_thread_info = nullptr; + other.oiio_thread_info = nullptr; } CCL_NAMESPACE_END diff --git 
a/intern/cycles/kernel/osl/globals.h b/intern/cycles/kernel/osl/globals.h index 3308603642c..3a12b30fe47 100644 --- a/intern/cycles/kernel/osl/globals.h +++ b/intern/cycles/kernel/osl/globals.h @@ -23,6 +23,7 @@ CCL_NAMESPACE_BEGIN class OSLRenderServices; class ColorSpaceProcessor; +struct ThreadKernelGlobalsCPU; /* OSL Globals * @@ -39,12 +40,6 @@ struct OSLGlobals { use = false; } - /* per thread data */ - static void thread_init(struct KernelGlobalsCPU *kg, - OSLGlobals *osl_globals, - const int thread_index); - static void thread_free(struct KernelGlobalsCPU *kg); - bool use; /* shading system */ @@ -78,11 +73,27 @@ struct OSLTraceData { /* thread key for thread specific data lookup */ struct OSLThreadData { - OSL::ShaderGlobals globals; - OSL::PerThreadInfo *osl_thread_info; - OSLTraceData tracedata; - OSL::ShadingContext *context; - OIIO::TextureSystem::Perthread *oiio_thread_info; + /* Global Data */ + OSLGlobals *globals = nullptr; + OSL::ShadingSystem *ss = nullptr; + + /* Per-thread data. 
*/ + int thread_index = -1; + + mutable OSL::ShaderGlobals shader_globals; + mutable OSLTraceData tracedata; + + OSL::PerThreadInfo *osl_thread_info = nullptr; + OSL::ShadingContext *context = nullptr; + OIIO::TextureSystem::Perthread *oiio_thread_info = nullptr; + + OSLThreadData(OSLGlobals *globals, const int thread_index); + ~OSLThreadData(); + + OSLThreadData(OSLThreadData &other) = delete; + OSLThreadData(OSLThreadData &&other) noexcept; + OSLThreadData &operator=(const OSLThreadData &other) = delete; + OSLThreadData &operator=(OSLThreadData &&other) = delete; }; CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h index 7f634f1b660..bbb909e3458 100644 --- a/intern/cycles/kernel/osl/osl.h +++ b/intern/cycles/kernel/osl/osl.h @@ -172,7 +172,7 @@ ccl_device void flatten_closure_tree(KernelGlobals kg, #ifndef __KERNEL_GPU__ template -void osl_eval_nodes(const KernelGlobalsCPU *kg, +void osl_eval_nodes(const ThreadKernelGlobalsCPU *kg, const void *state, ShaderData *sd, uint32_t path_flag); diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index e733178aba0..7a60edf7519 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -148,7 +148,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; const int object = sd->object; if (object != OBJECT_NONE) { @@ -188,7 +188,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. 
*/ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; const int object = sd->object; if (object != OBJECT_NONE) { @@ -225,7 +225,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, const float time) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; if (from == u_ndc) { copy_matrix(result, kernel_data.cam.ndctoworld); @@ -257,7 +257,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, const float time) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; if (to == u_ndc) { copy_matrix(result, kernel_data.cam.worldtondc); @@ -291,7 +291,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. */ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; const int object = sd->object; if (object != OBJECT_NONE) { @@ -319,7 +319,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, * a concept of shader space, so we just use object space for both. 
*/ if (xform) { const ShaderData *sd = (const ShaderData *)xform; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; const int object = sd->object; if (object != OBJECT_NONE) { @@ -344,7 +344,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSLUStringHash from) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; if (from == u_ndc) { copy_matrix(result, kernel_data.cam.ndctoworld); @@ -371,7 +371,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSLUStringHash to) { ShaderData *sd = (ShaderData *)(sg->renderstate); - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; if (to == u_ndc) { copy_matrix(result, kernel_data.cam.worldtondc); @@ -727,7 +727,7 @@ static bool set_attribute_matrix(const Transform &tfm, const TypeDesc type, void return false; } -static bool get_object_attribute(const KernelGlobalsCPU *kg, +static bool get_object_attribute(const ThreadKernelGlobalsCPU *kg, ShaderData *sd, const AttributeDescriptor &desc, const TypeDesc &type, @@ -803,7 +803,7 @@ static bool get_object_attribute(const KernelGlobalsCPU *kg, return false; } -bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg, +bool OSLRenderServices::get_object_standard_attribute(const ThreadKernelGlobalsCPU *kg, ShaderData *sd, OSLUStringHash name, const TypeDesc type, @@ -924,7 +924,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg return set_attribute_float3_3(P, type, derivatives, val); } if (name == u_geom_name) { - const ustring object_name = kg->osl->object_names[sd->object]; + const ustring object_name = kg->osl.globals->object_names[sd->object]; return set_attribute_string(object_name, type, derivatives, val); } if (name == u_is_smooth) { @@ -979,7 +979,7 @@ bool 
OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg return get_background_attribute(kg, sd, name, type, derivatives, val); } -bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, +bool OSLRenderServices::get_background_attribute(const ThreadKernelGlobalsCPU *kg, ShaderData *sd, OSLUStringHash name, const TypeDesc type, @@ -1038,8 +1038,7 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg, } if (name == u_ndc) { /* NDC coordinates with special exception for orthographic projection. */ - OSLThreadData *tdata = kg->osl_tdata; - OSL::ShaderGlobals *globals = &tdata->globals; + OSL::ShaderGlobals *globals = &kg->osl.shader_globals; float3 ndc[3]; if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && @@ -1090,14 +1089,15 @@ bool OSLRenderServices::get_attribute(ShaderData *sd, OSLUStringHash name, void *val) { - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; int object; /* lookup of attribute on another object */ if (object_name != u_empty) { - const OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name); + const OSLGlobals::ObjectNameMap::iterator it = kg->osl.globals->object_name_map.find( + object_name); - if (it == kg->osl->object_name_map.end()) { + if (it == kg->osl.globals->object_name_map.end()) { return false; } @@ -1246,7 +1246,7 @@ bool OSLRenderServices::texture(OSLUStringHash filename, OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle; const OSLTextureHandle::Type texture_type = (handle) ? 
handle->type : OSLTextureHandle::OIIO; ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; + const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals; bool status = false; switch (texture_type) { @@ -1351,8 +1351,7 @@ bool OSLRenderServices::texture(OSLUStringHash filename, if (handle && handle->oiio_handle) { if (texture_thread_info == nullptr) { - OSLThreadData *tdata = kernel_globals->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; + texture_thread_info = kernel_globals->osl.oiio_thread_info; } status = ts->texture(handle->oiio_handle, @@ -1460,9 +1459,8 @@ bool OSLRenderServices::texture3d(OSLUStringHash filename, if (handle && handle->oiio_handle) { if (texture_thread_info == nullptr) { ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - OSLThreadData *tdata = kernel_globals->osl_tdata; - texture_thread_info = tdata->oiio_thread_info; + const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals; + texture_thread_info = kernel_globals->osl.oiio_thread_info; } status = ts->texture3d(handle->oiio_handle, @@ -1546,9 +1544,8 @@ bool OSLRenderServices::environment(OSLUStringHash filename, if (handle && handle->oiio_handle) { if (thread_info == nullptr) { ShaderData *sd = (ShaderData *)(sg->renderstate); - KernelGlobals kernel_globals = sd->osl_globals; - OSLThreadData *tdata = kernel_globals->osl_tdata; - thread_info = tdata->oiio_thread_info; + const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals; + thread_info = kernel_globals->osl.oiio_thread_info; } status = ts->environment(handle->oiio_handle, @@ -1726,7 +1723,7 @@ bool OSLRenderServices::trace(TraceOpt &options, tracedata->hit = false; tracedata->sd.osl_globals = sd->osl_globals; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; /* Can't ray-trace from shaders like displacement, before BVH exists. 
*/ if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) { @@ -1759,7 +1756,7 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, } ShaderData *sd = &tracedata->sd; - const KernelGlobalsCPU *kg = sd->osl_globals; + const ThreadKernelGlobalsCPU *kg = sd->osl_globals; if (!tracedata->setup) { /* lazy shader data setup */ diff --git a/intern/cycles/kernel/osl/services.h b/intern/cycles/kernel/osl/services.h index 0b063d894d0..28e6417d3e0 100644 --- a/intern/cycles/kernel/osl/services.h +++ b/intern/cycles/kernel/osl/services.h @@ -30,7 +30,7 @@ CCL_NAMESPACE_BEGIN class Scene; struct ShaderData; -struct KernelGlobalsCPU; +struct ThreadKernelGlobalsCPU; /* OSL Texture Handle * @@ -276,13 +276,13 @@ class OSLRenderServices : public OSL::RendererServices { void *data) override; #endif - static bool get_background_attribute(const KernelGlobalsCPU *kg, + static bool get_background_attribute(const ThreadKernelGlobalsCPU *kg, ShaderData *sd, OSLUStringHash name, const TypeDesc type, bool derivatives, void *val); - static bool get_object_standard_attribute(const KernelGlobalsCPU *kg, + static bool get_object_standard_attribute(const ThreadKernelGlobalsCPU *kg, ShaderData *sd, OSLUStringHash name, const TypeDesc type, diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index 5d7064f4987..07c82959aa0 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1191,7 +1191,7 @@ struct ccl_align(16) ShaderData # ifdef __KERNEL_GPU__ ccl_private uint8_t *osl_closure_pool; # else - const struct KernelGlobalsCPU *osl_globals; + const struct ThreadKernelGlobalsCPU *osl_globals; const struct IntegratorStateCPU *osl_path_state; const struct IntegratorShadowStateCPU *osl_shadow_path_state; # endif diff --git a/intern/cycles/scene/osl.cpp b/intern/cycles/scene/osl.cpp index 302f2348179..13f4450a965 100644 --- a/intern/cycles/scene/osl.cpp +++ b/intern/cycles/scene/osl.cpp @@ -141,7 +141,7 @@ void 
OSLShaderManager::device_update_specific(Device *device, /* collect shader groups from all shaders */ for (Shader *shader : scene->shaders) { device->foreach_device([shader, background_shader](Device *sub_device) { - OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory(); + OSLGlobals *og = sub_device->get_cpu_osl_memory(); /* push state to array for lookup */ og->surface_state.push_back(shader->osl_surface_ref); @@ -161,7 +161,7 @@ void OSLShaderManager::device_update_specific(Device *device, /* setup shader engine */ device->foreach_device([](Device *sub_device) { - OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory(); + OSLGlobals *og = sub_device->get_cpu_osl_memory(); OSL::ShadingSystem *ss = ss_shared[sub_device->info.type]; og->ss = ss; @@ -228,7 +228,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s /* clear shader engine */ device->foreach_device([](Device *sub_device) { - OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory(); + OSLGlobals *og = sub_device->get_cpu_osl_memory(); og->use = false; og->ss = nullptr; @@ -712,7 +712,7 @@ void OSLShaderManager::osl_image_slots(Device *device, { set services_shared; device->foreach_device([&services_shared](Device *sub_device) { - OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory(); + OSLGlobals *og = sub_device->get_cpu_osl_memory(); services_shared.insert(og->services); });