Refactor: Cycles: Make CPU kernel globals storage more sane
Pull Request: https://projects.blender.org/blender/blender/pulls/132361
This commit is contained in:
@@ -19,8 +19,6 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
static float precompute_ggx_E(const float rough, const float mu, const float3 rand)
|
||||
{
|
||||
KernelGlobalsCPU kg;
|
||||
|
||||
MicrofacetBsdf bsdf;
|
||||
bsdf.weight = one_float3();
|
||||
bsdf.sample_weight = 1.0f;
|
||||
@@ -36,7 +34,7 @@ static float precompute_ggx_E(const float rough, const float mu, const float3 ra
|
||||
float pdf = 0.0f;
|
||||
float sampled_eta;
|
||||
float2 sampled_roughness;
|
||||
bsdf_microfacet_ggx_sample(&kg,
|
||||
bsdf_microfacet_ggx_sample(nullptr,
|
||||
(ShaderClosure *)&bsdf,
|
||||
make_float3(0.0f, 0.0f, 1.0f),
|
||||
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
|
||||
@@ -57,8 +55,6 @@ static float precompute_ggx_glass_E(const float rough,
|
||||
const float eta,
|
||||
const float3 rand)
|
||||
{
|
||||
KernelGlobalsCPU kg;
|
||||
|
||||
MicrofacetBsdf bsdf;
|
||||
bsdf.weight = one_float3();
|
||||
bsdf.sample_weight = 1.0f;
|
||||
@@ -74,7 +70,7 @@ static float precompute_ggx_glass_E(const float rough,
|
||||
float pdf = 0.0f;
|
||||
float sampled_eta;
|
||||
float2 sampled_roughness;
|
||||
bsdf_microfacet_ggx_sample(&kg,
|
||||
bsdf_microfacet_ggx_sample(nullptr,
|
||||
(ShaderClosure *)&bsdf,
|
||||
make_float3(0.0f, 0.0f, 1.0f),
|
||||
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
|
||||
@@ -93,8 +89,6 @@ static float precompute_ggx_glass_E(const float rough,
|
||||
static float precompute_ggx_gen_schlick_s(
|
||||
const float rough, const float mu, const float eta, const float exponent, const float3 rand)
|
||||
{
|
||||
KernelGlobalsCPU kg;
|
||||
|
||||
MicrofacetBsdf bsdf;
|
||||
bsdf.weight = one_float3();
|
||||
bsdf.sample_weight = 1.0f;
|
||||
@@ -120,7 +114,7 @@ static float precompute_ggx_gen_schlick_s(
|
||||
float pdf = 0.0f;
|
||||
float sampled_eta;
|
||||
float2 sampled_roughness;
|
||||
bsdf_microfacet_ggx_sample(&kg,
|
||||
bsdf_microfacet_ggx_sample(nullptr,
|
||||
(ShaderClosure *)&bsdf,
|
||||
make_float3(0.0f, 0.0f, 1.0f),
|
||||
make_float3(sqrtf(1.0f - sqr(mu)), 0.0f, mu),
|
||||
|
||||
@@ -33,8 +33,6 @@ set(SRC_CPU
|
||||
cpu/kernel.cpp
|
||||
cpu/kernel.h
|
||||
cpu/kernel_function.h
|
||||
cpu/kernel_thread_globals.cpp
|
||||
cpu/kernel_thread_globals.h
|
||||
)
|
||||
|
||||
set(SRC_CUDA
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#endif
|
||||
|
||||
#include "device/cpu/kernel.h"
|
||||
#include "device/cpu/kernel_thread_globals.h"
|
||||
|
||||
#include "device/device.h"
|
||||
|
||||
@@ -56,9 +55,6 @@ CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_
|
||||
info.cpu_threads = TaskScheduler::max_concurrency();
|
||||
}
|
||||
|
||||
#ifdef WITH_OSL
|
||||
kernel_globals.osl = &osl_globals;
|
||||
#endif
|
||||
#ifdef WITH_EMBREE
|
||||
embree_device = rtcNewDevice("verbose=0");
|
||||
#endif
|
||||
@@ -296,19 +292,19 @@ void *CPUDevice::get_guiding_device() const
|
||||
}
|
||||
|
||||
void CPUDevice::get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
|
||||
vector<ThreadKernelGlobalsCPU> &kernel_thread_globals)
|
||||
{
|
||||
/* Ensure latest texture info is loaded into kernel globals before returning. */
|
||||
load_texture_info();
|
||||
|
||||
kernel_thread_globals.clear();
|
||||
void *osl_memory = get_cpu_osl_memory();
|
||||
OSLGlobals *osl_globals = get_cpu_osl_memory();
|
||||
for (int i = 0; i < info.cpu_threads; i++) {
|
||||
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler, i);
|
||||
kernel_thread_globals.emplace_back(kernel_globals, osl_globals, profiler, i);
|
||||
}
|
||||
}
|
||||
|
||||
void *CPUDevice::get_cpu_osl_memory()
|
||||
OSLGlobals *CPUDevice::get_cpu_osl_memory()
|
||||
{
|
||||
#ifdef WITH_OSL
|
||||
return &osl_globals;
|
||||
|
||||
@@ -85,8 +85,8 @@ class CPUDevice : public Device {
|
||||
void *get_guiding_device() const override;
|
||||
|
||||
void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
|
||||
void *get_cpu_osl_memory() override;
|
||||
vector<ThreadKernelGlobalsCPU> &kernel_thread_globals) override;
|
||||
OSLGlobals *get_cpu_osl_memory() override;
|
||||
|
||||
protected:
|
||||
bool load_kernels(uint /*kernel_features*/) override;
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelGlobalsCPU;
|
||||
struct ThreadKernelGlobalsCPU;
|
||||
struct KernelFilmConvert;
|
||||
struct IntegratorStateCPU;
|
||||
struct TileInfo;
|
||||
@@ -19,10 +19,11 @@ class CPUKernels {
|
||||
/* Integrator. */
|
||||
|
||||
using IntegratorFunction =
|
||||
CPUKernelFunction<void (*)(const KernelGlobalsCPU *kg, IntegratorStateCPU *state)>;
|
||||
using IntegratorShadeFunction = CPUKernelFunction<void (*)(
|
||||
const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer)>;
|
||||
using IntegratorInitFunction = CPUKernelFunction<bool (*)(const KernelGlobalsCPU *kg,
|
||||
CPUKernelFunction<void (*)(const ThreadKernelGlobalsCPU *kg, IntegratorStateCPU *state)>;
|
||||
using IntegratorShadeFunction = CPUKernelFunction<void (*)(const ThreadKernelGlobalsCPU *kg,
|
||||
IntegratorStateCPU *state,
|
||||
ccl_global float *render_buffer)>;
|
||||
using IntegratorInitFunction = CPUKernelFunction<bool (*)(const ThreadKernelGlobalsCPU *kg,
|
||||
IntegratorStateCPU *state,
|
||||
KernelWorkTile *tile,
|
||||
ccl_global float *render_buffer)>;
|
||||
@@ -45,7 +46,7 @@ class CPUKernels {
|
||||
/* Shader evaluation. */
|
||||
|
||||
using ShaderEvalFunction = CPUKernelFunction<void (*)(
|
||||
const KernelGlobalsCPU *kg, const KernelShaderEvalInput *, float *, const int)>;
|
||||
const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *, float *, const int)>;
|
||||
|
||||
ShaderEvalFunction shader_eval_displace;
|
||||
ShaderEvalFunction shader_eval_background;
|
||||
@@ -54,7 +55,7 @@ class CPUKernels {
|
||||
/* Adaptive stopping. */
|
||||
|
||||
using AdaptiveSamplingConvergenceCheckFunction =
|
||||
CPUKernelFunction<bool (*)(const KernelGlobalsCPU *kg,
|
||||
CPUKernelFunction<bool (*)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int y,
|
||||
@@ -64,7 +65,7 @@ class CPUKernels {
|
||||
int stride)>;
|
||||
|
||||
using AdaptiveSamplingFilterXFunction =
|
||||
CPUKernelFunction<void (*)(const KernelGlobalsCPU *kg,
|
||||
CPUKernelFunction<void (*)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int y,
|
||||
const int start_x,
|
||||
@@ -73,7 +74,7 @@ class CPUKernels {
|
||||
int stride)>;
|
||||
|
||||
using AdaptiveSamplingFilterYFunction =
|
||||
CPUKernelFunction<void (*)(const KernelGlobalsCPU *kg,
|
||||
CPUKernelFunction<void (*)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int start_y,
|
||||
@@ -89,7 +90,7 @@ class CPUKernels {
|
||||
/* Cryptomatte. */
|
||||
|
||||
using CryptomattePostprocessFunction = CPUKernelFunction<void (*)(
|
||||
const KernelGlobalsCPU *kg, ccl_global float *render_buffer, const int pixel_index)>;
|
||||
const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int pixel_index)>;
|
||||
|
||||
CryptomattePostprocessFunction cryptomatte_postprocess;
|
||||
|
||||
|
||||
@@ -1,91 +0,0 @@
|
||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include "device/cpu/kernel_thread_globals.h"
|
||||
|
||||
#include "kernel/osl/globals.h"
|
||||
|
||||
#include "util/profiling.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Build the per-thread copy of the kernel globals.
 *
 * The given globals are copied into the base class, after which the runtime pointers of the copy
 * are reset before the per-thread resources are set up.
 *
 * - kernel_globals: globals to copy from.
 * - osl_globals_memory: untyped pointer to the `OSLGlobals`; only used when built with OSL.
 * - cpu_profiler: profiler used by `start_profiling()` / `stop_profiling()`.
 * - thread_index: index forwarded to the OSL thread initialization; only used with OSL. */
CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
                                               void *osl_globals_memory,
                                               Profiler &cpu_profiler,
                                               const int thread_index)
    : KernelGlobalsCPU(kernel_globals), cpu_profiler_(cpu_profiler)
{
  /* Do not inherit any runtime pointer from the globals which were copied. */
  clear_runtime_pointers();

#ifndef WITH_OSL
  /* Both arguments are only consumed by the OSL setup. */
  (void)thread_index;
  (void)osl_globals_memory;
#else
  OSLGlobals *osl_globals = static_cast<OSLGlobals *>(osl_globals_memory);
  OSLGlobals::thread_init(this, osl_globals, thread_index);
#endif

#ifdef WITH_PATH_GUIDING
  /* Owned by this object, deleted in the destructor. */
  opgl_path_segment_storage = new openpgl::cpp::PathSegmentStorage();
#endif
}
|
||||
|
||||
/* Move construction: take over the state of `other`, then clear the runtime pointers of `other`
 * so that its destructor does not release what this object now refers to. */
CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
    : KernelGlobalsCPU(static_cast<KernelGlobalsCPU &&>(other)),
      cpu_profiler_(other.cpu_profiler_)
{
  other.clear_runtime_pointers();
}
|
||||
|
||||
/* Release the per-thread resources referenced by this object. */
CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
{
#ifdef WITH_OSL
  /* Counterpart of the `OSLGlobals::thread_init()` call done in the constructor. */
  OSLGlobals::thread_free(this);
#endif

#ifdef WITH_PATH_GUIDING
  /* These pointers are null after `clear_runtime_pointers()`, in which case `delete` does
   * nothing. */
  delete opgl_path_segment_storage;
  delete opgl_surface_sampling_distribution;
  delete opgl_volume_sampling_distribution;
#endif
}
|
||||
|
||||
/* Move assignment: take over the state of `other`.
 *
 * The resources currently held by this object are released first, the same way the destructor
 * releases them. Without this the base-class assignment below would overwrite the owning
 * pointers and leak whatever they referred to.
 *
 * After the assignment the runtime pointers of `other` are cleared, so that destroying `other`
 * does not release what this object now refers to. Self-assignment is a no-op.
 *
 * Returns a reference to this object. */
CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other)
{
  if (this == &other) {
    return *this;
  }

  /* Release what this object holds before its pointers get overwritten. */
#ifdef WITH_OSL
  OSLGlobals::thread_free(this);
#endif

#ifdef WITH_PATH_GUIDING
  delete opgl_path_segment_storage;
  delete opgl_surface_sampling_distribution;
  delete opgl_volume_sampling_distribution;
#endif

  /* Only the base part is assigned: `cpu_profiler_` is a reference and can not be re-seated. */
  *static_cast<KernelGlobalsCPU *>(this) = *static_cast<KernelGlobalsCPU *>(&other);

  other.clear_runtime_pointers();

  return *this;
}
|
||||
|
||||
/* Reset the runtime pointers to null without releasing what they point to. */
void CPUKernelThreadGlobals::clear_runtime_pointers()
{
#ifdef WITH_PATH_GUIDING
  /* Pointers which are deleted by the destructor. */
  opgl_path_segment_storage = nullptr;
  opgl_surface_sampling_distribution = nullptr;
  opgl_volume_sampling_distribution = nullptr;

  /* Pointers which are not deleted by the destructor. */
  opgl_sample_data_storage = nullptr;
  opgl_guiding_field = nullptr;
#endif

#ifdef WITH_OSL
  osl = nullptr;
#endif
}
|
||||
|
||||
void CPUKernelThreadGlobals::start_profiling()
|
||||
{
|
||||
cpu_profiler_.add_state(&profiler);
|
||||
}
|
||||
|
||||
void CPUKernelThreadGlobals::stop_profiling()
|
||||
{
|
||||
cpu_profiler_.remove_state(&profiler);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -1,45 +0,0 @@
|
||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/globals.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Profiler;
|
||||
|
||||
/* A special class which extends memory ownership of the `KernelGlobalsCPU` decoupling any resource
|
||||
* which is not thread-safe for access. Every worker thread which needs to operate on
|
||||
* `KernelGlobalsCPU` needs to initialize its own copy of this object.
|
||||
*
|
||||
* NOTE: Only minimal subset of objects are copied: `KernelData` is never copied. This means that
|
||||
* there is no unnecessary data duplication happening when using this object. */
|
||||
class CPUKernelThreadGlobals : public KernelGlobalsCPU {
|
||||
public:
|
||||
/* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
|
||||
* without OSL support. Will avoid need to those unnamed pointers and casts. */
|
||||
CPUKernelThreadGlobals(const KernelGlobalsCPU &kernel_globals,
|
||||
void *osl_globals_memory,
|
||||
Profiler &cpu_profiler,
|
||||
const int thread_index);
|
||||
|
||||
~CPUKernelThreadGlobals();
|
||||
|
||||
CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete;
|
||||
CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept;
|
||||
|
||||
CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
|
||||
CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
|
||||
|
||||
void start_profiling();
|
||||
void stop_profiling();
|
||||
|
||||
protected:
|
||||
void clear_runtime_pointers();
|
||||
|
||||
Profiler &cpu_profiler_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -483,12 +483,12 @@ const CPUKernels &Device::get_cpu_kernels()
|
||||
}
|
||||
|
||||
void Device::get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
|
||||
vector<ThreadKernelGlobalsCPU> & /*kernel_thread_globals*/)
|
||||
{
|
||||
LOG(FATAL) << "Device does not support CPU kernels.";
|
||||
}
|
||||
|
||||
void *Device::get_cpu_osl_memory()
|
||||
OSLGlobals *Device::get_cpu_osl_memory()
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -28,9 +28,11 @@ class BVH;
|
||||
class DeviceQueue;
|
||||
class Progress;
|
||||
class CPUKernels;
|
||||
class CPUKernelThreadGlobals;
|
||||
class Scene;
|
||||
|
||||
struct OSLGlobals;
|
||||
struct ThreadKernelGlobalsCPU;
|
||||
|
||||
/* Device Types */
|
||||
|
||||
enum DeviceType {
|
||||
@@ -216,9 +218,9 @@ class Device {
|
||||
static const CPUKernels &get_cpu_kernels();
|
||||
/* Get kernel globals to pass to kernels. */
|
||||
virtual void get_cpu_kernel_thread_globals(
|
||||
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
|
||||
vector<ThreadKernelGlobalsCPU> & /*kernel_thread_globals*/);
|
||||
/* Get OpenShadingLanguage memory buffer. */
|
||||
virtual void *get_cpu_osl_memory();
|
||||
virtual OSLGlobals *get_cpu_osl_memory();
|
||||
|
||||
/* Acceleration structure building. */
|
||||
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
|
||||
|
||||
@@ -257,7 +257,7 @@ class MultiDevice : public Device {
|
||||
}
|
||||
}
|
||||
|
||||
void *get_cpu_osl_memory() override
|
||||
OSLGlobals *get_cpu_osl_memory() override
|
||||
{
|
||||
/* Always return the OSL memory of the CPU device (this works since the constructor above
|
||||
* guarantees that CPU devices are always added to the back). */
|
||||
|
||||
@@ -1006,7 +1006,7 @@ bool OptiXDevice::load_osl_kernels()
|
||||
# endif
|
||||
}
|
||||
|
||||
void *OptiXDevice::get_cpu_osl_memory()
|
||||
OSLGlobals *OptiXDevice::get_cpu_osl_memory()
|
||||
{
|
||||
# ifdef WITH_OSL
|
||||
return &osl_globals;
|
||||
|
||||
@@ -115,7 +115,7 @@ class OptiXDevice : public CUDADevice {
|
||||
|
||||
unique_ptr<DeviceQueue> gpu_queue_create() override;
|
||||
|
||||
void *get_cpu_osl_memory() override;
|
||||
OSLGlobals *get_cpu_osl_memory() override;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -28,9 +28,9 @@ static inline tbb::task_arena local_tbb_arena_create(const Device *device)
|
||||
return tbb::task_arena(device->info.cpu_threads);
|
||||
}
|
||||
|
||||
/* Get CPUKernelThreadGlobals for the current thread. */
|
||||
static inline CPUKernelThreadGlobals *kernel_thread_globals_get(
|
||||
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
|
||||
/* Get ThreadKernelGlobalsCPU for the current thread. */
|
||||
static inline ThreadKernelGlobalsCPU *kernel_thread_globals_get(
|
||||
vector<ThreadKernelGlobalsCPU> &kernel_thread_globals)
|
||||
{
|
||||
const int thread_index = tbb::this_task_arena::current_thread_index();
|
||||
DCHECK_GE(thread_index, 0);
|
||||
@@ -65,7 +65,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
const int64_t total_pixels_num = image_width * image_height;
|
||||
|
||||
if (device_->profiler.active()) {
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
for (ThreadKernelGlobalsCPU &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.start_profiling();
|
||||
}
|
||||
}
|
||||
@@ -91,13 +91,13 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
work_tile.offset = effective_buffer_params_.offset;
|
||||
work_tile.stride = effective_buffer_params_.stride;
|
||||
|
||||
CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_get(kernel_thread_globals_);
|
||||
ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_get(kernel_thread_globals_);
|
||||
|
||||
render_samples_full_pipeline(kernel_globals, work_tile, samples_num);
|
||||
});
|
||||
});
|
||||
if (device_->profiler.active()) {
|
||||
for (CPUKernelThreadGlobals &kernel_globals : kernel_thread_globals_) {
|
||||
for (ThreadKernelGlobalsCPU &kernel_globals : kernel_thread_globals_) {
|
||||
kernel_globals.stop_profiling();
|
||||
}
|
||||
}
|
||||
@@ -105,7 +105,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
|
||||
statistics.occupancy = 1.0f;
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::render_samples_full_pipeline(KernelGlobalsCPU *kernel_globals,
|
||||
void PathTraceWorkCPU::render_samples_full_pipeline(ThreadKernelGlobalsCPU *kernel_globals,
|
||||
const KernelWorkTile &work_tile,
|
||||
const int samples_num)
|
||||
{
|
||||
@@ -230,7 +230,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(const float
|
||||
/* Check convergence and do x-filter in a single `parallel_for`, to reduce threading overhead. */
|
||||
local_arena.execute([&]() {
|
||||
parallel_for(full_y, full_y + height, [&](int y) {
|
||||
CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data();
|
||||
ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data();
|
||||
|
||||
bool row_converged = true;
|
||||
uint num_row_pixels_active = 0;
|
||||
@@ -255,7 +255,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(const float
|
||||
if (num_active_pixels) {
|
||||
local_arena.execute([&]() {
|
||||
parallel_for(full_x, full_x + width, [&](int x) {
|
||||
CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data();
|
||||
ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data();
|
||||
kernels_.adaptive_sampling_filter_y(
|
||||
kernel_globals, render_buffer, x, full_y, height, offset, stride);
|
||||
});
|
||||
@@ -277,7 +277,7 @@ void PathTraceWorkCPU::cryptomatte_postproces()
|
||||
/* Check convergence and do x-filter in a single `parallel_for`, to reduce threading overhead. */
|
||||
local_arena.execute([&]() {
|
||||
parallel_for(0, height, [&](int y) {
|
||||
CPUKernelThreadGlobals *kernel_globals = kernel_thread_globals_.data();
|
||||
ThreadKernelGlobalsCPU *kernel_globals = kernel_thread_globals_.data();
|
||||
int pixel_index = y * width;
|
||||
|
||||
for (int x = 0; x < width; ++x, ++pixel_index) {
|
||||
@@ -297,7 +297,7 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field,
|
||||
/* Linking the global guiding structures (e.g., Field and SampleStorage) to the per-thread
|
||||
* kernel globals. */
|
||||
for (int thread_index = 0; thread_index < kernel_thread_globals_.size(); thread_index++) {
|
||||
CPUKernelThreadGlobals &kg = kernel_thread_globals_[thread_index];
|
||||
ThreadKernelGlobalsCPU &kg = kernel_thread_globals_[thread_index];
|
||||
openpgl::cpp::Field *field = (openpgl::cpp::Field *)guiding_field;
|
||||
|
||||
/* Allocate sampling distributions. */
|
||||
@@ -305,17 +305,17 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field,
|
||||
|
||||
# if PATH_GUIDING_LEVEL >= 4
|
||||
if (kg.opgl_surface_sampling_distribution) {
|
||||
delete kg.opgl_surface_sampling_distribution;
|
||||
kg.opgl_surface_sampling_distribution = nullptr;
|
||||
kg.opgl_surface_sampling_distribution.reset();
|
||||
}
|
||||
if (kg.opgl_volume_sampling_distribution) {
|
||||
delete kg.opgl_volume_sampling_distribution;
|
||||
kg.opgl_volume_sampling_distribution = nullptr;
|
||||
kg.opgl_volume_sampling_distribution.reset();
|
||||
}
|
||||
|
||||
if (field) {
|
||||
kg.opgl_surface_sampling_distribution = new openpgl::cpp::SurfaceSamplingDistribution(field);
|
||||
kg.opgl_volume_sampling_distribution = new openpgl::cpp::VolumeSamplingDistribution(field);
|
||||
kg.opgl_surface_sampling_distribution =
|
||||
make_unique<openpgl::cpp::SurfaceSamplingDistribution>(field);
|
||||
kg.opgl_volume_sampling_distribution = make_unique<openpgl::cpp::VolumeSamplingDistribution>(
|
||||
field);
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -332,7 +332,7 @@ void PathTraceWorkCPU::guiding_init_kernel_globals(void *guiding_field,
|
||||
}
|
||||
|
||||
void PathTraceWorkCPU::guiding_push_sample_data_to_global_storage(
|
||||
KernelGlobalsCPU *kg,
|
||||
ThreadKernelGlobalsCPU *kg,
|
||||
IntegratorStateCPU *state,
|
||||
const ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#include "kernel/integrator/state.h"
|
||||
|
||||
#include "device/cpu/kernel_thread_globals.h"
|
||||
#include "device/queue.h"
|
||||
|
||||
#include "integrator/path_trace_work.h"
|
||||
@@ -16,7 +16,7 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelWorkTile;
|
||||
struct KernelGlobalsCPU;
|
||||
struct ThreadKernelGlobalsCPU;
|
||||
struct IntegratorStateCPU;
|
||||
|
||||
class CPUKernels;
|
||||
@@ -63,7 +63,7 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
|
||||
/* Pushes the collected training data/samples of a path to the global sample storage.
|
||||
* This function is called at the end of a random walk/path generation. */
|
||||
void guiding_push_sample_data_to_global_storage(KernelGlobalsCPU *kg,
|
||||
void guiding_push_sample_data_to_global_storage(ThreadKernelGlobalsCPU *kg,
|
||||
IntegratorStateCPU *state,
|
||||
const ccl_global float *ccl_restrict
|
||||
render_buffer);
|
||||
@@ -71,7 +71,7 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
|
||||
protected:
|
||||
/* Core path tracing routine. Renders the given work tile on the given queue. */
|
||||
void render_samples_full_pipeline(KernelGlobalsCPU *kernel_globals,
|
||||
void render_samples_full_pipeline(ThreadKernelGlobalsCPU *kernel_globals,
|
||||
const KernelWorkTile &work_tile,
|
||||
const int samples_num);
|
||||
|
||||
@@ -83,7 +83,7 @@ class PathTraceWorkCPU : public PathTraceWork {
|
||||
* More specifically, the `kernel_globals_` is local to each thread and nobody else is
|
||||
* accessing it, but some "localization" is required to decouple from kernel globals stored
|
||||
* on the device level. */
|
||||
vector<CPUKernelThreadGlobals> kernel_thread_globals_;
|
||||
vector<ThreadKernelGlobalsCPU> kernel_thread_globals_;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -2,13 +2,14 @@
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
|
||||
#include "integrator/shader_eval.h"
|
||||
|
||||
#include "device/device.h"
|
||||
#include "device/queue.h"
|
||||
|
||||
#include "device/cpu/kernel.h"
|
||||
#include "device/cpu/kernel_thread_globals.h"
|
||||
|
||||
#include "util/log.h"
|
||||
#include "util/progress.h"
|
||||
@@ -80,7 +81,7 @@ bool ShaderEval::eval_cpu(Device *device,
|
||||
device_vector<float> &output,
|
||||
const int64_t work_size)
|
||||
{
|
||||
vector<CPUKernelThreadGlobals> kernel_thread_globals;
|
||||
vector<ThreadKernelGlobalsCPU> kernel_thread_globals;
|
||||
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
|
||||
|
||||
/* Find required kernel function. */
|
||||
@@ -101,7 +102,7 @@ bool ShaderEval::eval_cpu(Device *device,
|
||||
}
|
||||
|
||||
const int thread_index = tbb::this_task_arena::current_thread_index();
|
||||
const KernelGlobalsCPU *kg = &kernel_thread_globals[thread_index];
|
||||
const ThreadKernelGlobalsCPU *kg = &kernel_thread_globals[thread_index];
|
||||
|
||||
switch (type) {
|
||||
case SHADER_EVAL_DISPLACE:
|
||||
|
||||
@@ -13,6 +13,7 @@ set(INC_SYS
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_CPU
|
||||
device/cpu/globals.cpp
|
||||
device/cpu/kernel.cpp
|
||||
device/cpu/kernel_sse42.cpp
|
||||
device/cpu/kernel_avx2.cpp
|
||||
|
||||
@@ -284,7 +284,7 @@ ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const ThreadKernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
@@ -324,7 +324,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const ThreadKernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
@@ -438,7 +438,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const ThreadKernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
@@ -541,7 +541,7 @@ ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
KernelGlobalsGPU *kg = nullptr;
|
||||
#else
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const ThreadKernelGlobalsCPU *kg = ctx->kg;
|
||||
#endif
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
@@ -622,7 +622,7 @@ ccl_device void kernel_embree_filter_func_backface_cull(const RTCFilterFunctionN
|
||||
}
|
||||
|
||||
CCLIntersectContext *ctx = ((CCLIntersectContext *)args->context);
|
||||
const KernelGlobalsCPU *kg = ctx->kg;
|
||||
const ThreadKernelGlobalsCPU *kg = ctx->kg;
|
||||
const Ray *cray = ctx->ray;
|
||||
|
||||
if (kernel_embree_is_self_intersection(
|
||||
|
||||
43
intern/cycles/kernel/device/cpu/globals.cpp
Normal file
43
intern/cycles/kernel/device/cpu/globals.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#include "kernel/osl/globals.h"
|
||||
|
||||
#include "util/guiding.h" // IWYU pragma: keep
|
||||
#include "util/profiling.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* Build the per-thread kernel globals.
 *
 * - kernel_globals: constant globals, copied into the base class.
 * - osl_globals: forwarded to the per-thread OSL data; only used when built with OSL.
 * - cpu_profiler: profiler used by `start_profiling()` / `stop_profiling()`.
 * - thread_index: forwarded to the per-thread OSL data; only used when built with OSL. */
ThreadKernelGlobalsCPU::ThreadKernelGlobalsCPU(const KernelGlobalsCPU &kernel_globals,
                                               OSLGlobals *osl_globals,
                                               Profiler &cpu_profiler,
                                               const int thread_index)
    : KernelGlobalsCPU(kernel_globals),
#ifdef WITH_OSL
      osl(osl_globals, thread_index),
#endif
#ifdef WITH_PATH_GUIDING
      /* Path segment storage owned by this thread. */
      opgl_path_segment_storage(make_unique<openpgl::cpp::PathSegmentStorage>()),
#endif
      cpu_profiler_(cpu_profiler)
{
#ifndef WITH_OSL
  /* Without OSL nothing consumes these arguments. */
  (void)osl_globals;
  (void)thread_index;
#endif
}
|
||||
|
||||
void ThreadKernelGlobalsCPU::start_profiling()
|
||||
{
|
||||
cpu_profiler_.add_state(&profiler);
|
||||
}
|
||||
|
||||
void ThreadKernelGlobalsCPU::stop_profiling()
|
||||
{
|
||||
cpu_profiler_.remove_state(&profiler);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -9,22 +9,23 @@
|
||||
#include "kernel/types.h"
|
||||
#include "kernel/util/profiler.h"
|
||||
|
||||
#ifdef __OSL__
|
||||
# include "kernel/osl/globals.h"
|
||||
#endif
|
||||
|
||||
#include "util/guiding.h" // IWYU pragma: keep
|
||||
#include "util/texture.h" // IWYU pragma: keep
|
||||
#include "util/unique_ptr.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct OSLGlobals;
|
||||
|
||||
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
|
||||
* the kernel, to access constant data. These are all stored as flat arrays.
|
||||
* These are really just standard arrays. We can't use actual globals because
|
||||
* multiple renders may be running inside the same process. */
|
||||
|
||||
#ifdef __OSL__
|
||||
struct OSLGlobals;
|
||||
struct OSLThreadData;
|
||||
struct OSLShadingSystem;
|
||||
#endif
|
||||
|
||||
/* Array for kernel data, with size to be able to assert on invalid data access. */
|
||||
template<typename T> struct kernel_array {
|
||||
const ccl_always_inline T &fetch(const int index) const
|
||||
@@ -37,38 +38,58 @@ template<typename T> struct kernel_array {
|
||||
int width = 0;
|
||||
};
|
||||
|
||||
/* Constant globals shared between all threads. */
|
||||
struct KernelGlobalsCPU {
|
||||
#define KERNEL_DATA_ARRAY(type, name) kernel_array<type> name;
|
||||
#include "kernel/data_arrays.h"
|
||||
|
||||
KernelData data = {};
|
||||
|
||||
ProfilingState profiler;
|
||||
};
|
||||
|
||||
/* Per-thread global state.
|
||||
*
|
||||
* To avoid pointer indirection, the constant globals are copied to each thread.
|
||||
*
|
||||
* This may not be ideal for cache pressure. Alternative would be to pass an
|
||||
* additional thread index to every function, and potentially to make the shared
|
||||
* part an actual global variable. That would match the GPU more closely, but
|
||||
* also require mutex locks for multiple Cycles instances. */
|
||||
struct ThreadKernelGlobalsCPU : public KernelGlobalsCPU {
|
||||
ThreadKernelGlobalsCPU(const KernelGlobalsCPU &kernel_globals,
|
||||
OSLGlobals *osl_globals_memory,
|
||||
Profiler &cpu_profiler,
|
||||
const int thread_index);
|
||||
|
||||
ThreadKernelGlobalsCPU(ThreadKernelGlobalsCPU &other) = delete;
|
||||
ThreadKernelGlobalsCPU(ThreadKernelGlobalsCPU &&other) noexcept = default;
|
||||
ThreadKernelGlobalsCPU &operator=(const ThreadKernelGlobalsCPU &other) = delete;
|
||||
ThreadKernelGlobalsCPU &operator=(ThreadKernelGlobalsCPU &&other) = delete;
|
||||
|
||||
void start_profiling();
|
||||
void stop_profiling();
|
||||
|
||||
#ifdef __OSL__
|
||||
/* On the CPU, we also have the OSL globals here. Most data structures are shared
|
||||
* with SVM, the difference is in the shaders and object/mesh attributes. */
|
||||
OSLGlobals *osl = nullptr;
|
||||
OSLShadingSystem *osl_ss = nullptr;
|
||||
OSLThreadData *osl_tdata = nullptr;
|
||||
int osl_thread_index = 0;
|
||||
OSLThreadData osl;
|
||||
#endif
|
||||
|
||||
#ifdef __PATH_GUIDING__
|
||||
/* Pointers to global data structures. */
|
||||
/* Pointers to shared global data structures. */
|
||||
openpgl::cpp::SampleStorage *opgl_sample_data_storage = nullptr;
|
||||
openpgl::cpp::Field *opgl_guiding_field = nullptr;
|
||||
|
||||
/* Local data structures owned by the thread. */
|
||||
openpgl::cpp::PathSegmentStorage *opgl_path_segment_storage = nullptr;
|
||||
openpgl::cpp::SurfaceSamplingDistribution *opgl_surface_sampling_distribution = nullptr;
|
||||
openpgl::cpp::VolumeSamplingDistribution *opgl_volume_sampling_distribution = nullptr;
|
||||
unique_ptr<openpgl::cpp::PathSegmentStorage> opgl_path_segment_storage;
|
||||
unique_ptr<openpgl::cpp::SurfaceSamplingDistribution> opgl_surface_sampling_distribution;
|
||||
unique_ptr<openpgl::cpp::VolumeSamplingDistribution> opgl_volume_sampling_distribution;
|
||||
#endif
|
||||
|
||||
/* **** Run-time data **** */
|
||||
|
||||
ProfilingState profiler;
|
||||
protected:
|
||||
Profiler &cpu_profiler_;
|
||||
};
|
||||
|
||||
using KernelGlobals = const KernelGlobalsCPU *;
|
||||
using KernelGlobals = const ThreadKernelGlobalsCPU *;
|
||||
|
||||
/* Abstraction macros */
|
||||
#define kernel_data_fetch(name, index) (kg->name.fetch(index))
|
||||
|
||||
@@ -43,6 +43,8 @@
|
||||
/* do nothing */
|
||||
#endif
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
|
||||
#include "kernel/device/cpu/kernel.h"
|
||||
#define KERNEL_ARCH cpu
|
||||
#include "kernel/device/cpu/kernel_arch_impl.h"
|
||||
|
||||
@@ -9,19 +9,21 @@
|
||||
*/
|
||||
|
||||
#define KERNEL_INTEGRATOR_FUNCTION(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
|
||||
IntegratorStateCPU *state)
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
|
||||
const ThreadKernelGlobalsCPU *ccl_restrict kg, IntegratorStateCPU *state)
|
||||
|
||||
#define KERNEL_INTEGRATOR_SHADE_FUNCTION(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
ccl_global float *render_buffer)
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
|
||||
const ThreadKernelGlobalsCPU *ccl_restrict kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
ccl_global float *render_buffer)
|
||||
|
||||
#define KERNEL_INTEGRATOR_INIT_FUNCTION(name) \
|
||||
bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *ccl_restrict kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
KernelWorkTile *tile, \
|
||||
ccl_global float *render_buffer)
|
||||
bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
|
||||
const ThreadKernelGlobalsCPU *ccl_restrict kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
KernelWorkTile *tile, \
|
||||
ccl_global float *render_buffer)
|
||||
|
||||
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_camera);
|
||||
KERNEL_INTEGRATOR_INIT_FUNCTION(init_from_bake);
|
||||
@@ -77,16 +79,16 @@ KERNEL_FILM_CONVERT_FUNCTION(float4)
|
||||
* Shader evaluation.
|
||||
*/
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset);
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset);
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
const KernelGlobalsCPU *kg,
|
||||
const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset);
|
||||
@@ -96,7 +98,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
*/
|
||||
|
||||
bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
|
||||
const KernelGlobalsCPU *kg,
|
||||
const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int y,
|
||||
@@ -105,14 +107,14 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
|
||||
const int offset,
|
||||
int stride);
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int y,
|
||||
const int start_x,
|
||||
const int width,
|
||||
const int offset,
|
||||
int stride);
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int start_y,
|
||||
@@ -124,7 +126,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP
|
||||
* Cryptomatte.
|
||||
*/
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
int pixel_index);
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ CCL_NAMESPACE_BEGIN
|
||||
/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
|
||||
* that it does not contain unused fields. */
|
||||
#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
|
||||
bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
|
||||
bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
KernelWorkTile *tile, \
|
||||
ccl_global float *render_buffer) \
|
||||
@@ -72,29 +72,31 @@ CCL_NAMESPACE_BEGIN
|
||||
}
|
||||
|
||||
#define DEFINE_INTEGRATOR_KERNEL(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
|
||||
IntegratorStateCPU *state) \
|
||||
{ \
|
||||
KERNEL_INVOKE(name, kg, state); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
|
||||
const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
ccl_global float *render_buffer) \
|
||||
{ \
|
||||
KERNEL_INVOKE(name, kg, state, render_buffer); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
|
||||
IntegratorStateCPU *state) \
|
||||
{ \
|
||||
KERNEL_INVOKE(name, kg, &state->shadow); \
|
||||
}
|
||||
|
||||
#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
|
||||
const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
|
||||
void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
|
||||
IntegratorStateCPU *state, \
|
||||
ccl_global float *render_buffer) \
|
||||
{ \
|
||||
KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \
|
||||
}
|
||||
@@ -118,7 +120,7 @@ DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(shade_shadow)
|
||||
* Shader evaluation.
|
||||
*/
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset)
|
||||
@@ -130,7 +132,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_displace)(const KernelGlobalsCPU *kg,
|
||||
#endif
|
||||
}
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset)
|
||||
@@ -143,7 +145,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_background)(const KernelGlobalsCPU *k
|
||||
}
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
const KernelGlobalsCPU *kg,
|
||||
const ThreadKernelGlobalsCPU *kg,
|
||||
const KernelShaderEvalInput *input,
|
||||
float *output,
|
||||
const int offset)
|
||||
@@ -160,7 +162,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader_eval_curve_shadow_transparency)(
|
||||
*/
|
||||
|
||||
bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
|
||||
const KernelGlobalsCPU *kg,
|
||||
const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int y,
|
||||
@@ -178,7 +180,7 @@ bool KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_convergence_check)(
|
||||
#endif
|
||||
}
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int y,
|
||||
const int start_x,
|
||||
@@ -193,7 +195,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_x)(const KernelGlobalsCP
|
||||
#endif
|
||||
}
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int x,
|
||||
const int start_y,
|
||||
@@ -212,7 +214,7 @@ void KERNEL_FUNCTION_FULL_NAME(adaptive_sampling_filter_y)(const KernelGlobalsCP
|
||||
* Cryptomatte.
|
||||
*/
|
||||
|
||||
void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *kg,
|
||||
void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const ThreadKernelGlobalsCPU *kg,
|
||||
ccl_global float *render_buffer,
|
||||
const int pixel_index)
|
||||
{
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
# endif
|
||||
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 */
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#include "kernel/device/cpu/kernel.h"
|
||||
#define KERNEL_ARCH cpu_avx2
|
||||
#include "kernel/device/cpu/kernel_arch_impl.h"
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
# endif
|
||||
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#include "kernel/device/cpu/kernel.h"
|
||||
#define KERNEL_ARCH cpu_sse42
|
||||
#include "kernel/device/cpu/kernel_arch_impl.h"
|
||||
|
||||
@@ -76,18 +76,17 @@ void OSLRenderServices::register_closures(OSL::ShadingSystem *ss)
|
||||
/* Surface & Background */
|
||||
|
||||
template<>
|
||||
void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
void osl_eval_nodes<SHADER_TYPE_SURFACE>(const ThreadKernelGlobalsCPU *kg,
|
||||
const void *state,
|
||||
ShaderData *sd,
|
||||
const uint32_t path_flag)
|
||||
{
|
||||
/* setup shader globals from shader data */
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
shaderdata_to_shaderglobals(
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&kg->osl.shader_globals));
|
||||
|
||||
/* clear trace data */
|
||||
tdata->tracedata.init = false;
|
||||
kg->osl.tracedata.init = false;
|
||||
|
||||
/* Used by render-services. */
|
||||
sd->osl_globals = kg;
|
||||
@@ -101,30 +100,30 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
}
|
||||
|
||||
/* execute shader for this point */
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
||||
OSL::ShadingContext *octx = tdata->context;
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss;
|
||||
OSL::ShaderGlobals *globals = &kg->osl.shader_globals;
|
||||
OSL::ShadingContext *octx = kg->osl.context;
|
||||
const int shader = sd->shader & SHADER_MASK;
|
||||
|
||||
if (sd->object == OBJECT_NONE && sd->lamp == LAMP_NONE) {
|
||||
/* background */
|
||||
if (kg->osl->background_state) {
|
||||
if (kg->osl.globals->background_state) {
|
||||
#if OSL_LIBRARY_VERSION_CODE >= 11304
|
||||
ss->execute(*octx,
|
||||
*(kg->osl->background_state),
|
||||
kg->osl_thread_index,
|
||||
*(kg->osl.globals->background_state),
|
||||
kg->osl.thread_index,
|
||||
0,
|
||||
*globals,
|
||||
nullptr,
|
||||
nullptr);
|
||||
#else
|
||||
ss->execute(octx, *(kg->osl->background_state), *globals);
|
||||
ss->execute(octx, *(kg->osl.globals->background_state), *globals);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* automatic bump shader */
|
||||
if (kg->osl->bump_state[shader]) {
|
||||
if (kg->osl.globals->bump_state[shader]) {
|
||||
/* save state */
|
||||
const float3 P = sd->P;
|
||||
const float dP = sd->dP;
|
||||
@@ -134,12 +133,13 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
/* set state as if undisplaced */
|
||||
if (sd->flag & SD_HAS_DISPLACEMENT) {
|
||||
float data[9];
|
||||
const bool found = kg->osl->services->get_attribute(sd,
|
||||
true,
|
||||
OSLRenderServices::u_empty,
|
||||
TypeVector,
|
||||
OSLRenderServices::u_geom_undisplaced,
|
||||
data);
|
||||
const bool found = kg->osl.globals->services->get_attribute(
|
||||
sd,
|
||||
true,
|
||||
OSLRenderServices::u_empty,
|
||||
TypeVector,
|
||||
OSLRenderServices::u_geom_undisplaced,
|
||||
data);
|
||||
(void)found;
|
||||
assert(found);
|
||||
|
||||
@@ -162,14 +162,14 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
/* execute bump shader */
|
||||
#if OSL_LIBRARY_VERSION_CODE >= 11304
|
||||
ss->execute(*octx,
|
||||
*(kg->osl->bump_state[shader]),
|
||||
kg->osl_thread_index,
|
||||
*(kg->osl.globals->bump_state[shader]),
|
||||
kg->osl.thread_index,
|
||||
0,
|
||||
*globals,
|
||||
nullptr,
|
||||
nullptr);
|
||||
#else
|
||||
ss->execute(octx, *(kg->osl->bump_state[shader]), *globals);
|
||||
ss->execute(octx, *(kg->osl.globals->bump_state[shader]), *globals);
|
||||
#endif
|
||||
|
||||
/* reset state */
|
||||
@@ -182,17 +182,17 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
}
|
||||
|
||||
/* surface shader */
|
||||
if (kg->osl->surface_state[shader]) {
|
||||
if (kg->osl.globals->surface_state[shader]) {
|
||||
#if OSL_LIBRARY_VERSION_CODE >= 11304
|
||||
ss->execute(*octx,
|
||||
*(kg->osl->surface_state[shader]),
|
||||
kg->osl_thread_index,
|
||||
*(kg->osl.globals->surface_state[shader]),
|
||||
kg->osl.thread_index,
|
||||
0,
|
||||
*globals,
|
||||
nullptr,
|
||||
nullptr);
|
||||
#else
|
||||
ss->execute(octx, *(kg->osl->surface_state[shader]), *globals);
|
||||
ss->execute(octx, *(kg->osl.globals->surface_state[shader]), *globals);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -206,18 +206,17 @@ void osl_eval_nodes<SHADER_TYPE_SURFACE>(const KernelGlobalsCPU *kg,
|
||||
/* Volume */
|
||||
|
||||
template<>
|
||||
void osl_eval_nodes<SHADER_TYPE_VOLUME>(const KernelGlobalsCPU *kg,
|
||||
void osl_eval_nodes<SHADER_TYPE_VOLUME>(const ThreadKernelGlobalsCPU *kg,
|
||||
const void *state,
|
||||
ShaderData *sd,
|
||||
const uint32_t path_flag)
|
||||
{
|
||||
/* setup shader globals from shader data */
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
shaderdata_to_shaderglobals(
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&kg->osl.shader_globals));
|
||||
|
||||
/* clear trace data */
|
||||
tdata->tracedata.init = false;
|
||||
kg->osl.tracedata.init = false;
|
||||
|
||||
/* Used by render-services. */
|
||||
sd->osl_globals = kg;
|
||||
@@ -231,22 +230,22 @@ void osl_eval_nodes<SHADER_TYPE_VOLUME>(const KernelGlobalsCPU *kg,
|
||||
}
|
||||
|
||||
/* execute shader */
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
||||
OSL::ShadingContext *octx = tdata->context;
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss;
|
||||
OSL::ShaderGlobals *globals = &kg->osl.shader_globals;
|
||||
OSL::ShadingContext *octx = kg->osl.context;
|
||||
const int shader = sd->shader & SHADER_MASK;
|
||||
|
||||
if (kg->osl->volume_state[shader]) {
|
||||
if (kg->osl.globals->volume_state[shader]) {
|
||||
#if OSL_LIBRARY_VERSION_CODE >= 11304
|
||||
ss->execute(*octx,
|
||||
*(kg->osl->volume_state[shader]),
|
||||
kg->osl_thread_index,
|
||||
*(kg->osl.globals->volume_state[shader]),
|
||||
kg->osl.thread_index,
|
||||
0,
|
||||
*globals,
|
||||
nullptr,
|
||||
nullptr);
|
||||
#else
|
||||
ss->execute(octx, *(kg->osl->volume_state[shader]), *globals);
|
||||
ss->execute(octx, *(kg->osl.globals->volume_state[shader]), *globals);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -259,18 +258,17 @@ void osl_eval_nodes<SHADER_TYPE_VOLUME>(const KernelGlobalsCPU *kg,
|
||||
/* Displacement */
|
||||
|
||||
template<>
|
||||
void osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(const KernelGlobalsCPU *kg,
|
||||
void osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(const ThreadKernelGlobalsCPU *kg,
|
||||
const void *state,
|
||||
ShaderData *sd,
|
||||
const uint32_t path_flag)
|
||||
{
|
||||
/* setup shader globals from shader data */
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
shaderdata_to_shaderglobals(
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&tdata->globals));
|
||||
kg, sd, path_flag, reinterpret_cast<ShaderGlobals *>(&kg->osl.shader_globals));
|
||||
|
||||
/* clear trace data */
|
||||
tdata->tracedata.init = false;
|
||||
kg->osl.tracedata.init = false;
|
||||
|
||||
/* Used by render-services. */
|
||||
sd->osl_globals = kg;
|
||||
@@ -278,22 +276,22 @@ void osl_eval_nodes<SHADER_TYPE_DISPLACEMENT>(const KernelGlobalsCPU *kg,
|
||||
sd->osl_shadow_path_state = nullptr;
|
||||
|
||||
/* execute shader */
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
||||
OSL::ShadingContext *octx = tdata->context;
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl.ss;
|
||||
OSL::ShaderGlobals *globals = &kg->osl.shader_globals;
|
||||
OSL::ShadingContext *octx = kg->osl.context;
|
||||
const int shader = sd->shader & SHADER_MASK;
|
||||
|
||||
if (kg->osl->displacement_state[shader]) {
|
||||
if (kg->osl.globals->displacement_state[shader]) {
|
||||
#if OSL_LIBRARY_VERSION_CODE >= 11304
|
||||
ss->execute(*octx,
|
||||
*(kg->osl->displacement_state[shader]),
|
||||
kg->osl_thread_index,
|
||||
*(kg->osl.globals->displacement_state[shader]),
|
||||
kg->osl.thread_index,
|
||||
0,
|
||||
*globals,
|
||||
nullptr,
|
||||
nullptr);
|
||||
#else
|
||||
ss->execute(octx, *(kg->osl->displacement_state[shader]), *globals);
|
||||
ss->execute(octx, *(kg->osl.globals->displacement_state[shader]), *globals);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -4,57 +4,55 @@
|
||||
|
||||
#include <OSL/oslexec.h>
|
||||
|
||||
#include "kernel/globals.h"
|
||||
#include "kernel/types.h"
|
||||
|
||||
#include "kernel/osl/globals.h"
|
||||
#include "kernel/osl/services.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void OSLGlobals::thread_init(KernelGlobalsCPU *kg, OSLGlobals *osl_globals, const int thread_index)
|
||||
OSLThreadData::OSLThreadData(OSLGlobals *osl_globals, const int thread_index)
|
||||
: globals(osl_globals), thread_index(thread_index)
|
||||
{
|
||||
/* no osl used? */
|
||||
if (!osl_globals->use) {
|
||||
kg->osl = nullptr;
|
||||
if (globals == nullptr || globals->use == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Per thread kernel data init. */
|
||||
kg->osl = osl_globals;
|
||||
ss = globals->ss;
|
||||
|
||||
OSL::ShadingSystem *ss = kg->osl->ss;
|
||||
OSLThreadData *tdata = new OSLThreadData();
|
||||
memset((void *)&shader_globals, 0, sizeof(shader_globals));
|
||||
shader_globals.tracedata = &tracedata;
|
||||
|
||||
memset((void *)&tdata->globals, 0, sizeof(OSL::ShaderGlobals));
|
||||
tdata->globals.tracedata = &tdata->tracedata;
|
||||
tdata->osl_thread_info = ss->create_thread_info();
|
||||
tdata->context = ss->get_context(tdata->osl_thread_info);
|
||||
|
||||
tdata->oiio_thread_info = osl_globals->ts->get_perthread_info();
|
||||
|
||||
kg->osl_ss = (OSLShadingSystem *)ss;
|
||||
kg->osl_tdata = tdata;
|
||||
kg->osl_thread_index = thread_index;
|
||||
osl_thread_info = ss->create_thread_info();
|
||||
context = ss->get_context(osl_thread_info);
|
||||
oiio_thread_info = globals->ts->get_perthread_info();
|
||||
}
|
||||
|
||||
void OSLGlobals::thread_free(KernelGlobalsCPU *kg)
|
||||
OSLThreadData::~OSLThreadData()
|
||||
{
|
||||
if (!kg->osl) {
|
||||
return;
|
||||
if (context) {
|
||||
ss->release_context(context);
|
||||
}
|
||||
if (osl_thread_info) {
|
||||
ss->destroy_thread_info(osl_thread_info);
|
||||
}
|
||||
}
|
||||
|
||||
OSL::ShadingSystem *ss = (OSL::ShadingSystem *)kg->osl_ss;
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
ss->release_context(tdata->context);
|
||||
OSLThreadData::OSLThreadData(OSLThreadData &&other) noexcept
|
||||
: globals(other.globals),
|
||||
ss(other.ss),
|
||||
thread_index(other.thread_index),
|
||||
shader_globals(other.shader_globals),
|
||||
tracedata(other.tracedata),
|
||||
osl_thread_info(other.osl_thread_info),
|
||||
context(other.context),
|
||||
oiio_thread_info(other.oiio_thread_info)
|
||||
{
|
||||
shader_globals.tracedata = &tracedata;
|
||||
|
||||
ss->destroy_thread_info(tdata->osl_thread_info);
|
||||
|
||||
delete tdata;
|
||||
|
||||
kg->osl = nullptr;
|
||||
kg->osl_ss = nullptr;
|
||||
kg->osl_tdata = nullptr;
|
||||
memset((void *)&other.shader_globals, 0, sizeof(other.shader_globals));
|
||||
memset((void *)&other.tracedata, 0, sizeof(other.tracedata));
|
||||
other.thread_index = -1;
|
||||
other.context = nullptr;
|
||||
other.osl_thread_info = nullptr;
|
||||
other.oiio_thread_info = nullptr;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -23,6 +23,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class OSLRenderServices;
|
||||
class ColorSpaceProcessor;
|
||||
struct ThreadKernelGlobalsCPU;
|
||||
|
||||
/* OSL Globals
|
||||
*
|
||||
@@ -39,12 +40,6 @@ struct OSLGlobals {
|
||||
use = false;
|
||||
}
|
||||
|
||||
/* per thread data */
|
||||
static void thread_init(struct KernelGlobalsCPU *kg,
|
||||
OSLGlobals *osl_globals,
|
||||
const int thread_index);
|
||||
static void thread_free(struct KernelGlobalsCPU *kg);
|
||||
|
||||
bool use;
|
||||
|
||||
/* shading system */
|
||||
@@ -78,11 +73,27 @@ struct OSLTraceData {
|
||||
|
||||
/* thread key for thread specific data lookup */
|
||||
struct OSLThreadData {
|
||||
OSL::ShaderGlobals globals;
|
||||
OSL::PerThreadInfo *osl_thread_info;
|
||||
OSLTraceData tracedata;
|
||||
OSL::ShadingContext *context;
|
||||
OIIO::TextureSystem::Perthread *oiio_thread_info;
|
||||
/* Global Data */
|
||||
OSLGlobals *globals = nullptr;
|
||||
OSL::ShadingSystem *ss = nullptr;
|
||||
|
||||
/* Per-thread data. */
|
||||
int thread_index = -1;
|
||||
|
||||
mutable OSL::ShaderGlobals shader_globals;
|
||||
mutable OSLTraceData tracedata;
|
||||
|
||||
OSL::PerThreadInfo *osl_thread_info = nullptr;
|
||||
OSL::ShadingContext *context = nullptr;
|
||||
OIIO::TextureSystem::Perthread *oiio_thread_info = nullptr;
|
||||
|
||||
OSLThreadData(OSLGlobals *globals, const int thread_index);
|
||||
~OSLThreadData();
|
||||
|
||||
OSLThreadData(OSLThreadData &other) = delete;
|
||||
OSLThreadData(OSLThreadData &&other) noexcept;
|
||||
OSLThreadData &operator=(const OSLThreadData &other) = delete;
|
||||
OSLThreadData &operator=(OSLThreadData &&other) = delete;
|
||||
};
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -172,7 +172,7 @@ ccl_device void flatten_closure_tree(KernelGlobals kg,
|
||||
#ifndef __KERNEL_GPU__
|
||||
|
||||
template<ShaderType type>
|
||||
void osl_eval_nodes(const KernelGlobalsCPU *kg,
|
||||
void osl_eval_nodes(const ThreadKernelGlobalsCPU *kg,
|
||||
const void *state,
|
||||
ShaderData *sd,
|
||||
uint32_t path_flag);
|
||||
|
||||
@@ -148,7 +148,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
||||
* a concept of shader space, so we just use object space for both. */
|
||||
if (xform) {
|
||||
const ShaderData *sd = (const ShaderData *)xform;
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const int object = sd->object;
|
||||
|
||||
if (object != OBJECT_NONE) {
|
||||
@@ -188,7 +188,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
|
||||
* a concept of shader space, so we just use object space for both. */
|
||||
if (xform) {
|
||||
const ShaderData *sd = (const ShaderData *)xform;
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const int object = sd->object;
|
||||
|
||||
if (object != OBJECT_NONE) {
|
||||
@@ -225,7 +225,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
||||
const float time)
|
||||
{
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
if (from == u_ndc) {
|
||||
copy_matrix(result, kernel_data.cam.ndctoworld);
|
||||
@@ -257,7 +257,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
|
||||
const float time)
|
||||
{
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
if (to == u_ndc) {
|
||||
copy_matrix(result, kernel_data.cam.worldtondc);
|
||||
@@ -291,7 +291,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
||||
* a concept of shader space, so we just use object space for both. */
|
||||
if (xform) {
|
||||
const ShaderData *sd = (const ShaderData *)xform;
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const int object = sd->object;
|
||||
|
||||
if (object != OBJECT_NONE) {
|
||||
@@ -319,7 +319,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
|
||||
* a concept of shader space, so we just use object space for both. */
|
||||
if (xform) {
|
||||
const ShaderData *sd = (const ShaderData *)xform;
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const int object = sd->object;
|
||||
|
||||
if (object != OBJECT_NONE) {
|
||||
@@ -344,7 +344,7 @@ bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg,
|
||||
OSLUStringHash from)
|
||||
{
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
if (from == u_ndc) {
|
||||
copy_matrix(result, kernel_data.cam.ndctoworld);
|
||||
@@ -371,7 +371,7 @@ bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg,
|
||||
OSLUStringHash to)
|
||||
{
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
if (to == u_ndc) {
|
||||
copy_matrix(result, kernel_data.cam.worldtondc);
|
||||
@@ -727,7 +727,7 @@ static bool set_attribute_matrix(const Transform &tfm, const TypeDesc type, void
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool get_object_attribute(const KernelGlobalsCPU *kg,
|
||||
static bool get_object_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
ShaderData *sd,
|
||||
const AttributeDescriptor &desc,
|
||||
const TypeDesc &type,
|
||||
@@ -803,7 +803,7 @@ static bool get_object_attribute(const KernelGlobalsCPU *kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg,
|
||||
bool OSLRenderServices::get_object_standard_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
ShaderData *sd,
|
||||
OSLUStringHash name,
|
||||
const TypeDesc type,
|
||||
@@ -924,7 +924,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
|
||||
return set_attribute_float3_3(P, type, derivatives, val);
|
||||
}
|
||||
if (name == u_geom_name) {
|
||||
const ustring object_name = kg->osl->object_names[sd->object];
|
||||
const ustring object_name = kg->osl.globals->object_names[sd->object];
|
||||
return set_attribute_string(object_name, type, derivatives, val);
|
||||
}
|
||||
if (name == u_is_smooth) {
|
||||
@@ -979,7 +979,7 @@ bool OSLRenderServices::get_object_standard_attribute(const KernelGlobalsCPU *kg
|
||||
return get_background_attribute(kg, sd, name, type, derivatives, val);
|
||||
}
|
||||
|
||||
bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
|
||||
bool OSLRenderServices::get_background_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
ShaderData *sd,
|
||||
OSLUStringHash name,
|
||||
const TypeDesc type,
|
||||
@@ -1038,8 +1038,7 @@ bool OSLRenderServices::get_background_attribute(const KernelGlobalsCPU *kg,
|
||||
}
|
||||
if (name == u_ndc) {
|
||||
/* NDC coordinates with special exception for orthographic projection. */
|
||||
OSLThreadData *tdata = kg->osl_tdata;
|
||||
OSL::ShaderGlobals *globals = &tdata->globals;
|
||||
OSL::ShaderGlobals *globals = &kg->osl.shader_globals;
|
||||
float3 ndc[3];
|
||||
|
||||
if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
|
||||
@@ -1090,14 +1089,15 @@ bool OSLRenderServices::get_attribute(ShaderData *sd,
|
||||
OSLUStringHash name,
|
||||
void *val)
|
||||
{
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
int object;
|
||||
|
||||
/* lookup of attribute on another object */
|
||||
if (object_name != u_empty) {
|
||||
const OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
|
||||
const OSLGlobals::ObjectNameMap::iterator it = kg->osl.globals->object_name_map.find(
|
||||
object_name);
|
||||
|
||||
if (it == kg->osl->object_name_map.end()) {
|
||||
if (it == kg->osl.globals->object_name_map.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1246,7 +1246,7 @@ bool OSLRenderServices::texture(OSLUStringHash filename,
|
||||
OSLTextureHandle *handle = (OSLTextureHandle *)texture_handle;
|
||||
const OSLTextureHandle::Type texture_type = (handle) ? handle->type : OSLTextureHandle::OIIO;
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
KernelGlobals kernel_globals = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals;
|
||||
bool status = false;
|
||||
|
||||
switch (texture_type) {
|
||||
@@ -1351,8 +1351,7 @@ bool OSLRenderServices::texture(OSLUStringHash filename,
|
||||
|
||||
if (handle && handle->oiio_handle) {
|
||||
if (texture_thread_info == nullptr) {
|
||||
OSLThreadData *tdata = kernel_globals->osl_tdata;
|
||||
texture_thread_info = tdata->oiio_thread_info;
|
||||
texture_thread_info = kernel_globals->osl.oiio_thread_info;
|
||||
}
|
||||
|
||||
status = ts->texture(handle->oiio_handle,
|
||||
@@ -1460,9 +1459,8 @@ bool OSLRenderServices::texture3d(OSLUStringHash filename,
|
||||
if (handle && handle->oiio_handle) {
|
||||
if (texture_thread_info == nullptr) {
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
KernelGlobals kernel_globals = sd->osl_globals;
|
||||
OSLThreadData *tdata = kernel_globals->osl_tdata;
|
||||
texture_thread_info = tdata->oiio_thread_info;
|
||||
const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals;
|
||||
texture_thread_info = kernel_globals->osl.oiio_thread_info;
|
||||
}
|
||||
|
||||
status = ts->texture3d(handle->oiio_handle,
|
||||
@@ -1546,9 +1544,8 @@ bool OSLRenderServices::environment(OSLUStringHash filename,
|
||||
if (handle && handle->oiio_handle) {
|
||||
if (thread_info == nullptr) {
|
||||
ShaderData *sd = (ShaderData *)(sg->renderstate);
|
||||
KernelGlobals kernel_globals = sd->osl_globals;
|
||||
OSLThreadData *tdata = kernel_globals->osl_tdata;
|
||||
thread_info = tdata->oiio_thread_info;
|
||||
const ThreadKernelGlobalsCPU *kernel_globals = sd->osl_globals;
|
||||
thread_info = kernel_globals->osl.oiio_thread_info;
|
||||
}
|
||||
|
||||
status = ts->environment(handle->oiio_handle,
|
||||
@@ -1726,7 +1723,7 @@ bool OSLRenderServices::trace(TraceOpt &options,
|
||||
tracedata->hit = false;
|
||||
tracedata->sd.osl_globals = sd->osl_globals;
|
||||
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
/* Can't ray-trace from shaders like displacement, before BVH exists. */
|
||||
if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
|
||||
@@ -1759,7 +1756,7 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
|
||||
}
|
||||
|
||||
ShaderData *sd = &tracedata->sd;
|
||||
const KernelGlobalsCPU *kg = sd->osl_globals;
|
||||
const ThreadKernelGlobalsCPU *kg = sd->osl_globals;
|
||||
|
||||
if (!tracedata->setup) {
|
||||
/* lazy shader data setup */
|
||||
|
||||
@@ -30,7 +30,7 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
class Scene;
|
||||
struct ShaderData;
|
||||
struct KernelGlobalsCPU;
|
||||
struct ThreadKernelGlobalsCPU;
|
||||
|
||||
/* OSL Texture Handle
|
||||
*
|
||||
@@ -276,13 +276,13 @@ class OSLRenderServices : public OSL::RendererServices {
|
||||
void *data) override;
|
||||
#endif
|
||||
|
||||
static bool get_background_attribute(const KernelGlobalsCPU *kg,
|
||||
static bool get_background_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
ShaderData *sd,
|
||||
OSLUStringHash name,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
void *val);
|
||||
static bool get_object_standard_attribute(const KernelGlobalsCPU *kg,
|
||||
static bool get_object_standard_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
ShaderData *sd,
|
||||
OSLUStringHash name,
|
||||
const TypeDesc type,
|
||||
|
||||
@@ -1191,7 +1191,7 @@ struct ccl_align(16) ShaderData
|
||||
# ifdef __KERNEL_GPU__
|
||||
ccl_private uint8_t *osl_closure_pool;
|
||||
# else
|
||||
const struct KernelGlobalsCPU *osl_globals;
|
||||
const struct ThreadKernelGlobalsCPU *osl_globals;
|
||||
const struct IntegratorStateCPU *osl_path_state;
|
||||
const struct IntegratorShadowStateCPU *osl_shadow_path_state;
|
||||
# endif
|
||||
|
||||
@@ -141,7 +141,7 @@ void OSLShaderManager::device_update_specific(Device *device,
|
||||
/* collect shader groups from all shaders */
|
||||
for (Shader *shader : scene->shaders) {
|
||||
device->foreach_device([shader, background_shader](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
OSLGlobals *og = sub_device->get_cpu_osl_memory();
|
||||
|
||||
/* push state to array for lookup */
|
||||
og->surface_state.push_back(shader->osl_surface_ref);
|
||||
@@ -161,7 +161,7 @@ void OSLShaderManager::device_update_specific(Device *device,
|
||||
|
||||
/* setup shader engine */
|
||||
device->foreach_device([](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
OSLGlobals *og = sub_device->get_cpu_osl_memory();
|
||||
OSL::ShadingSystem *ss = ss_shared[sub_device->info.type];
|
||||
|
||||
og->ss = ss;
|
||||
@@ -228,7 +228,7 @@ void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *s
|
||||
|
||||
/* clear shader engine */
|
||||
device->foreach_device([](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
OSLGlobals *og = sub_device->get_cpu_osl_memory();
|
||||
|
||||
og->use = false;
|
||||
og->ss = nullptr;
|
||||
@@ -712,7 +712,7 @@ void OSLShaderManager::osl_image_slots(Device *device,
|
||||
{
|
||||
set<OSLRenderServices *> services_shared;
|
||||
device->foreach_device([&services_shared](Device *sub_device) {
|
||||
OSLGlobals *og = (OSLGlobals *)sub_device->get_cpu_osl_memory();
|
||||
OSLGlobals *og = sub_device->get_cpu_osl_memory();
|
||||
services_shared.insert(og->services);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user