Cycles: regular rendering now works tiled, and supports save buffers to

save memory during render and cache render results.


Implementation notes:

In the render engine API it's now possible to get the render result for
one render layer only, and to retrieve the expected tile size when save
buffers are used. This is needed because EXR expects tiles with a particular
size and coordinates.

The EXR temporary files are now also separated per layer, since Cycles
can't give the full render result for all render layers, and EXR doesn't
support writing parts of tiles.

In Cycles internally the handling of render buffers and multi GPU
rendering in particular changed quite a bit, and could use a bit more
refactoring to make things more consistent and simple.
This commit is contained in:
Brecht Van Lommel
2012-06-28 10:34:38 +00:00
parent 53bb66e291
commit 7c8f82a174
30 changed files with 751 additions and 393 deletions

View File

@@ -197,8 +197,16 @@ class CyclesRender_PT_performance(CyclesButtonsPanel, Panel):
sub = col.column(align=True)
sub.label(text="Tiles:")
sub.prop(cscene, "debug_tile_size")
sub.prop(cscene, "debug_min_size")
sub.prop(rd, "parts_x", text="X")
sub.prop(rd, "parts_y", text="Y")
subsub = sub.column()
subsub.enabled = not rd.use_border
subsub.prop(rd, "use_save_buffers")
#sub.prop(cscene, "debug_tile_size")
#sub.prop(cscene, "debug_min_size")
col = split.column()

View File

@@ -42,8 +42,7 @@ CCL_NAMESPACE_BEGIN
BlenderSession::BlenderSession(BL::RenderEngine b_engine_, BL::UserPreferences b_userpref_,
BL::BlendData b_data_, BL::Scene b_scene_)
: b_engine(b_engine_), b_userpref(b_userpref_), b_data(b_data_), b_scene(b_scene_),
b_v3d(PointerRNA_NULL), b_rv3d(PointerRNA_NULL),
b_rr(PointerRNA_NULL), b_rlay(PointerRNA_NULL)
b_v3d(PointerRNA_NULL), b_rv3d(PointerRNA_NULL)
{
/* offline render */
BL::RenderSettings r = b_scene.render();
@@ -60,7 +59,7 @@ BlenderSession::BlenderSession(BL::RenderEngine b_engine_, BL::UserPreferences b
BL::BlendData b_data_, BL::Scene b_scene_,
BL::SpaceView3D b_v3d_, BL::RegionView3D b_rv3d_, int width_, int height_)
: b_engine(b_engine_), b_userpref(b_userpref_), b_data(b_data_), b_scene(b_scene_),
b_v3d(b_v3d_), b_rv3d(b_rv3d_), b_rr(PointerRNA_NULL), b_rlay(PointerRNA_NULL)
b_v3d(b_v3d_), b_rv3d(b_rv3d_)
{
/* 3d view render */
width = width_;
@@ -80,7 +79,7 @@ BlenderSession::~BlenderSession()
void BlenderSession::create_session()
{
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
/* reset status/progress */
last_status = "";
@@ -90,7 +89,7 @@ void BlenderSession::create_session()
scene = new Scene(scene_params);
/* create sync */
sync = new BlenderSync(b_data, b_scene, scene, !background);
sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background);
sync->sync_data(b_v3d, b_engine.camera_override());
if(b_rv3d)
@@ -177,35 +176,76 @@ static PassType get_pass_type(BL::RenderPass b_pass)
return PASS_NONE;
}
void BlenderSession::render()
static BL::RenderResult begin_render_result(BL::RenderEngine b_engine, int x, int y, int w, int h, const char *layername)
{
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, width, height);
int w = buffer_params.width, h = buffer_params.height;
/* create render result */
RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h);
RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, x, y, w, h, layername);
PointerRNA rrptr;
RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr);
b_rr = BL::RenderResult(rrptr);
return BL::RenderResult(rrptr);
}
/* Hand a render result back to the render engine through
 * RE_engine_end_result(). The cancel flag is forwarded as an int; callers in
 * this file pass true to free a temporary result without merging it. */
static void end_render_result(BL::RenderEngine b_engine, BL::RenderResult b_rr, bool cancel = false)
{
	RenderEngine *engine = (RenderEngine*)b_engine.ptr.data;
	RenderResult *result = (RenderResult*)b_rr.ptr.data;
	const int cancel_flag = (int)cancel;

	RE_engine_end_result(engine, result, cancel_flag);
}
/* Write the contents of one set of render buffers into the render result of
 * the layer currently being rendered (b_rlay_name). Installed by render() as
 * the session's write_render_buffers_cb callback. */
void BlenderSession::write_render_buffers(RenderBuffers *buffers)
{
	/* position of these buffers relative to the tile manager's full area */
	BufferParams& params = buffers->params;
	int x = params.full_x - session->tile_manager.params.full_x;
	int y = params.full_y - session->tile_manager.params.full_y;
	int w = params.width;
	int h = params.height;

	/* get render result */
	BL::RenderResult b_rr = begin_render_result(b_engine, x, y, w, h, b_rlay_name.c_str());
	BL::RenderResult::layers_iterator b_single_rlay;
	b_rr.layers.begin(b_single_rlay);

	/* layer will be missing if it was disabled in the UI; do not dereference
	 * the end iterator, free the result without merging instead */
	if(b_single_rlay == b_rr.layers.end()) {
		end_render_result(b_engine, b_rr, true);
		return;
	}

	BL::RenderLayer b_rlay = *b_single_rlay;

	/* write result */
	write_render_result(b_rr, b_rlay, buffers);
	end_render_result(b_engine, b_rr);
}
void BlenderSession::render()
{
/* set callback to write out render results */
session->write_render_buffers_cb = function_bind(&BlenderSession::write_render_buffers, this, _1);
/* get buffer parameters */
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, width, height);
BL::RenderSettings r = b_scene.render();
BL::RenderResult::layers_iterator b_iter;
BL::RenderLayers b_rr_layers(r.ptr);
/* render each layer */
for(b_rr.layers.begin(b_iter); b_iter != b_rr.layers.end(); ++b_iter) {
/* set layer */
b_rlay = *b_iter;
BL::RenderSettings r = b_scene.render();
BL::RenderSettings::layers_iterator b_iter;
for(r.layers.begin(b_iter); b_iter != r.layers.end(); ++b_iter) {
b_rlay_name = b_iter->name();
/* temporary render result to find needed passes */
BL::RenderResult b_rr = begin_render_result(b_engine, 0, 0, 1, 1, b_rlay_name.c_str());
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if(b_single_rlay == b_rr.layers.end()) {
end_render_result(b_engine, b_rr, true);
continue;
}
BL::RenderLayer b_rlay = *b_single_rlay;
/* add passes */
vector<Pass> passes;
Pass::add(PASS_COMBINED, passes);
if(session_params.device.advanced_shading) {
/* loop over passes */
BL::RenderLayer::passes_iterator b_pass_iter;
for(b_rlay.passes.begin(b_pass_iter); b_pass_iter != b_rlay.passes.end(); ++b_pass_iter) {
BL::RenderPass b_pass(*b_pass_iter);
PassType pass_type = get_pass_type(b_pass);
@@ -217,13 +257,16 @@ void BlenderSession::render()
}
}
/* free result without merging */
end_render_result(b_engine, b_rr, true);
buffer_params.passes = passes;
scene->film->tag_passes_update(scene, passes);
scene->film->tag_update(scene);
scene->integrator->tag_update(scene);
/* update scene */
sync->sync_data(b_v3d, b_engine.camera_override(), b_iter->name().c_str());
sync->sync_data(b_v3d, b_engine.camera_override(), b_rlay_name.c_str());
/* update session */
int samples = sync->get_layer_samples();
@@ -235,20 +278,14 @@ void BlenderSession::render()
if(session->progress.get_cancel())
break;
/* write result */
write_render_result();
}
/* delete render result */
RE_engine_end_result((RenderEngine*)b_engine.ptr.data, (RenderResult*)b_rr.ptr.data);
/* clear callback */
session->write_render_buffers_cb = NULL;
}
void BlenderSession::write_render_result()
void BlenderSession::write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderBuffers *buffers)
{
/* get state */
RenderBuffers *buffers = session->buffers;
/* copy data from device */
if(!buffers->copy_from_device())
return;
@@ -289,7 +326,7 @@ void BlenderSession::synchronize()
{
/* on session/scene parameter changes, we recreate session entirely */
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
if(session->params.modified(session_params) ||
scene->params.modified(scene_params))
@@ -364,7 +401,7 @@ bool BlenderSession::draw(int w, int h)
/* reset if requested */
if(reset) {
SessionParams session_params = BlenderSync::get_session_params(b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, scene->camera, w, h);
session->reset(buffer_params, session_params.samples);
@@ -435,7 +472,6 @@ void BlenderSession::tag_redraw()
/* offline render, redraw if timeout passed */
if(time_dt() - last_redraw_time > 1.0) {
write_render_result();
engine_tag_redraw((RenderEngine*)b_engine.ptr.data);
last_redraw_time = time_dt();
}

View File

@@ -29,6 +29,7 @@ CCL_NAMESPACE_BEGIN
class Scene;
class Session;
class RenderBuffers;
class BlenderSession {
public:
@@ -46,7 +47,8 @@ public:
/* offline render */
void render();
void write_render_result();
void write_render_result(BL::RenderResult b_rr, BL::RenderLayer b_rlay, RenderBuffers *buffers);
void write_render_buffers(RenderBuffers *buffers);
/* interactive updates */
void synchronize();
@@ -72,8 +74,7 @@ public:
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
BL::RenderResult b_rr;
BL::RenderLayer b_rlay;
string b_rlay_name;
string last_status;
float last_progress;

View File

@@ -40,8 +40,9 @@ CCL_NAMESPACE_BEGIN
/* Constructor */
BlenderSync::BlenderSync(BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene_, bool preview_)
: b_data(b_data_), b_scene(b_scene_),
BlenderSync::BlenderSync(BL::RenderEngine b_engine_, BL::BlendData b_data_, BL::Scene b_scene_, Scene *scene_, bool preview_)
: b_engine(b_engine_),
b_data(b_data_), b_scene(b_scene_),
shader_map(&scene_->shaders),
object_map(&scene_->objects),
mesh_map(&scene_->meshes),
@@ -284,7 +285,7 @@ bool BlenderSync::get_session_pause(BL::Scene b_scene, bool background)
return (background)? false: get_boolean(cscene, "preview_pause");
}
SessionParams BlenderSync::get_session_params(BL::UserPreferences b_userpref, BL::Scene b_scene, bool background)
SessionParams BlenderSync::get_session_params(BL::RenderEngine b_engine, BL::UserPreferences b_userpref, BL::Scene b_scene, bool background)
{
SessionParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -338,25 +339,36 @@ SessionParams BlenderSync::get_session_params(BL::UserPreferences b_userpref, BL
}
}
/* tiles */
int tile_x = b_engine.tile_x();
int tile_y = b_engine.tile_y();
if(tile_x == 0 || tile_y == 0) {
tile_x = get_int(cscene, "debug_tile_size");
tile_y = tile_x;
params.tile_size = make_int2(tile_x, tile_y);
params.min_size = get_int(cscene, "debug_min_size");
}
else {
params.tile_size = make_int2(tile_x, tile_y);
params.min_size = min(tile_x, tile_y);
}
/* other parameters */
params.threads = b_scene.render().threads();
params.tile_size = get_int(cscene, "debug_tile_size");
params.min_size = get_int(cscene, "debug_min_size");
params.cancel_timeout = get_float(cscene, "debug_cancel_timeout");
params.reset_timeout = get_float(cscene, "debug_reset_timeout");
params.text_timeout = get_float(cscene, "debug_text_timeout");
if(background) {
params.progressive = true;
params.progressive = false;
params.min_size = INT_MAX;
}
else
params.progressive = true;
/* todo: multi device only works with single tiles now */
if(params.device.type == DEVICE_MULTI)
params.tile_size = INT_MAX;
return params;
}

View File

@@ -49,7 +49,7 @@ class ShaderNode;
class BlenderSync {
public:
BlenderSync(BL::BlendData b_data, BL::Scene b_scene, Scene *scene_, bool preview_);
BlenderSync(BL::RenderEngine b_engine_, BL::BlendData b_data, BL::Scene b_scene, Scene *scene_, bool preview_);
~BlenderSync();
/* sync */
@@ -61,7 +61,7 @@ public:
/* get parameters */
static SceneParams get_scene_params(BL::Scene b_scene, bool background);
static SessionParams get_session_params(BL::UserPreferences b_userpref, BL::Scene b_scene, bool background);
static SessionParams get_session_params(BL::RenderEngine b_engine, BL::UserPreferences b_userpref, BL::Scene b_scene, bool background);
static bool get_session_pause(BL::Scene b_scene, bool background);
static BufferParams get_buffer_params(BL::Scene b_scene, Camera *cam, int width, int height);
@@ -96,6 +96,7 @@ private:
int object_count_particles(BL::Object b_ob);
/* variables */
BL::RenderEngine b_engine;
BL::BlendData b_data;
BL::Scene b_scene;

View File

@@ -40,9 +40,9 @@ void rna_Object_create_duplilist(void *ob, void *reports, void *sce);
void rna_Object_free_duplilist(void *ob, void *reports);
void rna_RenderLayer_rect_set(PointerRNA *ptr, const float *values);
void rna_RenderPass_rect_set(PointerRNA *ptr, const float *values);
struct RenderResult *RE_engine_begin_result(struct RenderEngine *engine, int x, int y, int w, int h);
struct RenderResult *RE_engine_begin_result(struct RenderEngine *engine, int x, int y, int w, int h, const char *layername);
void RE_engine_update_result(struct RenderEngine *engine, struct RenderResult *result);
void RE_engine_end_result(struct RenderEngine *engine, struct RenderResult *result);
void RE_engine_end_result(struct RenderEngine *engine, struct RenderResult *result, int cancel);
int RE_engine_test_break(struct RenderEngine *engine);
void RE_engine_update_stats(struct RenderEngine *engine, const char *stats, const char *info);
void RE_engine_update_progress(struct RenderEngine *engine, float progress);

View File

@@ -17,6 +17,7 @@ set(SRC
device_multi.cpp
device_network.cpp
device_opencl.cpp
device_task.cpp
)
set(SRC_HEADERS
@@ -24,6 +25,7 @@ set(SRC_HEADERS
device_memory.h
device_intern.h
device_network.h
device_task.h
)
add_definitions(-DGLEW_STATIC)

View File

@@ -33,65 +33,6 @@
CCL_NAMESPACE_BEGIN
/* Device Task */
/* Construct a task of the given type with all coordinates, sample counters
 * and device pointers zeroed. offset and stride are zeroed too, so a freshly
 * constructed task never carries indeterminate values into a kernel call.
 * The initializers follow the member declaration order. */
DeviceTask::DeviceTask(Type type_)
: type(type_), x(0), y(0), w(0), h(0), rng_state(0), rgba(0), buffer(0),
  sample(0), resolution(0), offset(0), stride(0),
  shader_input(0), shader_output(0),
  shader_eval_type(0), shader_x(0), shader_w(0)
{
}
/* Split this task into sub tasks, appended to `tasks`, such that each one
 * covers at most about `max_size` elements: shader elements for SHADER
 * tasks, pixels (whole rows of width w) for the other task types. */
void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
{
	int num;

	/* a zero or negative limit would divide by zero or give a bogus count */
	max_size = max(1, max_size);

	if(type == SHADER) {
		num = (shader_w + max_size - 1)/max_size;
	}
	else {
		/* convert the pixel limit into a limit on rows per sub task; w is 0
		 * for a default constructed task and must not be used as a divisor */
		int max_rows = max(1, max_size/max(1, w));
		num = (h + max_rows - 1)/max_rows;
	}

	split(tasks, num);
}
/* Append up to `num` copies of this task to `tasks`, each covering a
 * consecutive slice of the original: slices of shader_x/shader_w for SHADER
 * tasks, slices of y/h otherwise. The last slice absorbs the remainder of
 * the integer division. Nothing is appended when the clamped count is not
 * positive. */
void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
	const bool is_shader = (type == SHADER);

	/* extent along the axis being split */
	const int total = is_shader ? shader_w : h;

	/* never create more slices than there are elements or rows */
	const int count = min(total, num);

	for(int i = 0; i < count; i++) {
		const int chunk = total/count;
		const int start = chunk*i;
		const int size = (i == count-1)? total - start: chunk;

		DeviceTask subtask = *this;

		if(is_shader) {
			subtask.shader_x = shader_x + start;
			subtask.shader_w = size;
		}
		else {
			subtask.y = y + start;
			subtask.h = size;
		}

		tasks.push_back(subtask);
	}
}
/* Device */
void Device::pixels_alloc(device_memory& mem)

View File

@@ -22,10 +22,10 @@
#include <stdlib.h>
#include "device_memory.h"
#include "device_task.h"
#include "util_list.h"
#include "util_string.h"
#include "util_task.h"
#include "util_thread.h"
#include "util_types.h"
#include "util_vector.h"
@@ -33,6 +33,7 @@
CCL_NAMESPACE_BEGIN
class Progress;
class RenderTile;
/* Device Types */
@@ -67,32 +68,6 @@ public:
}
};
/* Device Task */
/* Description of one unit of work for a device. Presumably only the fields
 * relevant to `type` are read by a device -- TODO confirm per device. */
class DeviceTask : public Task {
public:
/* kind of work to perform */
typedef enum { PATH_TRACE, TONEMAP, SHADER } Type;
Type type;
/* rectangle covered by the task; split() slices it along y/h */
int x, y, w, h;
/* device memory handles used by the kernels */
device_ptr rng_state;
device_ptr rgba;
device_ptr buffer;
int sample;
int resolution;
/* passed to the path trace kernels together with x/y; not set by the
 * constructor in this version */
int offset, stride;
/* shader evaluation: input/output memory and the element range
 * [shader_x, shader_x + shader_w), which split() slices for SHADER tasks */
device_ptr shader_input;
device_ptr shader_output;
int shader_eval_type;
int shader_x, shader_w;
DeviceTask(Type type = PATH_TRACE);
/* append up to `num` slices of this task to `tasks` */
void split(list<DeviceTask>& tasks, int num);
/* like split(), with the slice count derived from a size limit */
void split_max_size(list<DeviceTask>& tasks, int max_size);
};
/* Device */
class Device {
@@ -150,6 +125,10 @@ public:
void server_run();
#endif
/* multi device */
virtual void map_tile(Device *sub_device, RenderTile& tile) {}
virtual int device_number(Device *sub_device) { return 0; }
/* static */
static Device *create(DeviceInfo& info, bool background = true, int threads = 0);

View File

@@ -27,6 +27,8 @@
#include "osl_shader.h"
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
@@ -141,28 +143,44 @@ public:
OSLShader::thread_init(kg);
#endif
RenderTile tile;
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int y = task.y; y < task.y + task.h; y++) {
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_optimized_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
if(system_cpu_support_optimized()) {
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++)
for(int sample = start_sample; sample < end_sample; sample++)
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
if(task_pool.cancelled())
break;
if(task_pool.cancelled())
break;
}
}
}
else
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++) {
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_path_trace(kg, (float*)task.buffer, (unsigned int*)task.rng_state,
task.sample, x, y, task.offset, task.stride);
{
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++)
for(int sample = start_sample; sample < end_sample; sample++)
kernel_cpu_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
if(task_pool.cancelled())
break;
if(task_pool.cancelled())
break;
}
}
task.release_tile(tile);
if(task_pool.cancelled())
break;
}
#ifdef WITH_OSL
@@ -228,8 +246,7 @@ public:
/* split task into smaller ones, more than number of threads for uneven
* workloads where some parts of the image render slower than others */
list<DeviceTask> tasks;
task.split(tasks, TaskScheduler::num_threads()*10);
task.split(tasks, TaskScheduler::num_threads()+1);
foreach(DeviceTask& task, tasks)
task_pool.push(new CPUDeviceTask(this, task));

View File

@@ -23,6 +23,8 @@
#include "device.h"
#include "device_intern.h"
#include "buffers.h"
#include "util_cuda.h"
#include "util_debug.h"
#include "util_map.h"
@@ -37,6 +39,7 @@ CCL_NAMESPACE_BEGIN
class CUDADevice : public Device
{
public:
TaskPool task_pool;
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
@@ -192,6 +195,8 @@ public:
~CUDADevice()
{
task_pool.stop();
cuda_push_context();
cuda_assert(cuCtxDetach(cuContext))
}
@@ -466,13 +471,13 @@ public:
}
}
void path_trace(DeviceTask& task)
void path_trace(RenderTile& rtile, int sample)
{
cuda_push_context();
CUfunction cuPathTrace;
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(task.rng_state);
CUdeviceptr d_buffer = cuda_device_ptr(rtile.buffer);
CUdeviceptr d_rng_state = cuda_device_ptr(rtile.rng_state);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuPathTrace, cuModule, "kernel_cuda_path_trace"))
@@ -486,29 +491,28 @@ public:
cuda_assert(cuParamSetv(cuPathTrace, offset, &d_rng_state, sizeof(d_rng_state)))
offset += sizeof(d_rng_state);
int sample = task.sample;
offset = align_up(offset, __alignof(sample));
cuda_assert(cuParamSeti(cuPathTrace, offset, task.sample))
offset += sizeof(task.sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, sample))
offset += sizeof(sample);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.x))
offset += sizeof(task.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.x))
offset += sizeof(rtile.x);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.y))
offset += sizeof(task.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.y))
offset += sizeof(rtile.y);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.w))
offset += sizeof(task.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.w))
offset += sizeof(rtile.w);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.h))
offset += sizeof(task.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.h))
offset += sizeof(rtile.h);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset))
offset += sizeof(task.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.offset))
offset += sizeof(rtile.offset);
cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride))
offset += sizeof(task.stride);
cuda_assert(cuParamSeti(cuPathTrace, offset, rtile.stride))
offset += sizeof(rtile.stride);
cuda_assert(cuParamSetSize(cuPathTrace, offset))
@@ -520,8 +524,8 @@ public:
int xthreads = 8;
int ythreads = 8;
#endif
int xblocks = (task.w + xthreads - 1)/xthreads;
int yblocks = (task.h + ythreads - 1)/ythreads;
int xblocks = (rtile.w + xthreads - 1)/xthreads;
int yblocks = (rtile.h + ythreads - 1)/ythreads;
cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1))
cuda_assert(cuFuncSetBlockShape(cuPathTrace, xthreads, ythreads, 1))
@@ -530,13 +534,13 @@ public:
cuda_pop_context();
}
void tonemap(DeviceTask& task)
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
{
cuda_push_context();
CUfunction cuFilmConvert;
CUdeviceptr d_rgba = map_pixels(task.rgba);
CUdeviceptr d_buffer = cuda_device_ptr(task.buffer);
CUdeviceptr d_rgba = map_pixels(rgba);
CUdeviceptr d_buffer = cuda_device_ptr(buffer);
/* get kernel function */
cuda_assert(cuModuleGetFunction(&cuFilmConvert, cuModule, "kernel_cuda_tonemap"))
@@ -820,27 +824,67 @@ public:
Device::draw_pixels(mem, y, w, h, dy, width, height, transparent);
}
/* Entry point executed for a queued CUDADeviceTask. PATH_TRACE tasks keep
 * acquiring tiles, render every sample of the tile and synchronize the
 * context before releasing it; SHADER tasks run once and synchronize.
 * Other task types are ignored here. */
void thread_run(DeviceTask *task)
{
	switch(task->type) {
		case DeviceTask::PATH_TRACE: {
			RenderTile tile;

			/* keep rendering tiles until done */
			while(task->acquire_tile(this, tile)) {
				const int first = tile.start_sample;
				const int last = first + tile.num_samples;

				for(int sample = first; sample < last; sample++)
					path_trace(tile, sample);

				/* wait for the kernels before handing the tile back */
				cuda_push_context();
				cuda_assert(cuCtxSynchronize())
				cuda_pop_context();

				task->release_tile(tile);
			}
			break;
		}
		case DeviceTask::SHADER: {
			shader(*task);

			cuda_push_context();
			cuda_assert(cuCtxSynchronize())
			cuda_pop_context();
			break;
		}
		default:
			break;
	}
}
/* Copy of a DeviceTask whose run callback invokes CUDADevice::thread_run()
 * on the owning device with this task object as argument. */
class CUDADeviceTask : public DeviceTask {
public:
CUDADeviceTask(CUDADevice *device, DeviceTask& task)
: DeviceTask(task)
{
/* binds the address of this object -- NOTE(review): a copy of the task
 * would presumably still run against the original; confirm tasks are
 * only used through the pointer pushed onto the pool */
run = function_bind(&CUDADevice::thread_run, device, this);
}
};
void task_add(DeviceTask& task)
{
if(task.type == DeviceTask::TONEMAP)
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
else if(task.type == DeviceTask::SHADER)
shader(task);
if(task.type == DeviceTask::TONEMAP) {
/* must be done in main thread due to opengl access */
tonemap(task, task.buffer, task.rgba);
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
}
else {
task_pool.push(new CUDADeviceTask(this, task));
}
}
void task_wait()
{
cuda_push_context();
cuda_assert(cuCtxSynchronize())
cuda_pop_context();
task_pool.wait_work();
}
/* Forward a cancel request to this device's task pool. */
void task_cancel()
{
task_pool.cancel();
}
};

View File

@@ -23,6 +23,8 @@
#include "device_intern.h"
#include "device_network.h"
#include "buffers.h"
#include "util_foreach.h"
#include "util_list.h"
#include "util_map.h"
@@ -255,6 +257,30 @@ public:
rgba.device_pointer = tmp;
}
/* Replace the device pointers stored in `tile` by the entries the given sub
 * device's ptr_map holds for them. Null pointers are left untouched. Does
 * nothing when sub_device is not one of this multi device's sub devices. */
void map_tile(Device *sub_device, RenderTile& tile)
{
	foreach(SubDevice& sub, devices) {
		if(sub.device != sub_device)
			continue;

		if(tile.buffer) tile.buffer = sub.ptr_map[tile.buffer];
		if(tile.rng_state) tile.rng_state = sub.ptr_map[tile.rng_state];
		if(tile.rgba) tile.rgba = sub.ptr_map[tile.rgba];

		/* stop at the first match, as device_number() does: continuing would
		 * look up the already translated pointers again if the device were
		 * listed twice, and is wasted work otherwise */
		break;
	}
}
/* Return the position of sub_device in the list of sub devices, or -1 when
 * it is not part of this multi device. */
int device_number(Device *sub_device)
{
	int found = -1;
	int position = 0;

	foreach(SubDevice& sub, devices) {
		if(sub.device == sub_device) {
			found = position;
			break;
		}

		position++;
	}

	return found;
}
void task_add(DeviceTask& task)
{
list<DeviceTask> tasks;
@@ -266,7 +292,6 @@ public:
tasks.pop_front();
if(task.buffer) subtask.buffer = sub.ptr_map[task.buffer];
if(task.rng_state) subtask.rng_state = sub.ptr_map[task.rng_state];
if(task.rgba) subtask.rgba = sub.ptr_map[task.rgba];
if(task.shader_input) subtask.shader_input = sub.ptr_map[task.shader_input];
if(task.shader_output) subtask.shader_output = sub.ptr_map[task.shader_output];

View File

@@ -25,6 +25,8 @@
#include "device.h"
#include "device_intern.h"
#include "buffers.h"
#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
@@ -41,6 +43,7 @@ CCL_NAMESPACE_BEGIN
class OpenCLDevice : public Device
{
public:
TaskPool task_pool;
cl_context cxContext;
cl_command_queue cqCommandQueue;
cl_platform_id cpPlatform;
@@ -438,6 +441,8 @@ public:
~OpenCLDevice()
{
task_pool.stop();
if(null_mem)
clReleaseMemObject(CL_MEM_PTR(null_mem));
@@ -543,19 +548,19 @@ public:
return global_size + ((r == 0)? 0: group_size - r);
}
void path_trace(DeviceTask& task)
void path_trace(RenderTile& rtile, int sample)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_buffer = CL_MEM_PTR(task.buffer);
cl_mem d_rng_state = CL_MEM_PTR(task.rng_state);
cl_int d_x = task.x;
cl_int d_y = task.y;
cl_int d_w = task.w;
cl_int d_h = task.h;
cl_int d_sample = task.sample;
cl_int d_offset = task.offset;
cl_int d_stride = task.stride;
cl_mem d_buffer = CL_MEM_PTR(rtile.buffer);
cl_mem d_rng_state = CL_MEM_PTR(rtile.rng_state);
cl_int d_x = rtile.x;
cl_int d_y = rtile.y;
cl_int d_w = rtile.w;
cl_int d_h = rtile.h;
cl_int d_sample = sample;
cl_int d_offset = rtile.offset;
cl_int d_stride = rtile.stride;
/* sample arguments */
int narg = 0;
@@ -616,12 +621,12 @@ public:
return err;
}
void tonemap(DeviceTask& task)
void tonemap(DeviceTask& task, device_ptr buffer, device_ptr rgba)
{
/* cast arguments to cl types */
cl_mem d_data = CL_MEM_PTR(const_mem_map["__data"]->device_pointer);
cl_mem d_rgba = CL_MEM_PTR(task.rgba);
cl_mem d_buffer = CL_MEM_PTR(task.buffer);
cl_mem d_rgba = CL_MEM_PTR(rgba);
cl_mem d_buffer = CL_MEM_PTR(buffer);
cl_int d_x = task.x;
cl_int d_y = task.y;
cl_int d_w = task.w;
@@ -670,30 +675,49 @@ public:
opencl_assert(clFinish(cqCommandQueue));
}
void task_add(DeviceTask& maintask)
void thread_run(DeviceTask *task)
{
list<DeviceTask> tasks;
/* arbitrary limit to work around apple ATI opencl issue */
if(platform_name == "Apple")
maintask.split_max_size(tasks, 76800);
else
tasks.push_back(maintask);
foreach(DeviceTask& task, tasks) {
if(task.type == DeviceTask::TONEMAP)
tonemap(task);
else if(task.type == DeviceTask::PATH_TRACE)
path_trace(task);
if(task->type == DeviceTask::TONEMAP) {
tonemap(*task, task->buffer, task->rgba);
}
else if(task->type == DeviceTask::PATH_TRACE) {
RenderTile tile;
/* keep rendering tiles until done */
while(task->acquire_tile(this, tile)) {
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
for(int sample = start_sample; sample < end_sample; sample++)
path_trace(tile, sample);
task->release_tile(tile);
}
}
}
/* Copy of a DeviceTask whose run callback invokes OpenCLDevice::thread_run()
 * on the owning device with this task object as argument. */
class OpenCLDeviceTask : public DeviceTask {
public:
OpenCLDeviceTask(OpenCLDevice *device, DeviceTask& task)
: DeviceTask(task)
{
/* binds the address of this object, not of the task it was copied from */
run = function_bind(&OpenCLDevice::thread_run, device, this);
}
};
/* Queue a heap allocated copy of the task on this device's task pool; the
 * copy's run callback is bound to thread_run(). NOTE(review): ownership of
 * the allocation presumably passes to the pool -- confirm in TaskPool. */
void task_add(DeviceTask& task)
{
task_pool.push(new OpenCLDeviceTask(this, task));
}
/* Delegate to the task pool's wait_work(); presumably blocks until the
 * queued tasks have finished -- confirm in TaskPool. */
void task_wait()
{
task_pool.wait_work();
}
/* Forward a cancel request to this device's task pool. */
void task_cancel()
{
task_pool.cancel();
}
};

View File

@@ -74,6 +74,29 @@ int BufferParams::get_passes_size()
return align_up(size, 4);
}
/* Render Buffer Task */
/* Start out as an empty tile: zero sized rectangle, no samples, null device
 * pointers and no associated render buffers. */
RenderTile::RenderTile()
: x(0), y(0), w(0), h(0),
  start_sample(0), num_samples(0), resolution(0),
  offset(0), stride(0),
  buffer(0), rng_state(0), rgba(0),
  buffers(NULL)
{
}
/* Render Buffers */
RenderBuffers::RenderBuffers(Device *device_)

View File

@@ -67,12 +67,11 @@ class RenderBuffers {
public:
/* buffer parameters */
BufferParams params;
/* float buffer */
device_vector<float> buffer;
/* random number generator state */
device_vector<uint> rng_state;
/* mutex, must be locked manually by callers */
thread_mutex mutex;
RenderBuffers(Device *device);
~RenderBuffers();
@@ -105,8 +104,6 @@ public:
bool transparent;
/* byte buffer for tonemapped result */
device_vector<uchar4> rgba;
/* mutex, must be locked manually by callers */
thread_mutex mutex;
DisplayBuffer(Device *device);
~DisplayBuffer();
@@ -124,6 +121,27 @@ protected:
Device *device;
};
/* Render Tile
* Rendering task on a buffer */
class RenderTile {
public:
	/* rectangle to render */
	int x;
	int y;
	int w;
	int h;

	/* devices render samples start_sample .. start_sample + num_samples - 1 */
	int start_sample;
	int num_samples;
	int resolution;

	/* passed to the path trace kernels together with x and y */
	int offset;
	int stride;

	/* device memory to render into; rgba may be 0 */
	device_ptr buffer;
	device_ptr rng_state;
	device_ptr rgba;

	/* render buffers the device pointers above were taken from */
	RenderBuffers *buffers;

	RenderTile();
};
CCL_NAMESPACE_END
#endif /* __BUFFERS_H__ */

View File

@@ -27,6 +27,7 @@
#include "util_foreach.h"
#include "util_function.h"
#include "util_math.h"
#include "util_opengl.h"
#include "util_task.h"
#include "util_time.h"
@@ -35,15 +36,23 @@ CCL_NAMESPACE_BEGIN
Session::Session(const SessionParams& params_)
: params(params_),
tile_manager(params.progressive, params.samples, params.tile_size, params.min_size)
tile_manager(params.progressive, params.samples, params.tile_size, params.min_size,
(params.background)? 1: max(params.device.multi_devices.size(), 1))
{
device_use_gl = ((params.device.type != DEVICE_CPU) && !params.background);
TaskScheduler::init(params.threads);
device = Device::create(params.device, params.background, params.threads);
buffers = new RenderBuffers(device);
display = new DisplayBuffer(device);
if(params.background) {
buffers = NULL;
display = NULL;
}
else {
buffers = new RenderBuffers(device);
display = new DisplayBuffer(device);
}
session_thread = NULL;
scene = NULL;
@@ -81,7 +90,7 @@ Session::~Session()
wait();
}
if(params.output_path != "") {
if(display && params.output_path != "") {
tonemap();
progress.set_status("Writing Image", params.output_path);
@@ -118,8 +127,8 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples)
/* block for buffer access and reset immediately. we can't do this
* in the thread, because we need to allocate an OpenGL buffer, and
* that only works in the main thread */
thread_scoped_lock display_lock(display->mutex);
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock display_lock(display_mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
display_outdated = true;
reset_time = time_dt();
@@ -135,7 +144,7 @@ void Session::reset_gpu(BufferParams& buffer_params, int samples)
bool Session::draw_gpu(BufferParams& buffer_params)
{
/* block for buffer access */
thread_scoped_lock display_lock(display->mutex);
thread_scoped_lock display_lock(display_mutex);
/* first check we already rendered something */
if(gpu_draw_ready) {
@@ -145,7 +154,7 @@ bool Session::draw_gpu(BufferParams& buffer_params)
/* for CUDA we need to do tonemapping still, since we can
* only access GL buffers from the main thread */
if(gpu_need_tonemap) {
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
tonemap();
gpu_need_tonemap = false;
gpu_need_tonemap_cond.notify_all();
@@ -226,23 +235,18 @@ void Session::run_gpu()
/* buffers mutex is locked entirely while rendering each
* sample, and released/reacquired on each iteration to allow
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
/* update status and timing */
update_status_time();
/* path trace */
foreach(Tile& tile, tile_manager.state.tiles) {
path_trace(tile);
path_trace();
device->task_wait();
device->task_wait();
if(device->error_message() != "")
progress.set_cancel(device->error_message());
if(progress.get_cancel())
break;
}
if(device->error_message() != "")
progress.set_cancel(device->error_message());
/* update status and timing */
update_status_time();
@@ -289,7 +293,7 @@ void Session::reset_cpu(BufferParams& buffer_params, int samples)
bool Session::draw_cpu(BufferParams& buffer_params)
{
thread_scoped_lock display_lock(display->mutex);
thread_scoped_lock display_lock(display_mutex);
/* first check we already rendered something */
if(display->draw_ready()) {
@@ -308,13 +312,88 @@ bool Session::draw_cpu(BufferParams& buffer_params)
return false;
}
/* Fetch the next tile for tile_device and fill in rtile. Returns false when
 * rendering was cancelled or the tile manager has no more tiles for this
 * device. Without a write_render_buffers_cb the tile points into the
 * session's permanent buffers; with one, a temporary RenderBuffers sized to
 * the tile is allocated on tile_device and handed out through rtile.buffers
 * (release_tile() deletes it). */
bool Session::acquire_tile(Device *tile_device, RenderTile& rtile)
{
	if(progress.get_cancel())
		return false;

	thread_scoped_lock tile_lock(tile_mutex);

	/* get next tile from manager */
	Tile next;

	if(!tile_manager.next_tile(next, device->device_number(tile_device)))
		return false;

	/* fill render tile, still under the tile lock */
	rtile.x = tile_manager.state.buffer.full_x + next.x;
	rtile.y = tile_manager.state.buffer.full_y + next.y;
	rtile.w = next.w;
	rtile.h = next.h;
	rtile.start_sample = tile_manager.state.sample;
	rtile.num_samples = tile_manager.state.num_samples;
	rtile.resolution = tile_manager.state.resolution;

	tile_lock.unlock();

	if(write_render_buffers_cb) {
		/* results are written out per tile: allocate a new temporary
		 * buffer covering just this tile */
		BufferParams tile_params = tile_manager.params;
		tile_params.full_x = rtile.x;
		tile_params.full_y = rtile.y;
		tile_params.width = rtile.w;
		tile_params.height = rtile.h;

		tile_params.get_offset_stride(rtile.offset, rtile.stride);

		RenderBuffers *tile_buffers = new RenderBuffers(tile_device);
		tile_buffers->reset(tile_device, tile_params);

		rtile.buffer = tile_buffers->buffer.device_pointer;
		rtile.rng_state = tile_buffers->rng_state.device_pointer;
		rtile.rgba = 0;
		rtile.buffers = tile_buffers;
	}
	else {
		/* permanent buffer: return the session's own buffers, with the
		 * pointers mapped for the device that will render the tile */
		tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride);

		rtile.buffer = buffers->buffer.device_pointer;
		rtile.rng_state = buffers->rng_state.device_pointer;
		rtile.rgba = display->rgba.device_pointer;
		rtile.buffers = buffers;

		device->map_tile(tile_device, rtile);
	}

	return true;
}
void Session::release_tile(RenderTile& rtile)
{
	thread_scoped_lock tile_lock(tile_mutex);

	/* only tiles acquired while a write callback is set carry buffers
	 * that have to be written out and freed here */
	if(write_render_buffers_cb) {
		/* todo: optimize this by making it thread safe and removing lock */
		const bool cancelled = progress.get_cancel();

		if(!cancelled)
			write_render_buffers_cb(rtile.buffers);

		delete rtile.buffers;
	}

	update_status_time();
}
void Session::run_cpu()
{
{
/* reset once to start */
thread_scoped_lock reset_lock(delayed_reset.mutex);
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock display_lock(display->mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
thread_scoped_lock display_lock(display_mutex);
reset_(delayed_reset.params, delayed_reset.samples);
delayed_reset.do_reset = false;
@@ -364,7 +443,7 @@ void Session::run_cpu()
/* buffers mutex is locked entirely while rendering each
* sample, and released/reacquired on each iteration to allow
* reset and draw in between */
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
/* update scene */
update_scene();
@@ -379,8 +458,7 @@ void Session::run_cpu()
update_status_time();
/* path trace */
foreach(Tile& tile, tile_manager.state.tiles)
path_trace(tile);
path_trace();
/* update status and timing */
update_status_time();
@@ -396,8 +474,8 @@ void Session::run_cpu()
{
thread_scoped_lock reset_lock(delayed_reset.mutex);
thread_scoped_lock buffers_lock(buffers->mutex);
thread_scoped_lock display_lock(display->mutex);
thread_scoped_lock buffers_lock(buffers_mutex);
thread_scoped_lock display_lock(display_mutex);
if(delayed_reset.do_reset) {
/* reset rendering if request from main thread */
@@ -465,10 +543,12 @@ bool Session::draw(BufferParams& buffer_params)
void Session::reset_(BufferParams& buffer_params, int samples)
{
if(buffer_params.modified(buffers->params)) {
gpu_draw_ready = false;
buffers->reset(device, buffer_params);
display->reset(device, buffer_params);
if(buffers) {
if(buffer_params.modified(buffers->params)) {
gpu_draw_ready = false;
buffers->reset(device, buffer_params);
display->reset(device, buffer_params);
}
}
tile_manager.reset(buffer_params, samples);
@@ -532,8 +612,6 @@ void Session::update_scene()
{
thread_scoped_lock scene_lock(scene->mutex);
progress.set_status("Updating Scene");
/* update camera if dimensions changed for progressive render. the camera
* knows nothing about progressive or cropped rendering, it just gets the
* image dimensions passed in */
@@ -548,20 +626,25 @@ void Session::update_scene()
}
/* update scene */
if(scene->need_update())
if(scene->need_update()) {
progress.set_status("Updating Scene");
scene->device_update(device, progress);
}
}
void Session::update_status_time(bool show_pause, bool show_done)
{
int sample = tile_manager.state.sample;
int num_samples = tile_manager.state.num_samples;
int resolution = tile_manager.state.resolution;
int num_tiles = tile_manager.state.num_tiles;
int tile = num_tiles - tile_manager.state.tiles.size();
/* update status */
string status, substatus;
if(!params.progressive)
substatus = "Path Tracing";
substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
else if(params.samples == INT_MAX)
substatus = string_printf("Path Tracing Sample %d", sample+1);
else
@@ -585,23 +668,16 @@ void Session::update_status_time(bool show_pause, bool show_done)
/* negative can happen when we pause a bit before rendering, can discard that */
if(preview_time < 0.0) preview_time = 0.0;
progress.set_sample(sample + 1, sample_time);
progress.set_sample(sample + num_samples, sample_time);
}
void Session::path_trace(Tile& tile)
void Session::path_trace()
{
/* add path trace task */
DeviceTask task(DeviceTask::PATH_TRACE);
task.x = tile_manager.state.buffer.full_x + tile.x;
task.y = tile_manager.state.buffer.full_y + tile.y;
task.w = tile.w;
task.h = tile.h;
task.buffer = buffers->buffer.device_pointer;
task.rng_state = buffers->rng_state.device_pointer;
task.sample = tile_manager.state.sample;
task.resolution = tile_manager.state.resolution;
tile_manager.state.buffer.get_offset_stride(task.offset, task.stride);
task.acquire_tile = function_bind(&Session::acquire_tile, this, _1, _2);
task.release_tile = function_bind(&Session::release_tile, this, _1);
device->task_add(task);
}

View File

@@ -47,7 +47,7 @@ public:
bool progressive;
bool experimental;
int samples;
int tile_size;
int2 tile_size;
int min_size;
int threads;
@@ -63,7 +63,7 @@ public:
progressive = false;
experimental = false;
samples = INT_MAX;
tile_size = 64;
tile_size = make_int2(64, 64);
min_size = 64;
threads = 0;
@@ -102,8 +102,11 @@ public:
DisplayBuffer *display;
Progress progress;
SessionParams params;
TileManager tile_manager;
int sample;
boost::function<void(RenderBuffers*)> write_render_buffers_cb;
Session(const SessionParams& params);
~Session();
@@ -130,7 +133,7 @@ protected:
void update_status_time(bool show_pause = false, bool show_done = false);
void tonemap();
void path_trace(Tile& tile);
void path_trace();
void reset_(BufferParams& params, int samples);
void run_cpu();
@@ -141,7 +144,9 @@ protected:
bool draw_gpu(BufferParams& params);
void reset_gpu(BufferParams& params, int samples);
TileManager tile_manager;
bool acquire_tile(Device *tile_device, RenderTile& tile);
void release_tile(RenderTile& tile);
bool device_use_gl;
thread *session_thread;
@@ -155,6 +160,9 @@ protected:
bool pause;
thread_condition_variable pause_cond;
thread_mutex pause_mutex;
thread_mutex tile_mutex;
thread_mutex buffers_mutex;
thread_mutex display_mutex;
bool kernels_loaded;

View File

@@ -22,11 +22,12 @@
CCL_NAMESPACE_BEGIN
TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int min_size_)
TileManager::TileManager(bool progressive_, int num_samples_, int2 tile_size_, int min_size_, int num_devices_)
{
progressive = progressive_;
tile_size = tile_size_;
min_size = min_size_;
num_devices = num_devices_;
BufferParams buffer_params;
reset(buffer_params, 0);
@@ -36,7 +37,7 @@ TileManager::~TileManager()
{
}
void TileManager::reset(BufferParams& params_, int samples_)
void TileManager::reset(BufferParams& params_, int num_samples_)
{
params = params_;
@@ -53,17 +54,19 @@ void TileManager::reset(BufferParams& params_, int samples_)
}
}
samples = samples_;
num_samples = num_samples_;
state.buffer = BufferParams();
state.sample = -1;
state.num_tiles = 0;
state.num_samples = 0;
state.resolution = start_resolution;
state.tiles.clear();
}
void TileManager::set_samples(int samples_)
void TileManager::set_samples(int num_samples_)
{
samples = samples_;
num_samples = num_samples_;
}
void TileManager::set_tiles()
@@ -71,24 +74,34 @@ void TileManager::set_tiles()
int resolution = state.resolution;
int image_w = max(1, params.width/resolution);
int image_h = max(1, params.height/resolution);
int tile_w = (tile_size >= image_w)? 1: (image_w + tile_size - 1)/tile_size;
int tile_h = (tile_size >= image_h)? 1: (image_h + tile_size - 1)/tile_size;
int sub_w = image_w/tile_w;
int sub_h = image_h/tile_h;
state.tiles.clear();
for(int tile_y = 0; tile_y < tile_h; tile_y++) {
for(int tile_x = 0; tile_x < tile_w; tile_x++) {
int x = tile_x * sub_w;
int y = tile_y * sub_h;
int w = (tile_x == tile_w-1)? image_w - x: sub_w;
int h = (tile_y == tile_h-1)? image_h - y: sub_h;
int num = min(image_h, num_devices);
state.tiles.push_back(Tile(x, y, w, h));
for(int device = 0; device < num; device++) {
int device_y = (image_h/num)*device;
int device_h = (device == num-1)? image_h - device*(image_h/num): image_h/num;
int tile_w = (tile_size.x >= image_w)? 1: (image_w + tile_size.x - 1)/tile_size.x;
int tile_h = (tile_size.y >= device_h)? 1: (device_h + tile_size.y - 1)/tile_size.y;
int sub_w = (image_w + tile_w - 1)/tile_w;
int sub_h = (device_h + tile_h - 1)/tile_h;
for(int tile_y = 0; tile_y < tile_h; tile_y++) {
for(int tile_x = 0; tile_x < tile_w; tile_x++) {
int x = tile_x * sub_w;
int y = tile_y * sub_h;
int w = (tile_x == tile_w-1)? image_w - x: sub_w;
int h = (tile_y == tile_h-1)? device_h - y: sub_h;
state.tiles.push_back(Tile(x, y + device_y, w, h, device));
}
}
}
state.num_tiles = state.tiles.size();
state.buffer.width = image_w;
state.buffer.height = image_h;
@@ -98,9 +111,24 @@ void TileManager::set_tiles()
state.buffer.full_height = max(1, params.full_height/resolution);
}
bool TileManager::next_tile(Tile& tile, int device)
{
	/* hand out the first pending tile assigned to the requested device and
	 * remove it from the pending list; false when none is left */
	for(list<Tile>::iterator it = state.tiles.begin(); it != state.tiles.end(); ++it) {
		if(it->device != device)
			continue;

		tile = *it;
		state.tiles.erase(it);
		return true;
	}

	return false;
}
bool TileManager::done()
{
	/* finished once the current pass reaches the requested sample count at
	 * full resolution. state.num_samples is the number of samples covered by
	 * the current pass (set in next()), so it has to be added to the start
	 * sample rather than assuming one sample per pass */
	return (state.sample+state.num_samples >= num_samples && state.resolution == 1);
}
bool TileManager::next()
@@ -111,10 +139,17 @@ bool TileManager::next()
if(progressive && state.resolution > 1) {
state.sample = 0;
state.resolution /= 2;
state.num_samples = 1;
set_tiles();
}
else {
state.sample++;
if(progressive)
state.num_samples = 1;
else
state.num_samples = num_samples;
state.resolution = 1;
set_tiles();
}

View File

@@ -31,9 +31,13 @@ CCL_NAMESPACE_BEGIN
class Tile {
public:
	/* position and size of the tile */
	int x, y, w, h;
	/* index of the device this tile is assigned to */
	int device;

	/* empty tile on device 0; every member is initialized so a default
	 * constructed tile can be inspected or compared safely */
	Tile()
	: x(0), y(0), w(0), h(0), device(0) {}

	/* tile without an explicit device assignment, defaults to device 0 */
	Tile(int x_, int y_, int w_, int h_)
	: x(x_), y(y_), w(w_), h(h_), device(0) {}

	/* tile assigned to a specific device */
	Tile(int x_, int y_, int w_, int h_, int device_)
	: x(x_), y(y_), w(w_), h(h_), device(device_) {}
};
/* Tile Manager */
@@ -45,25 +49,29 @@ public:
struct State {
BufferParams buffer;
int sample;
int num_samples;
int resolution;
int num_tiles;
list<Tile> tiles;
} state;
TileManager(bool progressive, int samples, int tile_size, int min_size);
TileManager(bool progressive, int num_samples, int2 tile_size, int min_size, int num_devices = 1);
~TileManager();
void reset(BufferParams& params, int samples);
void set_samples(int samples);
void reset(BufferParams& params, int num_samples);
void set_samples(int num_samples);
bool next();
bool next_tile(Tile& tile, int device = 0);
bool done();
protected:
void set_tiles();
bool progressive;
int samples;
int tile_size;
int num_samples;
int2 tile_size;
int min_size;
int num_devices;
int start_resolution;
};

View File

@@ -277,6 +277,11 @@ __device_inline float cross(const float2 a, const float2 b)
#ifndef __KERNEL_OPENCL__
__device_inline bool operator==(const int2 a, const int2 b)
{
	/* two int2 values are equal when both components match */
	const bool same_x = (a.x == b.x);
	const bool same_y = (a.y == b.y);

	return same_x && same_y;
}
__device_inline float len(const float2 a)
{
return sqrtf(dot(a, a));

View File

@@ -2538,7 +2538,11 @@ static ImBuf *image_get_render_result(Image *ima, ImageUser *iuser, void **lock_
/* free rect buffer if float buffer changes, so it can be recreated with
* the updated result, and also in case we got byte buffer from sequencer,
* so we don't keep reference to freed buffer */
if (ibuf->rect_float != rectf || rect || !rectf)
/* todo: this fix breaks save buffers render progress
if (ibuf->rect_float != rectf || rect || !rectf) */
if (ibuf->rect_float != rectf || rect)
imb_freerectImBuf(ibuf);
if (rect)

View File

@@ -317,6 +317,7 @@ static void rna_def_render_engine(BlenderRNA *brna)
RNA_def_property_flag(prop, PROP_REQUIRED);
prop = RNA_def_int(func, "h", 0, 0, INT_MAX, "Height", "", 0, INT_MAX);
RNA_def_property_flag(prop, PROP_REQUIRED);
prop = RNA_def_string(func, "layer", "", 0, "Layer", "Single layer to get render result for");
prop = RNA_def_pointer(func, "result", "RenderResult", "Result", "");
RNA_def_function_return(func, prop);
@@ -326,6 +327,7 @@ static void rna_def_render_engine(BlenderRNA *brna)
func = RNA_def_function(srna, "end_result", "RE_engine_end_result");
prop = RNA_def_pointer(func, "result", "RenderResult", "Result", "");
prop = RNA_def_boolean(func, "cancel", 0, "Cancel", "Don't merge back results");
RNA_def_property_flag(prop, PROP_REQUIRED);
func = RNA_def_function(srna, "test_break", "RE_engine_test_break");
@@ -360,6 +362,11 @@ static void rna_def_render_engine(BlenderRNA *brna)
RNA_def_property_pointer_sdna(prop, NULL, "camera_override");
RNA_def_property_struct_type(prop, "Object");
prop = RNA_def_property(srna, "tile_x", PROP_INT, PROP_UNSIGNED);
RNA_def_property_int_sdna(prop, NULL, "tile_x");
prop = RNA_def_property(srna, "tile_y", PROP_INT, PROP_UNSIGNED);
RNA_def_property_int_sdna(prop, NULL, "tile_y");
/* registration */
prop = RNA_def_property(srna, "bl_idname", PROP_STRING, PROP_NONE);

View File

@@ -86,6 +86,9 @@ typedef struct RenderEngine {
int flag;
struct Object *camera_override;
int tile_x;
int tile_y;
struct Render *re;
ListBase fullresult;
char *text;
@@ -97,9 +100,9 @@ void RE_engine_free(RenderEngine *engine);
void RE_layer_load_from_file(struct RenderLayer *layer, struct ReportList *reports, const char *filename, int x, int y);
void RE_result_load_from_file(struct RenderResult *result, struct ReportList *reports, const char *filename);
struct RenderResult *RE_engine_begin_result(RenderEngine *engine, int x, int y, int w, int h);
struct RenderResult *RE_engine_begin_result(RenderEngine *engine, int x, int y, int w, int h, const char *layername);
void RE_engine_update_result(RenderEngine *engine, struct RenderResult *result);
void RE_engine_end_result(RenderEngine *engine, struct RenderResult *result);
void RE_engine_end_result(RenderEngine *engine, struct RenderResult *result, int cancel);
int RE_engine_test_break(RenderEngine *engine);
void RE_engine_update_stats(RenderEngine *engine, const char *stats, const char *info);

View File

@@ -92,6 +92,9 @@ typedef struct RenderLayer {
float *acolrect; /* 4 float, optional transparent buffer, needs storage for display updates */
float *scolrect; /* 4 float, optional strand buffer, needs storage for display updates */
int rectx, recty;
/* optional saved endresult on disk */
void *exrhandle;
ListBase passes;
@@ -124,7 +127,7 @@ typedef struct RenderResult {
volatile RenderLayer *renlay;
/* optional saved endresult on disk */
void *exrhandle;
int do_exr_tile;
/* for render results in Image, verify validity for sequences */
int framenr;

View File

@@ -40,7 +40,7 @@ struct Object;
void free_sample_tables(Render *re);
void make_sample_tables(Render *re);
void initparts(Render *re);
void initparts(Render *re, int do_crop);
void freeparts(Render *re);

View File

@@ -37,6 +37,8 @@
#define RR_USE_MEM 0
#define RR_USE_EXR 1
#define RR_ALL_LAYERS NULL
struct ImBuf;
struct ListBase;
struct Render;
@@ -49,7 +51,7 @@ struct rcti;
/* New */
struct RenderResult *render_result_new(struct Render *re,
struct rcti *partrct, int crop, int savebuffers);
struct rcti *partrct, int crop, int savebuffers, const char *layername);
struct RenderResult *render_result_new_full_sample(struct Render *re,
struct ListBase *lb, struct rcti *partrct, int crop, int savebuffers);
@@ -76,9 +78,9 @@ void render_result_exr_file_end(struct Render *re);
void render_result_exr_file_merge(struct RenderResult *rr, struct RenderResult *rrpart);
void render_result_exr_file_path(struct Scene *scene, int sample, char *filepath);
void render_result_exr_file_path(struct Scene *scene, const char *layname, int sample, char *filepath);
int render_result_exr_file_read(struct Render *re, int sample);
int render_result_exr_file_read_path(struct RenderResult *rr, const char *filepath);
int render_result_exr_file_read_path(struct RenderResult *rr, struct RenderLayer *rl_single, const char *filepath);
/* Combined Pixel Rect */

View File

@@ -55,6 +55,7 @@
#include "RE_engine.h"
#include "RE_pipeline.h"
#include "initrender.h"
#include "render_types.h"
#include "render_result.h"
@@ -149,7 +150,7 @@ void RE_engine_free(RenderEngine *engine)
/* Render Results */
RenderResult *RE_engine_begin_result(RenderEngine *engine, int x, int y, int w, int h)
RenderResult *RE_engine_begin_result(RenderEngine *engine, int x, int y, int w, int h, const char *layername)
{
Render *re = engine->re;
RenderResult *result;
@@ -172,7 +173,9 @@ RenderResult *RE_engine_begin_result(RenderEngine *engine, int x, int y, int w,
disprect.ymin = y;
disprect.ymax = y + h;
result = render_result_new(re, &disprect, 0, RR_USE_MEM);
result = render_result_new(re, &disprect, 0, RR_USE_MEM, layername);
/* todo: make this thread safe */
BLI_addtail(&engine->fullresult, result);
result->tilerect.xmin += re->disprect.xmin;
@@ -193,21 +196,36 @@ void RE_engine_update_result(RenderEngine *engine, RenderResult *result)
}
}
void RE_engine_end_result(RenderEngine *engine, RenderResult *result)
void RE_engine_end_result(RenderEngine *engine, RenderResult *result, int cancel)
{
Render *re = engine->re;
RenderPart *pa;
if (!result)
return;
/* merge. on break, don't merge in result for preview renders, looks nicer */
if (!(re->test_break(re->tbh) && (re->r.scemode & R_PREVIEWBUTS)))
render_result_merge(re->result, result);
if (!cancel) {
/* for exr tile render, detect tiles that are done */
for (pa = re->parts.first; pa; pa = pa->next) {
if (result->tilerect.xmin == pa->disprect.xmin &&
result->tilerect.ymin == pa->disprect.ymin &&
result->tilerect.xmax == pa->disprect.xmax &&
result->tilerect.ymax == pa->disprect.ymax) {
pa->ready = 1;
}
}
/* draw */
if (!re->test_break(re->tbh)) {
result->renlay = result->layers.first; // weak, draws first layer always
re->display_draw(re->ddh, result, NULL);
if (re->result->do_exr_tile)
render_result_exr_file_merge(re->result, result);
else if (!(re->test_break(re->tbh) && (re->r.scemode & R_PREVIEWBUTS)))
render_result_merge(re->result, result);
/* draw */
if (!re->test_break(re->tbh)) {
result->renlay = result->layers.first; // weak, draws first layer always
re->display_draw(re->ddh, result, NULL);
}
}
/* free */
@@ -290,12 +308,16 @@ int RE_engine_render(Render *re, int do_all)
/* create render result */
BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
if (re->result == NULL || !(re->r.scemode & R_PREVIEWBUTS)) {
int savebuffers;
if (re->result)
render_result_free(re->result);
re->result = render_result_new(re, &re->disprect, 0, 0);
savebuffers = (re->r.scemode & R_EXR_TILE_FILE) ? RR_USE_EXR : RR_USE_MEM;
re->result = render_result_new(re, &re->disprect, 0, savebuffers, RR_ALL_LAYERS);
}
BLI_rw_mutex_unlock(&re->resultmutex);
if (re->result == NULL)
return 1;
@@ -317,11 +339,29 @@ int RE_engine_render(Render *re, int do_all)
if ((re->r.scemode & (R_NO_FRAME_UPDATE | R_PREVIEWBUTS)) == 0)
BKE_scene_update_for_newframe(re->main, re->scene, re->lay);
initparts(re, FALSE);
engine->tile_x = re->partx;
engine->tile_y = re->party;
if (re->result->do_exr_tile)
render_result_exr_file_begin(re);
if (type->update)
type->update(engine, re->main, re->scene);
if (type->render)
type->render(engine, re->scene);
if (re->result->do_exr_tile) {
BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
render_result_exr_file_end(re);
BLI_rw_mutex_unlock(&re->resultmutex);
}
engine->tile_x = 0;
engine->tile_y = 0;
freeparts(re);
render_result_free_list(&engine->fullresult, engine->fullresult.first);
RE_engine_free(engine);

View File

@@ -539,7 +539,7 @@ void freeparts(Render *re)
BLI_freelistN(&re->parts);
}
void initparts(Render *re)
void initparts(Render *re, int do_crop)
{
int nr, xd, yd, partx, party, xparts, yparts;
int xminb, xmaxb, yminb, ymaxb;
@@ -620,7 +620,7 @@ void initparts(Render *re)
RenderPart *pa = MEM_callocN(sizeof(RenderPart), "new part");
/* Non-box filters need 2 pixels extra to work */
if ((re->r.filtertype || (re->r.mode & R_EDGE))) {
if (do_crop && (re->r.filtertype || (re->r.mode & R_EDGE))) {
pa->crop = 2;
disprect.xmin -= pa->crop;
disprect.ymin -= pa->crop;

View File

@@ -652,7 +652,7 @@ static void *do_part_thread(void *pa_v)
if (!R.sss_points && (R.r.scemode & R_FULL_SAMPLE))
pa->result = render_result_new_full_sample(&R, &pa->fullresult, &pa->disprect, pa->crop, RR_USE_MEM);
else
pa->result = render_result_new(&R, &pa->disprect, pa->crop, RR_USE_MEM);
pa->result = render_result_new(&R, &pa->disprect, pa->crop, RR_USE_MEM, RR_ALL_LAYERS);
if (R.sss_points)
zbufshade_sss_tile(pa);
@@ -662,7 +662,7 @@ static void *do_part_thread(void *pa_v)
zbufshade_tile(pa);
/* merge too on break! */
if (R.result->exrhandle) {
if (R.result->do_exr_tile) {
render_result_exr_file_merge(R.result, pa->result);
}
else if (render_display_draw_enabled(&R)) {
@@ -806,12 +806,12 @@ static void threaded_tile_processor(Render *re)
render_result_free(re->result);
if (re->sss_points && render_display_draw_enabled(re))
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
else if (re->r.scemode & R_FULL_SAMPLE)
re->result = render_result_new_full_sample(re, &re->fullresult, &re->disprect, 0, RR_USE_EXR);
else
re->result = render_result_new(re, &re->disprect, 0,
(re->r.scemode & R_EXR_TILE_FILE) ? RR_USE_EXR : RR_USE_MEM);
(re->r.scemode & R_EXR_TILE_FILE) ? RR_USE_EXR : RR_USE_MEM, RR_ALL_LAYERS);
}
BLI_rw_mutex_unlock(&re->resultmutex);
@@ -821,9 +821,9 @@ static void threaded_tile_processor(Render *re)
/* warning; no return here without closing exr file */
initparts(re);
initparts(re, TRUE);
if (re->result->exrhandle)
if (re->result->do_exr_tile)
render_result_exr_file_begin(re);
BLI_init_threads(&threads, do_part_thread, re->r.threads);
@@ -903,7 +903,7 @@ static void threaded_tile_processor(Render *re)
}
if (re->result->exrhandle) {
if (re->result->do_exr_tile) {
BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
render_result_exr_file_end(re);
BLI_rw_mutex_unlock(&re->resultmutex);
@@ -1056,7 +1056,7 @@ static void do_render_blur_3d(Render *re)
int blur = re->r.mblur_samples;
/* create accumulation render result */
rres = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
rres = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
/* do the blur steps */
while (blur--) {
@@ -1181,7 +1181,7 @@ static void do_render_fields_3d(Render *re)
re->disprect.ymax *= 2;
BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
if (rr2) {
if (re->r.mode & R_ODDFIELD)
@@ -1244,7 +1244,7 @@ static void do_render_fields_blur_3d(Render *re)
re->rectx = re->winx;
re->recty = re->winy;
rres = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
rres = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
render_result_merge(rres, re->result);
render_result_free(re->result);
@@ -1564,7 +1564,7 @@ static void do_render_composite_fields_blur_3d(Render *re)
BLI_rw_mutex_lock(&re->resultmutex, THREAD_LOCK_WRITE);
render_result_free(re->result);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
BLI_rw_mutex_unlock(&re->resultmutex);
@@ -1856,7 +1856,7 @@ int RE_is_rendering_allowed(Scene *scene, Object *camera_override, ReportList *r
if (scene->r.scemode & (R_EXR_TILE_FILE | R_FULL_SAMPLE)) {
char str[FILE_MAX];
render_result_exr_file_path(scene, 0, str);
render_result_exr_file_path(scene, "", 0, str);
if (BLI_file_is_writable(str) == 0) {
BKE_report(reports, RPT_ERROR, "Can not save render buffers, check the temp default path");
@@ -1940,7 +1940,7 @@ static void validate_render_settings(Render *re)
if (RE_engine_is_external(re)) {
/* not supported yet */
re->r.scemode &= ~(R_EXR_TILE_FILE | R_FULL_SAMPLE);
re->r.scemode &= ~(R_FULL_SAMPLE);
re->r.mode &= ~(R_FIELDS | R_MBLUR);
}
}
@@ -2429,7 +2429,7 @@ void RE_layer_load_from_file(RenderLayer *layer, ReportList *reports, const char
void RE_result_load_from_file(RenderResult *result, ReportList *reports, const char *filename)
{
if (!render_result_exr_file_read_path(result, filename)) {
if (!render_result_exr_file_read_path(result, NULL, filename)) {
BKE_reportf(reports, RPT_ERROR, "RE_result_rect_from_file: failed to load '%s'\n", filename);
return;
}

View File

@@ -384,10 +384,10 @@ static void render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int channel
rpass->recty = rl->recty;
BLI_strncpy(rpass->name, get_pass_name(rpass->passtype, -1), sizeof(rpass->name));
if (rr->exrhandle) {
if (rl->exrhandle) {
int a;
for (a = 0; a < channels; a++)
IMB_exr_add_channel(rr->exrhandle, rl->name, get_pass_name(passtype, a), 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, get_pass_name(passtype, a), 0, 0, NULL);
}
else {
float *rect;
@@ -413,7 +413,7 @@ static void render_layer_add_pass(RenderResult *rr, RenderLayer *rl, int channel
/* will read info from Render *re to define layers */
/* called in threads */
/* re->winx,winy is coordinate space of entire image, partrct the part within */
RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuffers)
RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuffers, const char *layername)
{
RenderResult *rr;
RenderLayer *rl;
@@ -435,17 +435,21 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
/* tilerect is relative coordinates within render disprect. do not subtract crop yet */
rr->tilerect.xmin = partrct->xmin - re->disprect.xmin;
rr->tilerect.xmax = partrct->xmax - re->disprect.xmax;
rr->tilerect.xmax = partrct->xmax - re->disprect.xmin;
rr->tilerect.ymin = partrct->ymin - re->disprect.ymin;
rr->tilerect.ymax = partrct->ymax - re->disprect.ymax;
rr->tilerect.ymax = partrct->ymax - re->disprect.ymin;
if (savebuffers) {
rr->exrhandle = IMB_exr_get_handle();
rr->do_exr_tile = TRUE;
}
/* check renderdata for amount of layers */
for (nr = 0, srl = re->r.layers.first; srl; srl = srl->next, nr++) {
if (layername && layername[0])
if (strcmp(srl->name, layername) != 0)
continue;
if ((re->r.scemode & R_SINGLE_LAYER) && nr != re->r.actlay)
continue;
if (srl->layflag & SCE_LAY_DISABLE)
@@ -466,11 +470,13 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
rl->rectx = rectx;
rl->recty = recty;
if (rr->exrhandle) {
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.R", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.G", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.B", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.A", 0, 0, NULL);
if (rr->do_exr_tile) {
rl->exrhandle = IMB_exr_get_handle();
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.R", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.G", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.B", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.A", 0, 0, NULL);
}
else
rl->rectf = MEM_mapallocN(rectx * recty * sizeof(float) * 4, "Combined rgba");
@@ -532,7 +538,7 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
}
/* sss, previewrender and envmap don't do layers, so we make a default one */
if (rr->layers.first == NULL) {
if (rr->layers.first == NULL && !(layername && layername[0])) {
rl = MEM_callocN(sizeof(RenderLayer), "new render layer");
BLI_addtail(&rr->layers, rl);
@@ -540,11 +546,13 @@ RenderResult *render_result_new(Render *re, rcti *partrct, int crop, int savebuf
rl->recty = recty;
/* duplicate code... */
if (rr->exrhandle) {
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.R", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.G", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.B", 0, 0, NULL);
IMB_exr_add_channel(rr->exrhandle, rl->name, "Combined.A", 0, 0, NULL);
if (rr->do_exr_tile) {
rl->exrhandle = IMB_exr_get_handle();
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.R", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.G", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.B", 0, 0, NULL);
IMB_exr_add_channel(rl->exrhandle, rl->name, "Combined.A", 0, 0, NULL);
}
else
rl->rectf = MEM_mapallocN(rectx * recty * sizeof(float) * 4, "Combined rgba");
@@ -570,10 +578,10 @@ RenderResult *render_result_new_full_sample(Render *re, ListBase *lb, rcti *part
int a;
if (re->osa == 0)
return render_result_new(re, partrct, crop, savebuffers);
return render_result_new(re, partrct, crop, savebuffers, RR_ALL_LAYERS);
for (a = 0; a < re->osa; a++) {
RenderResult *rr = render_result_new(re, partrct, crop, savebuffers);
RenderResult *rr = render_result_new(re, partrct, crop, savebuffers, RR_ALL_LAYERS);
BLI_addtail(lb, rr);
rr->sample_nr = a;
}
@@ -682,15 +690,18 @@ void render_result_merge(RenderResult *rr, RenderResult *rrpart)
RenderLayer *rl, *rlp;
RenderPass *rpass, *rpassp;
for (rl = rr->layers.first, rlp = rrpart->layers.first; rl && rlp; rl = rl->next, rlp = rlp->next) {
/* combined */
if (rl->rectf && rlp->rectf)
do_merge_tile(rr, rrpart, rl->rectf, rlp->rectf, 4);
/* passes are allocated in sync */
for (rpass = rl->passes.first, rpassp = rlp->passes.first; rpass && rpassp; rpass = rpass->next, rpassp = rpassp->next) {
do_merge_tile(rr, rrpart, rpass->rect, rpassp->rect, rpass->channels);
for (rl = rr->layers.first; rl; rl = rl->next) {
for (rlp = rrpart->layers.first; rlp; rlp = rlp->next) {
if (strcmp(rlp->name, rl->name) == 0) {
/* combined */
if (rl->rectf && rlp->rectf)
do_merge_tile(rr, rrpart, rl->rectf, rlp->rectf, 4);
/* passes are allocated in sync */
for (rpass = rl->passes.first, rpassp = rlp->passes.first; rpass && rpassp; rpass = rpass->next, rpassp = rpassp->next) {
do_merge_tile(rr, rrpart, rpass->rect, rpassp->rect, rpass->channels);
}
}
}
}
}
@@ -827,13 +838,16 @@ void render_result_single_layer_end(Render *re)
/* Hand one rendered tile (rrpart) to the temporary EXR file(s) of the full
 * result (rr).
 *
 * The visible body runs between BLI_lock_thread(LOCK_IMAGE) and
 * BLI_unlock_thread(LOCK_IMAGE).
 *
 * NOTE(review): this listing is a set of patch hunks shown without +/- markers.
 * Lines between the "@@" headers are not visible here, and statements that
 * appear twice (once with rr->exrhandle, once with rl->exrhandle) look like the
 * before and after of the same line -- confirm against the applied file. */
static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart)
{
RenderLayer *rlp;
RenderLayer *rlp, *rl;
RenderPass *rpassp;
int offs, partx, party;
BLI_lock_thread(LOCK_IMAGE);
/* for every layer of the tile ... */
for (rlp = rrpart->layers.first; rlp; rlp = rlp->next) {
/* ... look up the layer of rr that carries the same name */
for (rl = rr->layers.first; rl; rl = rl->next)
if (strcmp(rl->name, rlp->name) == 0)
break;
/* NOTE(review): rl is NULL here when no name matches, and rl->exrhandle is
 * dereferenced below -- confirm that a match is guaranteed. */
/* offs is the element offset (crop columns plus crop rows of rectx) that is
 * applied, times xstride, to the start of each rect below */
if (rrpart->crop) { /* filters add pixel extra */
offs = (rrpart->crop + rrpart->crop * rrpart->rectx);
@@ -846,7 +860,7 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart)
if (rlp->rectf) {
int a, xstride = 4;
/* register one channel per component a of the combined rect; xstride and
 * xstride * rectx are presumably the pixel and row strides -- confirm
 * against IMB_exr_set_channel */
for (a = 0; a < xstride; a++)
IMB_exr_set_channel(rr->exrhandle, rlp->name, get_pass_name(SCE_PASS_COMBINED, a),
IMB_exr_set_channel(rl->exrhandle, rlp->name, get_pass_name(SCE_PASS_COMBINED, a),
xstride, xstride * rrpart->rectx, rlp->rectf + a + xstride * offs);
}
@@ -854,7 +868,7 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart)
/* same registration for every pass of the layer, using the pass' own
 * channel count as stride */
for (rpassp = rlp->passes.first; rpassp; rpassp = rpassp->next) {
int a, xstride = rpassp->channels;
for (a = 0; a < xstride; a++)
IMB_exr_set_channel(rr->exrhandle, rlp->name, get_pass_name(rpassp->passtype, a),
IMB_exr_set_channel(rl->exrhandle, rlp->name, get_pass_name(rpassp->passtype, a),
xstride, xstride * rrpart->rectx, rpassp->rect + a + xstride * offs);
}
@@ -862,7 +876,14 @@ static void save_render_result_tile(RenderResult *rr, RenderResult *rrpart)
/* tile position passed to the writer: tilerect minimum plus the crop border */
party = rrpart->tilerect.ymin + rrpart->crop;
partx = rrpart->tilerect.xmin + rrpart->crop;
IMB_exrtile_write_channels(rr->exrhandle, partx, party, 0);
/* per-layer form: repeat the name lookup and write the tile through the
 * handle of each matching layer */
for (rlp = rrpart->layers.first; rlp; rlp = rlp->next) {
for (rl = rr->layers.first; rl; rl = rl->next)
if (strcmp(rl->name, rlp->name) == 0)
break;
IMB_exrtile_write_channels(rl->exrhandle, partx, party, 0);
}
BLI_unlock_thread(LOCK_IMAGE);
}
@@ -871,15 +892,18 @@ static void save_empty_result_tiles(Render *re)
{
RenderPart *pa;
RenderResult *rr;
RenderLayer *rl;
for (rr = re->result; rr; rr = rr->next) {
IMB_exrtile_clear_channels(rr->exrhandle);
for (rl = rr->layers.first; rl; rl = rl->next) {
IMB_exrtile_clear_channels(rl->exrhandle);
for (pa = re->parts.first; pa; pa = pa->next) {
if (pa->ready == 0) {
int party = pa->disprect.ymin - re->disprect.ymin + pa->crop;
int partx = pa->disprect.xmin - re->disprect.xmin + pa->crop;
IMB_exrtile_write_channels(rr->exrhandle, partx, party, 0);
for (pa = re->parts.first; pa; pa = pa->next) {
if (pa->ready == 0) {
int party = pa->disprect.ymin - re->disprect.ymin + pa->crop;
int partx = pa->disprect.xmin - re->disprect.xmin + pa->crop;
IMB_exrtile_write_channels(rl->exrhandle, partx, party, 0);
}
}
}
}
@@ -889,13 +913,16 @@ static void save_empty_result_tiles(Render *re)
/* Begin writing the temporary tiled EXR files for every render result in the
 * list that starts at re->result.
 *
 * NOTE(review): this listing is a patch hunk shown without +/- markers, so the
 * per-result calls (rr->exrhandle, three-argument file path) and the per-layer
 * calls (rl->exrhandle, layer name in the file path) both appear; they look
 * like the before and after of the same statements -- confirm against the
 * applied file. */
void render_result_exr_file_begin(Render *re)
{
RenderResult *rr;
RenderLayer *rl;
char str[FILE_MAX]; /* receives the temporary file path */
for (rr = re->result; rr; rr = rr->next) {
/* per-result form: one path and one handle for the whole result */
render_result_exr_file_path(re->scene, rr->sample_nr, str);
printf("write exr tmp file, %dx%d, %s\n", rr->rectx, rr->recty, str);
IMB_exrtile_begin_write(rr->exrhandle, str, 0, rr->rectx, rr->recty, re->partx, re->party);
/* per-layer form: the path is built from the layer name and rr->sample_nr,
 * and each layer is opened through its own handle; re->partx / re->party
 * are presumably the tile dimensions -- confirm against
 * IMB_exrtile_begin_write */
for (rl = rr->layers.first; rl; rl = rl->next) {
render_result_exr_file_path(re->scene, rl->name, rr->sample_nr, str);
IMB_exrtile_begin_write(rl->exrhandle, str, 0, rr->rectx, rr->recty, re->partx, re->party);
}
}
}
@@ -903,12 +930,17 @@ void render_result_exr_file_begin(Render *re)
void render_result_exr_file_end(Render *re)
{
RenderResult *rr;
RenderLayer *rl;
save_empty_result_tiles(re);
for (rr = re->result; rr; rr = rr->next) {
IMB_exr_close(rr->exrhandle);
rr->exrhandle = NULL;
for (rl = rr->layers.first; rl; rl = rl->next) {
IMB_exr_close(rl->exrhandle);
rl->exrhandle = NULL;
}
rr->do_exr_tile = FALSE;
}
render_result_free_list(&re->fullresult, re->result);
@@ -925,17 +957,17 @@ void render_result_exr_file_merge(RenderResult *rr, RenderResult *rrpart)
}
/* path to temporary exr file */
/* Builds the file name from `fi` (taken from G.main->name), the scene name and,
 * in the four-argument form, the layer name; `sample` is appended as a number
 * when it is non-zero. The result is written to `filepath`, located in
 * BLI_temporary_dir().
 *
 * NOTE(review): this listing is a patch hunk shown without +/- markers, so two
 * signatures, two declarations of `name` and paired BLI_snprintf calls appear;
 * the variants taking `layname` look like the post-change lines -- confirm
 * against the applied file. */
void render_result_exr_file_path(Scene *scene, int sample, char *filepath)
void render_result_exr_file_path(Scene *scene, const char *layname, int sample, char *filepath)
{
char di[FILE_MAX], name[FILE_MAXFILE + MAX_ID_NAME + 100], fi[FILE_MAXFILE];
/* the layer-name variant reserves one more MAX_ID_NAME in `name` */
char di[FILE_MAX], name[FILE_MAXFILE + MAX_ID_NAME + MAX_ID_NAME + 100], fi[FILE_MAXFILE];
BLI_strncpy(di, G.main->name, FILE_MAX);
/* presumably splits di into its directory part and the file part fi --
 * confirm against BLI_splitdirstring */
BLI_splitdirstring(di, fi);
/* scene->id.name + 2 skips the first two characters of the ID name
 * (presumably the ID type prefix -- confirm) */
if (sample == 0)
BLI_snprintf(name, sizeof(name), "%s_%s.exr", fi, scene->id.name + 2);
BLI_snprintf(name, sizeof(name), "%s_%s_%s.exr", fi, scene->id.name + 2, layname);
else
BLI_snprintf(name, sizeof(name), "%s_%s%d.exr", fi, scene->id.name + 2, sample);
BLI_snprintf(name, sizeof(name), "%s_%s_%s%d.exr", fi, scene->id.name + 2, layname, sample);
BLI_make_file_string("/", filepath, BLI_temporary_dir(), name);
}
@@ -943,29 +975,30 @@ void render_result_exr_file_path(Scene *scene, int sample, char *filepath)
/* only for temp buffer files, makes exact copy of render result */
/* Frees re->result, allocates a fresh result for re->disprect and fills it from
 * the temporary EXR file(s) for `sample`. Returns `success`.
 *
 * NOTE(review): this listing is a patch hunk shown without +/- markers, so a
 * single-file read (render_result_exr_file_read_path(re->result, str)) and a
 * per-layer read (one path per rl->name) both appear; they look like the
 * before and after of the same logic -- confirm against the applied file. */
int render_result_exr_file_read(Render *re, int sample)
{
RenderLayer *rl;
char str[FILE_MAX];
int success;
/* per-layer form starts from TRUE and only ever clears the flag */
int success = TRUE;
RE_FreeRenderResult(re->result);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM);
re->result = render_result_new(re, &re->disprect, 0, RR_USE_MEM, RR_ALL_LAYERS);
render_result_exr_file_path(re->scene, sample, str);
printf("read exr tmp file: %s\n", str);
for (rl = re->result->layers.first; rl; rl = rl->next) {
if (render_result_exr_file_read_path(re->result, str)) {
success = TRUE;
}
else {
printf("cannot read: %s\n", str);
success = FALSE;
/* per-layer form: build the path for this layer and read only this layer;
 * a failed layer clears `success` but the loop carries on with the next one */
render_result_exr_file_path(re->scene, rl->name, sample, str);
printf("read exr tmp file: %s\n", str);
if (!render_result_exr_file_read_path(re->result, rl, str)) {
printf("cannot read: %s\n", str);
success = FALSE;
}
}
return success;
}
/* called for reading temp files, and for external engines */
int render_result_exr_file_read_path(RenderResult *rr, const char *filepath)
int render_result_exr_file_read_path(RenderResult *rr, RenderLayer *rl_single, const char *filepath)
{
RenderLayer *rl;
RenderPass *rpass;
@@ -988,6 +1021,9 @@ int render_result_exr_file_read_path(RenderResult *rr, const char *filepath)
}
for (rl = rr->layers.first; rl; rl = rl->next) {
if (rl_single && rl_single != rl)
continue;
/* combined */
if (rl->rectf) {
int a, xstride = 4;