Files
test/intern/cycles/device/device_cpu.cpp
Sergey Sharybin 62df895415 Tomato Cycles: resolve slowdown introduced by interactive samples view commit
Update samples displayign in the interface once in a whiel only
(currently once in 1 sec). This still keeps rendering interactive
enough for artists and it eliminates slowdown caused by constant
uploading sampels from GPU to host.

This also allowed to get rid of hack with storing render result
in render buffers, due to it's not so big overhead ow to create
render result when sample needs to be updated.

Tests made with different files shows that now render speed is
really close to oroginal speed from before we started hackign
into this stuff.

There still some issues with interactive update when rendering
several render layers, but that seems to be irrelivant to all
this sampels commtis, so woudl look into this as a separate
patch.
2012-07-25 14:29:14 +00:00

303 lines
6.5 KiB
C++

/*
* Copyright 2011, Blender Foundation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdlib.h>
#include <string.h>
#include "device.h"
#include "device_intern.h"
#include "kernel.h"
#include "kernel_types.h"
#include "osl_shader.h"
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"
CCL_NAMESPACE_BEGIN
class CPUDevice : public Device
{
public:
TaskPool task_pool;
KernelGlobals *kg;
CPUDevice(int threads_num)
{
kg = kernel_globals_create();
/* do now to avoid thread issues */
system_cpu_support_optimized();
}
~CPUDevice()
{
task_pool.stop();
kernel_globals_free(kg);
}
bool support_advanced_shading()
{
return true;
}
void mem_alloc(device_memory& mem, MemoryType type)
{
mem.device_pointer = mem.data_pointer;
}
void mem_copy_to(device_memory& mem)
{
/* no-op */
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
/* no-op */
}
void mem_zero(device_memory& mem)
{
memset((void*)mem.device_pointer, 0, mem.memory_size());
}
void mem_free(device_memory& mem)
{
mem.device_pointer = 0;
}
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(kg, name, host, size);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
}
void tex_free(device_memory& mem)
{
mem.device_pointer = 0;
}
void *osl_memory()
{
#ifdef WITH_OSL
return kernel_osl_memory(kg);
#else
return NULL;
#endif
}
void thread_run(DeviceTask *task)
{
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
else if(task->type == DeviceTask::TONEMAP)
thread_tonemap(*task);
else if(task->type == DeviceTask::SHADER)
thread_shader(*task);
}
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
void thread_path_trace(DeviceTask& task)
{
if(task_pool.cancelled())
return;
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
#endif
RenderTile tile;
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int sample = start_sample; sample < end_sample; sample++) {
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
if (task.get_cancel())
break;
if(task_pool.cancelled())
break;
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
task.update_progress(tile);
}
}
else
#endif
{
for(int sample = start_sample; sample < end_sample; sample++) {
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
if (task.get_cancel()) {
break;
}
if(task_pool.cancelled())
break;
kernel_cpu_path_trace(kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
task.update_progress(tile);
}
}
task.release_tile(tile);
if(task_pool.cancelled())
break;
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
#endif
}
void thread_tonemap(DeviceTask& task)
{
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
task.sample, task.resolution, x, y, task.offset, task.stride);
}
}
void thread_shader(DeviceTask& task)
{
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_init(kg);
#endif
#ifdef WITH_OPTIMIZED_KERNEL
if(system_cpu_support_optimized()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
else
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.cancelled())
break;
}
}
#ifdef WITH_OSL
if(kernel_osl_use(kg))
OSLShader::thread_free(kg);
#endif
}
void task_add(DeviceTask& task)
{
/* split task into smaller ones, more than number of threads for uneven
* workloads where some parts of the image render slower than others */
list<DeviceTask> tasks;
task.split(tasks, TaskScheduler::num_threads()+1);
foreach(DeviceTask& task, tasks)
task_pool.push(new CPUDeviceTask(this, task));
}
void task_wait()
{
task_pool.wait_work();
}
void task_cancel()
{
task_pool.cancel();
}
};
Device *device_cpu_create(DeviceInfo& info, int threads)
{
return new CPUDevice(threads);
}
void device_cpu_info(vector<DeviceInfo>& devices)
{
DeviceInfo info;
info.type = DEVICE_CPU;
info.description = system_cpu_brand_string();
info.id = "CPU";
info.num = 0;
info.advanced_shading = true;
info.pack_images = false;
devices.insert(devices.begin(), info);
}
CCL_NAMESPACE_END