This commit solves a couple of issues that appeared with the new integrator: - The render job progress bar now shows progress based on the number of rendered tiles. This is the same as what Blender Internal does. This still requires some further thought, because for GPU it's better to use a single tile, and in that case the progress bar should be based on the number of rendered samples. - Removes the "global" sample counter from the progress descriptor. There is no longer any global sample for which such a counter would make sense. This counter was replaced with a tile counter. - Use the proper sample number when copying the render buffer to Blender. Previously the final sample number was used, which led to tiles going from completely dark to normal brightness as they were being rendered. Now a tile is displayed with proper brightness starting from the very first sample. Use the sample counter stored in the render tile descriptor and pass it to the update / write callbacks. This was tested with CPU and GPU CUDA rendering. Additional change: OpenCL rendering should now be cancellable before it has finished rendering all the samples (the same change as was made for CPU/CUDA a while ago). This part of the commit wasn't actually tested; that will be done later.
305 lines
6.6 KiB
C++
305 lines
6.6 KiB
C++
/*
|
|
* Copyright 2011, Blender Foundation.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "device.h"
|
|
#include "device_intern.h"
|
|
|
|
#include "kernel.h"
|
|
#include "kernel_types.h"
|
|
|
|
#include "osl_shader.h"
|
|
|
|
#include "buffers.h"
|
|
|
|
#include "util_debug.h"
|
|
#include "util_foreach.h"
|
|
#include "util_function.h"
|
|
#include "util_opengl.h"
|
|
#include "util_progress.h"
|
|
#include "util_system.h"
|
|
#include "util_thread.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
class CPUDevice : public Device
|
|
{
|
|
public:
|
|
TaskPool task_pool;
|
|
KernelGlobals *kg;
|
|
|
|
CPUDevice(int threads_num)
|
|
{
|
|
kg = kernel_globals_create();
|
|
|
|
/* do now to avoid thread issues */
|
|
system_cpu_support_optimized();
|
|
}
|
|
|
|
~CPUDevice()
|
|
{
|
|
task_pool.stop();
|
|
kernel_globals_free(kg);
|
|
}
|
|
|
|
bool support_advanced_shading()
|
|
{
|
|
return true;
|
|
}
|
|
|
|
void mem_alloc(device_memory& mem, MemoryType type)
|
|
{
|
|
mem.device_pointer = mem.data_pointer;
|
|
}
|
|
|
|
void mem_copy_to(device_memory& mem)
|
|
{
|
|
/* no-op */
|
|
}
|
|
|
|
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
|
|
{
|
|
/* no-op */
|
|
}
|
|
|
|
void mem_zero(device_memory& mem)
|
|
{
|
|
memset((void*)mem.device_pointer, 0, mem.memory_size());
|
|
}
|
|
|
|
void mem_free(device_memory& mem)
|
|
{
|
|
mem.device_pointer = 0;
|
|
}
|
|
|
|
void const_copy_to(const char *name, void *host, size_t size)
|
|
{
|
|
kernel_const_copy(kg, name, host, size);
|
|
}
|
|
|
|
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
|
|
{
|
|
kernel_tex_copy(kg, name, mem.data_pointer, mem.data_width, mem.data_height);
|
|
mem.device_pointer = mem.data_pointer;
|
|
}
|
|
|
|
void tex_free(device_memory& mem)
|
|
{
|
|
mem.device_pointer = 0;
|
|
}
|
|
|
|
void *osl_memory()
|
|
{
|
|
#ifdef WITH_OSL
|
|
return kernel_osl_memory(kg);
|
|
#else
|
|
return NULL;
|
|
#endif
|
|
}
|
|
|
|
void thread_run(DeviceTask *task)
|
|
{
|
|
if(task->type == DeviceTask::PATH_TRACE)
|
|
thread_path_trace(*task);
|
|
else if(task->type == DeviceTask::TONEMAP)
|
|
thread_tonemap(*task);
|
|
else if(task->type == DeviceTask::SHADER)
|
|
thread_shader(*task);
|
|
}
|
|
|
|
class CPUDeviceTask : public DeviceTask {
|
|
public:
|
|
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
|
|
: DeviceTask(task)
|
|
{
|
|
run = function_bind(&CPUDevice::thread_run, device, this);
|
|
}
|
|
};
|
|
|
|
void thread_path_trace(DeviceTask& task)
|
|
{
|
|
if(task_pool.cancelled())
|
|
return;
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_init(kg);
|
|
#endif
|
|
|
|
RenderTile tile;
|
|
|
|
while(task.acquire_tile(this, tile)) {
|
|
float *render_buffer = (float*)tile.buffer;
|
|
uint *rng_state = (uint*)tile.rng_state;
|
|
int start_sample = tile.start_sample;
|
|
int end_sample = tile.start_sample + tile.num_samples;
|
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int sample = start_sample; sample < end_sample; sample++) {
|
|
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
|
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
|
if (task.get_cancel())
|
|
break;
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
|
|
kernel_cpu_optimized_path_trace(kg, render_buffer, rng_state,
|
|
sample, x, y, tile.offset, tile.stride);
|
|
}
|
|
}
|
|
|
|
tile.sample = sample + 1;
|
|
task.update_progress(tile);
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int sample = start_sample; sample < end_sample; sample++) {
|
|
for(int y = tile.y; y < tile.y + tile.h; y++) {
|
|
for(int x = tile.x; x < tile.x + tile.w; x++) {
|
|
if (task.get_cancel()) {
|
|
break;
|
|
}
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
|
|
kernel_cpu_path_trace(kg, render_buffer, rng_state,
|
|
sample, x, y, tile.offset, tile.stride);
|
|
|
|
}
|
|
}
|
|
|
|
tile.sample = sample + 1;
|
|
task.update_progress(tile);
|
|
}
|
|
}
|
|
|
|
task.release_tile(tile);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_free(kg);
|
|
#endif
|
|
}
|
|
|
|
void thread_tonemap(DeviceTask& task)
|
|
{
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
|
kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int y = task.y; y < task.y + task.h; y++)
|
|
for(int x = task.x; x < task.x + task.w; x++)
|
|
kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float*)task.buffer,
|
|
task.sample, task.resolution, x, y, task.offset, task.stride);
|
|
}
|
|
}
|
|
|
|
void thread_shader(DeviceTask& task)
|
|
{
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_init(kg);
|
|
#endif
|
|
|
|
#ifdef WITH_OPTIMIZED_KERNEL
|
|
if(system_cpu_support_optimized()) {
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
|
kernel_cpu_optimized_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
|
|
kernel_cpu_shader(kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
|
|
|
|
if(task_pool.cancelled())
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_OSL
|
|
if(kernel_osl_use(kg))
|
|
OSLShader::thread_free(kg);
|
|
#endif
|
|
}
|
|
|
|
void task_add(DeviceTask& task)
|
|
{
|
|
/* split task into smaller ones, more than number of threads for uneven
|
|
* workloads where some parts of the image render slower than others */
|
|
list<DeviceTask> tasks;
|
|
task.split(tasks, TaskScheduler::num_threads()+1);
|
|
|
|
foreach(DeviceTask& task, tasks)
|
|
task_pool.push(new CPUDeviceTask(this, task));
|
|
}
|
|
|
|
void task_wait()
|
|
{
|
|
task_pool.wait_work();
|
|
}
|
|
|
|
void task_cancel()
|
|
{
|
|
task_pool.cancel();
|
|
}
|
|
};
|
|
|
|
/* Create a new CPU device for the given number of threads.
 * The device info is not used here. The device is allocated with new and
 * returned to the caller. */
Device *device_cpu_create(DeviceInfo& info, int threads)
{
	CPUDevice *cpu_device = new CPUDevice(threads);

	return cpu_device;
}
|
|
|
|
/* Describe the CPU device and put the description at the very beginning
 * of the given device list. */
void device_cpu_info(vector<DeviceInfo>& devices)
{
	DeviceInfo cpu_info;

	/* identification */
	cpu_info.id = "CPU";
	cpu_info.type = DEVICE_CPU;
	cpu_info.num = 0;
	cpu_info.description = system_cpu_brand_string();

	/* capabilities */
	cpu_info.pack_images = false;
	cpu_info.advanced_shading = true;

	/* insert in front of the devices which are already listed */
	devices.insert(devices.begin(), cpu_info);
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|