Files
test/intern/cycles/device/device_cpu.cpp
Martijn Berger 85a0c5d4e1 Cycles: network render code updated for latest changes and improved
This actually works somewhat now, although viewport rendering is broken and any
kind of network error or connection failure will kill Blender.

* Experimental WITH_CYCLES_NETWORK cmake option
* Networked Device is shown as an option next to CPU and GPU Compute
* Various updates to work with the latest Cycles code
* Locks and thread safety for RPC calls and tiles
* Refactored pointer mapping code
* Fix error in CPU brand string retrieval code

This includes work by Doug Gale, Martijn Berger and Brecht Van Lommel.

Reviewers: brecht

Differential Revision: http://developer.blender.org/D36
2013-12-07 12:26:58 +01:00

427 lines
11 KiB
C++

/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#include <stdlib.h>
#include <string.h>
#include "device.h"
#include "device_intern.h"
#include "kernel.h"
#include "kernel_compat_cpu.h"
#include "kernel_types.h"
#include "kernel_globals.h"
#include "osl_shader.h"
#include "osl_globals.h"
#include "buffers.h"
#include "util_debug.h"
#include "util_foreach.h"
#include "util_function.h"
#include "util_opengl.h"
#include "util_progress.h"
#include "util_system.h"
#include "util_thread.h"
CCL_NAMESPACE_BEGIN
class CPUDevice : public Device
{
public:
TaskPool task_pool;
KernelGlobals kernel_globals;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
CPUDevice(DeviceInfo& info, Stats &stats, bool background)
: Device(info, stats, background)
{
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
/* do now to avoid thread issues */
system_cpu_support_sse2();
system_cpu_support_sse3();
system_cpu_support_sse41();
}
~CPUDevice()
{
task_pool.stop();
}
void mem_alloc(device_memory& mem, MemoryType type)
{
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void mem_copy_to(device_memory& mem)
{
/* no-op */
}
void mem_copy_from(device_memory& mem, int y, int w, int h, int elem)
{
/* no-op */
}
void mem_zero(device_memory& mem)
{
memset((void*)mem.device_pointer, 0, mem.memory_size());
}
void mem_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void const_copy_to(const char *name, void *host, size_t size)
{
kernel_const_copy(&kernel_globals, name, host, size);
}
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
kernel_tex_copy(&kernel_globals, name, mem.data_pointer, mem.data_width, mem.data_height);
mem.device_pointer = mem.data_pointer;
stats.mem_alloc(mem.memory_size());
}
void tex_free(device_memory& mem)
{
mem.device_pointer = 0;
stats.mem_free(mem.memory_size());
}
void *osl_memory()
{
#ifdef WITH_OSL
return &osl_globals;
#else
return NULL;
#endif
}
void thread_run(DeviceTask *task)
{
if(task->type == DeviceTask::PATH_TRACE)
thread_path_trace(*task);
else if(task->type == DeviceTask::FILM_CONVERT)
thread_film_convert(*task);
else if(task->type == DeviceTask::SHADER)
thread_shader(*task);
}
class CPUDeviceTask : public DeviceTask {
public:
CPUDeviceTask(CPUDevice *device, DeviceTask& task)
: DeviceTask(task)
{
run = function_bind(&CPUDevice::thread_run, device, this);
}
};
void thread_path_trace(DeviceTask& task)
{
if(task_pool.canceled()) {
if(task.need_finish_queue == false)
return;
}
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
RenderTile tile;
while(task.acquire_tile(this, tile)) {
float *render_buffer = (float*)tile.buffer;
uint *rng_state = (uint*)tile.rng_state;
int start_sample = tile.start_sample;
int end_sample = tile.start_sample + tile.num_samples;
#ifdef WITH_OPTIMIZED_KERNEL
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_sse41_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
else
#endif
if(system_cpu_support_sse3()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_sse3_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
else if(system_cpu_support_sse2()) {
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_sse2_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
else
#endif
{
for(int sample = start_sample; sample < end_sample; sample++) {
if (task.get_cancel() || task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
for(int y = tile.y; y < tile.y + tile.h; y++) {
for(int x = tile.x; x < tile.x + tile.w; x++) {
kernel_cpu_path_trace(&kg, render_buffer, rng_state,
sample, x, y, tile.offset, tile.stride);
}
}
tile.sample = sample + 1;
task.update_progress(tile);
}
}
task.release_tile(tile);
if(task_pool.canceled()) {
if(task.need_finish_queue == false)
break;
}
}
#ifdef WITH_OSL
OSLShader::thread_free(&kg);
#endif
}
void thread_film_convert(DeviceTask& task)
{
float sample_scale = 1.0f/(task.sample + 1);
if(task.rgba_half) {
#ifdef WITH_OPTIMIZED_KERNEL
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse41_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else
#endif
if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse3_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse2_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_convert_to_half_float(&kernel_globals, (uchar4*)task.rgba_half, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
}
else {
#ifdef WITH_OPTIMIZED_KERNEL
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse41_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
#endif
if(system_cpu_support_sse3()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse3_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else if(system_cpu_support_sse2()) {
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_sse2_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
else
#endif
{
for(int y = task.y; y < task.y + task.h; y++)
for(int x = task.x; x < task.x + task.w; x++)
kernel_cpu_convert_to_byte(&kernel_globals, (uchar4*)task.rgba_byte, (float*)task.buffer,
sample_scale, x, y, task.offset, task.stride);
}
}
}
void thread_shader(DeviceTask& task)
{
KernelGlobals kg = kernel_globals;
#ifdef WITH_OSL
OSLShader::thread_init(&kg, &kernel_globals, &osl_globals);
#endif
#ifdef WITH_OPTIMIZED_KERNEL
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if(system_cpu_support_sse41()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse41_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.canceled())
break;
}
}
#endif
if(system_cpu_support_sse3()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse3_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.canceled())
break;
}
}
else if(system_cpu_support_sse2()) {
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_sse2_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.canceled())
break;
}
}
else
#endif
{
for(int x = task.shader_x; x < task.shader_x + task.shader_w; x++) {
kernel_cpu_shader(&kg, (uint4*)task.shader_input, (float4*)task.shader_output, task.shader_eval_type, x);
if(task_pool.canceled())
break;
}
}
#ifdef WITH_OSL
OSLShader::thread_free(&kg);
#endif
}
void task_add(DeviceTask& task)
{
/* split task into smaller ones */
list<DeviceTask> tasks;
task.split(tasks, TaskScheduler::num_threads());
foreach(DeviceTask& task, tasks)
task_pool.push(new CPUDeviceTask(this, task));
}
void task_wait()
{
task_pool.wait_work();
}
void task_cancel()
{
task_pool.cancel();
}
};
Device *device_cpu_create(DeviceInfo& info, Stats &stats, bool background)
{
return new CPUDevice(info, stats, background);
}
void device_cpu_info(vector<DeviceInfo>& devices)
{
DeviceInfo info;
info.type = DEVICE_CPU;
info.description = system_cpu_brand_string();
info.id = "CPU";
info.num = 0;
info.advanced_shading = true;
info.pack_images = false;
devices.insert(devices.begin(), info);
}
CCL_NAMESPACE_END