From ea88a0bec5317fbd84dd2f9128dbae8a50d3863e Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Tue, 20 Dec 2011 11:37:55 +0000 Subject: [PATCH 1/7] recent commit missed swapping args for MATRIX_ITEM in mathutils_Vector.c, breaking matrix*vector. --- .../blender/python/mathutils/mathutils_Vector.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/source/blender/python/mathutils/mathutils_Vector.c b/source/blender/python/mathutils/mathutils_Vector.c index 25760910c4e..bd121b6177f 100644 --- a/source/blender/python/mathutils/mathutils_Vector.c +++ b/source/blender/python/mathutils/mathutils_Vector.c @@ -1486,11 +1486,11 @@ static PyObject *Vector_isub(PyObject *v1, PyObject *v2) * note: vector/matrix multiplication IS NOT COMMUTATIVE!!!! * note: assume read callbacks have been done first. */ -int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat) +int column_vector_multiplication(float r_vec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat) { float vec_cpy[MAX_DIMENSIONS]; double dot = 0.0f; - int x, y, z = 0; + int row, col, z = 0; if (mat->num_col != vec->size) { if (mat->num_col == 4 && vec->size == 3) { @@ -1507,13 +1507,13 @@ int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec, memcpy(vec_cpy, vec->vec, vec->size * sizeof(float)); - rvec[3] = 1.0f; + r_vec[3] = 1.0f; - for (x = 0; x < mat->num_row; x++) { - for (y = 0; y < mat->num_col; y++) { - dot += (double)(MATRIX_ITEM(mat, y, x) * vec_cpy[y]); + for (row = 0; row < mat->num_row; row++) { + for (col = 0; col < mat->num_col; col++) { + dot += (double)(MATRIX_ITEM(mat, row, col) * vec_cpy[col]); } - rvec[z++] = (float)dot; + r_vec[z++] = (float)dot; dot = 0.0f; } @@ -2634,7 +2634,7 @@ static int row_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject *v //muliplication for (x = 0; x < mat->num_col; x++) { for (y = 0; y < mat->num_row; y++) { - dot += MATRIX_ITEM(mat, x, y) * vec_cpy[y]; + dot += MATRIX_ITEM(mat, y, x) * vec_cpy[y]; } rvec[z++] = (float)dot; dot = 0.0f; From 81c635a3bb194931384d9533e02bff7fc09307b0 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 20 Dec 2011 12:25:18 +0000 Subject: [PATCH 2/7] Fix #25775: crash using border render from external render engines, merging back the tile used wrong offsets. --- source/blender/render/intern/source/external_engine.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/blender/render/intern/source/external_engine.c b/source/blender/render/intern/source/external_engine.c index 2b44bad82ab..dc2de5fb450 100644 --- a/source/blender/render/intern/source/external_engine.c +++ b/source/blender/render/intern/source/external_engine.c @@ -190,6 +190,11 @@ void RE_engine_end_result(RenderEngine *engine, RenderResult *result) if(!result) return; + + result->tilerect.xmin += re->disprect.xmin; + result->tilerect.xmax += re->disprect.xmin; + result->tilerect.ymin += re->disprect.ymin; + result->tilerect.ymax += re->disprect.ymin; /* merge. on break, don't merge in result for preview renders, looks nicer */ if(!(re->test_break(re->tbh) && (re->r.scemode & R_PREVIEWBUTS))) From 72d2d05770a721986986c137a5cbc36cb796062f Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 20 Dec 2011 12:25:37 +0000 Subject: [PATCH 3/7] Cycles: border rendering support, includes some refactoring in how pixels are accessed on devices. --- intern/cycles/app/cycles_test.cpp | 19 ++++-- intern/cycles/blender/addon/engine.py | 6 +- intern/cycles/blender/blender_camera.cpp | 24 ++++++++ intern/cycles/blender/blender_session.cpp | 23 +++++-- intern/cycles/blender/blender_sync.h | 1 + intern/cycles/device/device.h | 1 + intern/cycles/device/device_cpu.cpp | 12 ++-- intern/cycles/device/device_cuda.cpp | 12 ++++ intern/cycles/device/device_opencl.cpp | 10 +++- intern/cycles/kernel/kernel.cl | 8 +-- intern/cycles/kernel/kernel.cpp | 8 +-- intern/cycles/kernel/kernel.cu | 8 +-- intern/cycles/kernel/kernel.h | 12 ++-- intern/cycles/kernel/kernel_film.h | 5 +- intern/cycles/kernel/kernel_optimized.cpp | 8 +-- intern/cycles/kernel/kernel_path.h | 14 ++--- intern/cycles/kernel/kernel_random.h | 14 ++--- intern/cycles/kernel/kernel_types.h | 5 +- intern/cycles/render/buffers.cpp | 42 ++++++------- intern/cycles/render/buffers.h | 49 +++++++++++++-- intern/cycles/render/camera.cpp | 7 +-- intern/cycles/render/session.cpp | 73 ++++++++++++----------- intern/cycles/render/session.h | 18 +++--- intern/cycles/render/tile.cpp | 18 +++--- intern/cycles/render/tile.h | 8 ++- 25 files changed, 257 insertions(+), 148 deletions(-) diff --git a/intern/cycles/app/cycles_test.cpp b/intern/cycles/app/cycles_test.cpp index 27e53ded6db..83816727404 100644 --- a/intern/cycles/app/cycles_test.cpp +++ b/intern/cycles/app/cycles_test.cpp @@ -82,10 +82,21 @@ static void session_print_status() session_print(status); } +static BufferParams session_buffer_params() +{ + BufferParams buffer_params; + buffer_params.width = options.width; + buffer_params.height = options.height; + buffer_params.full_width = options.width; + buffer_params.full_height = options.height; + + return buffer_params; +} + static void session_init() { options.session = new Session(options.session_params); - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); options.session->scene = options.scene; if(options.session_params.background && !options.quiet) @@ -151,7 +162,7 @@ static void display_info(Progress& progress) static void display() { - options.session->draw(options.width, options.height); + options.session->draw(session_buffer_params()); display_info(options.session->progress); } @@ -162,13 +173,13 @@ static void resize(int width, int height) options.height= height; if(options.session) - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); } void keyboard(unsigned char key) { if(key == 'r') - options.session->reset(options.width, options.height, options.session_params.samples); + options.session->reset(session_buffer_params(), options.session_params.samples); else if(key == 27) // escape options.session->progress.set_cancel("Cancelled"); } diff --git a/intern/cycles/blender/addon/engine.py b/intern/cycles/blender/addon/engine.py index 2fedd2c0afa..60b77b23f25 100644 --- a/intern/cycles/blender/addon/engine.py +++ b/intern/cycles/blender/addon/engine.py @@ -62,11 +62,7 @@ def render(engine): def update(engine, data, scene): import bcycles - if scene.render.use_border: - engine.report({'ERROR'}, "Border rendering not supported yet") - free(engine) - else: - bcycles.sync(engine.session) + bcycles.sync(engine.session) def draw(engine, region, v3d, rv3d): diff --git a/intern/cycles/blender/blender_camera.cpp b/intern/cycles/blender/blender_camera.cpp index 442a8f62bfd..9777de14b1e 100644 --- a/intern/cycles/blender/blender_camera.cpp +++ b/intern/cycles/blender/blender_camera.cpp @@ -287,5 +287,29 @@ void BlenderSync::sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int blender_camera_sync(scene->camera, &bcam, width, height); } +BufferParams BlenderSync::get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height) +{ + BufferParams params; + + params.full_width = width; + params.full_height = height; + + /* border render */ + BL::RenderSettings r = b_scene.render(); + + if(!b_rv3d && r.use_border()) { + params.full_x = r.border_min_x()*width; + params.full_y = r.border_min_y()*height; + params.width = (int)(r.border_max_x()*width) - params.full_x; + params.height = (int)(r.border_max_y()*height) - params.full_y; + } + else { + params.width = width; + params.height = height; + } + + return params; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp index 4433b1e24f9..1803dd36beb 100644 --- a/intern/cycles/blender/blender_session.cpp +++ b/intern/cycles/blender/blender_session.cpp @@ -100,7 +100,9 @@ void BlenderSession::create_session() session->set_pause(BlenderSync::get_session_pause(b_scene, background)); /* start rendering */ - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); session->start(); } @@ -135,7 +137,10 @@ void BlenderSession::write_render_result() if(!pixels) return; - struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + int w = buffer_params.width, h = buffer_params.height; + + struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h); PointerRNA rrptr; RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr); BL::RenderResult rr(rrptr); @@ -188,8 +193,10 @@ void BlenderSession::synchronize() session->scene->mutex.unlock(); /* reset if needed */ - if(scene->need_reset()) - session->reset(width, height, session_params.samples); + if(scene->need_reset()) { + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + session->reset(buffer_params, session_params.samples); + } } bool BlenderSession::draw(int w, int h) @@ -225,7 +232,9 @@ bool BlenderSession::draw(int w, int h) /* reset if requested */ if(reset) { SessionParams session_params = BlenderSync::get_session_params(b_scene, background); - session->reset(width, height, session_params.samples); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + session->reset(buffer_params, session_params.samples); } } @@ -233,7 +242,9 @@ bool BlenderSession::draw(int w, int h) update_status_progress(); /* draw */ - return !session->draw(width, height); + BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height); + + return !session->draw(buffer_params); } void BlenderSession::get_status(string& status, string& substatus) diff --git a/intern/cycles/blender/blender_sync.h b/intern/cycles/blender/blender_sync.h index 83c7f70fd59..824904cd81d 100644 --- a/intern/cycles/blender/blender_sync.h +++ b/intern/cycles/blender/blender_sync.h @@ -62,6 +62,7 @@ public: static SceneParams get_scene_params(BL::Scene b_scene, bool background); static SessionParams get_session_params(BL::Scene b_scene, bool background); static bool get_session_pause(BL::Scene b_scene, bool background); + static BufferParams get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height); private: /* sync */ diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index 5b87b11b6b8..be6a3f144ed 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -60,6 +60,7 @@ public: device_ptr buffer; int sample; int resolution; + int offset, stride; device_ptr displace_input; device_ptr displace_offset; diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp index 990b7cb94b0..a45a4fb69f6 100644 --- a/intern/cycles/device/device_cpu.cpp +++ b/intern/cycles/device/device_cpu.cpp @@ -162,7 +162,8 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -173,7 +174,8 @@ public: { for(int y = task.y; y < task.y + task.h; y++) { for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y); + kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, + task.sample, x, y, task.offset, task.stride); if(tasks.worker_cancel()) break; @@ -192,14 +194,16 @@ public: if(system_cpu_support_optimized()) { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } else #endif { for(int y = task.y; y < task.y + task.h; y++) for(int x = task.x; x < task.x + task.w; x++) - kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y); + kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, + task.sample, task.resolution, x, y, task.offset, task.stride); } } diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 177c90ba2df..dfa2fcb2322 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -520,6 +520,12 @@ public: cuda_assert(cuParamSeti(cuPathTrace, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuPathTrace, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ @@ -581,6 +587,12 @@ public: cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h)) offset += sizeof(task.h); + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset)) + offset += sizeof(task.offset); + + cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride)) + offset += sizeof(task.stride); + cuda_assert(cuParamSetSize(cuFilmConvert, offset)) /* launch kernel: todo find optimal size, cache config for fermi */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 8eaaebc6629..3a1d3032d6e 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -191,7 +191,7 @@ public: { char version[256]; - int major, minor, req_major = 1, req_minor = 1; + int major, minor, req_major = 1, req_minor = 0; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); @@ -541,6 +541,8 @@ public: cl_int d_w = task.w; cl_int d_h = task.h; cl_int d_sample = task.sample; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -559,6 +561,8 @@ public: ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); @@ -611,6 +615,8 @@ public: cl_int d_h = task.h; cl_int d_sample = task.sample; cl_int d_resolution = task.resolution; + cl_int d_offset = task.offset; + cl_int d_stride = task.stride; /* sample arguments */ int narg = 0; @@ -630,6 +636,8 @@ public: ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w); ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset); + ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride); opencl_assert(ciErr); diff --git a/intern/cycles/kernel/kernel.cl b/intern/cycles/kernel/kernel.cl index c00bc3fe957..90eb7a2513f 100644 --- a/intern/cycles/kernel/kernel.cl +++ b/intern/cycles/kernel/kernel.cl @@ -36,7 +36,7 @@ __kernel void kernel_ocl_path_trace( #include "kernel_textures.h" int sample, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -50,7 +50,7 @@ __kernel void kernel_ocl_path_trace( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } __kernel void kernel_ocl_tonemap( @@ -63,7 +63,7 @@ __kernel void kernel_ocl_tonemap( #include "kernel_textures.h" int sample, int resolution, - int sx, int sy, int sw, int sh) + int sx, int sy, int sw, int sh, int offset, int stride) { KernelGlobals kglobals, *kg = &kglobals; @@ -77,7 +77,7 @@ __kernel void kernel_ocl_tonemap( int y = sy + get_global_id(1); if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.cpp b/intern/cycles/kernel/kernel.cpp index 52a3852aa01..b4c3839dbd0 100644 --- a/intern/cycles/kernel/kernel.cpp +++ b/intern/cycles/kernel/kernel.cpp @@ -204,16 +204,16 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t /* Path Tracing */ -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel.cu b/intern/cycles/kernel/kernel.cu index 75415a00b00..71fc7ac3197 100644 --- a/intern/cycles/kernel/kernel.cu +++ b/intern/cycles/kernel/kernel.cu @@ -26,22 +26,22 @@ #include "kernel_path.h" #include "kernel_displace.h" -extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_path_trace(NULL, buffer, rng_state, sample, x, y); + kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride); } -extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh) +extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh, int offset, int stride) { int x = sx + blockDim.x*blockIdx.x + threadIdx.x; int y = sy + blockDim.y*blockIdx.y + threadIdx.y; if(x < sx + sw && y < sy + sh) - kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride); } extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx) diff --git a/intern/cycles/kernel/kernel.h b/intern/cycles/kernel/kernel.h index 700ee49c5f2..78247504b39 100644 --- a/intern/cycles/kernel/kernel.h +++ b/intern/cycles/kernel/kernel.h @@ -36,13 +36,17 @@ bool kernel_osl_use(KernelGlobals *kg); void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size); void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height); -void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #ifdef WITH_OPTIMIZED_KERNEL -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y); -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y); +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, + int sample, int x, int y, int offset, int stride); +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, + int sample, int resolution, int x, int y, int offset, int stride); void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i); #endif diff --git a/intern/cycles/kernel/kernel_film.h b/intern/cycles/kernel/kernel_film.h index 4373701452e..cd8acc9647a 100644 --- a/intern/cycles/kernel/kernel_film.h +++ b/intern/cycles/kernel/kernel_film.h @@ -48,10 +48,9 @@ __device uchar4 film_float_to_byte(float4 color) return result; } -__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y) +__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - int w = kernel_data.cam.width; - int index = x + y*w; + int index = offset + x + y*stride; float4 irradiance = buffer[index]; float4 float_result = film_map(kg, irradiance, sample); diff --git a/intern/cycles/kernel/kernel_optimized.cpp b/intern/cycles/kernel/kernel_optimized.cpp index 85a2b798a62..ea43e01ab58 100644 --- a/intern/cycles/kernel/kernel_optimized.cpp +++ b/intern/cycles/kernel/kernel_optimized.cpp @@ -35,16 +35,16 @@ CCL_NAMESPACE_BEGIN /* Path Tracing */ -void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y) +void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride) { - kernel_path_trace(kg, buffer, rng_state, sample, x, y); + kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride); } /* Tonemapping */ -void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y) +void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride) { - kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y); + kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride); } /* Displacement */ diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index c609f6f13fe..05707f31352 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -34,7 +34,7 @@ CCL_NAMESPACE_BEGIN #ifdef __MODIFY_TP__ -__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int sample) +__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int offset, int stride, int sample) { /* modify throughput to influence path termination probability, to avoid darker regions receiving fewer samples than lighter regions. also RGB @@ -45,7 +45,7 @@ __device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global f const float minL = 0.1f; if(sample >= minsample) { - float3 L = buffer[x + y*kernel_data.cam.width]; + float3 L = buffer[offset + x + y*stride]; float3 Lmin = make_float3(minL, minL, minL); float correct = (float)(sample+1)/(float)sample; @@ -379,7 +379,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R return make_float4(L.x, L.y, L.z, 1.0f - Ltransparent); } -__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y) +__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y, int offset, int stride) { /* initialize random numbers */ RNG rng; @@ -387,7 +387,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl float filter_u; float filter_v; - path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v); + path_rng_init(kg, rng_state, sample, &rng, x, y, offset, stride, &filter_u, &filter_v); /* sample camera ray */ Ray ray; @@ -399,7 +399,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl /* integrate */ #ifdef __MODIFY_TP__ - float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, sample); + float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, offset, stride, sample); float4 L = kernel_path_integrate(kg, &rng, sample, ray, throughput)/throughput; #else float3 throughput = make_float3(1.0f, 1.0f, 1.0f); @@ -407,14 +407,14 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl #endif /* accumulate result in output buffer */ - int index = x + y*kernel_data.cam.width; + int index = offset + x + y*stride; if(sample == 0) buffer[index] = L; else buffer[index] += L; - path_rng_end(kg, rng_state, rng, x, y); + path_rng_end(kg, rng_state, rng, x, y, offset, stride); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h index ba97ab3e3b6..41301ebd3dc 100644 --- a/intern/cycles/kernel/kernel_random.h +++ b/intern/cycles/kernel/kernel_random.h @@ -123,7 +123,7 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime #endif } -__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { #ifdef __SOBOL_FULL_SCREEN__ uint px, py; @@ -138,7 +138,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x; *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y; #else - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -147,7 +147,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, #endif } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* nothing to do */ } @@ -163,10 +163,10 @@ __device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension) return (float)*rng * (1.0f/(float)0xFFFFFFFF); } -__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy) +__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy) { /* load state */ - *rng = rng_state[x + y*kernel_data.cam.width]; + *rng = rng_state[offset + x + y*stride]; *rng ^= kernel_data.integrator.seed; @@ -174,10 +174,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam *fy = path_rng(kg, rng, sample, PRNG_FILTER_V); } -__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y) +__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride) { /* store state for next sample */ - rng_state[x + y*kernel_data.cam.width] = rng; + rng_state[offset + x + y*stride] = rng; } #endif diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index d9bd645b16d..72ebfefbd90 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -300,10 +300,7 @@ typedef struct ShaderData { typedef struct KernelCamera { /* type */ int ortho; - int pad; - - /* size */ - int width, height; + int pad1, pad2, pad3; /* matrices */ Transform cameratoworld; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index acdddb475d0..29141b25b59 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -36,8 +36,6 @@ CCL_NAMESPACE_BEGIN RenderBuffers::RenderBuffers(Device *device_) { device = device_; - width = 0; - height = 0; } RenderBuffers::~RenderBuffers() @@ -58,24 +56,23 @@ void RenderBuffers::device_free() } } -void RenderBuffers::reset(Device *device, int width_, int height_) +void RenderBuffers::reset(Device *device, BufferParams& params_) { - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate buffer */ - buffer.resize(width, height); + buffer.resize(params.width, params.height); device->mem_alloc(buffer, MEM_READ_WRITE); device->mem_zero(buffer); /* allocate rng state */ - rng_state.resize(width, height); + rng_state.resize(params.width, params.height); - uint *init_state = rng_state.resize(width, height); - int x, y; + uint *init_state = rng_state.resize(params.width, params.height); + int x, y, width = params.width, height = params.height; for(x=0; xmem_copy_from(buffer, 0, buffer.memory_size()); - float4 *out = new float4[width*height]; + float4 *out = new float4[params.width*params.height]; float4 *in = (float4*)buffer.data_pointer; float scale = 1.0f/(float)sample; - for(int i = width*height - 1; i >= 0; i--) { + for(int i = params.width*params.height - 1; i >= 0; i--) { float4 rgba = in[i]*scale; rgba.x = rgba.x*exposure; @@ -117,8 +114,6 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample) DisplayBuffer::DisplayBuffer(Device *device_) { device = device_; - width = 0; - height = 0; draw_width = 0; draw_height = 0; transparent = true; /* todo: determine from background */ @@ -137,28 +132,27 @@ void DisplayBuffer::device_free() } } -void DisplayBuffer::reset(Device *device, int width_, int height_) +void DisplayBuffer::reset(Device *device, BufferParams& params_) { draw_width = 0; draw_height = 0; - width = width_; - height = height_; + params = params_; /* free existing buffers */ device_free(); /* allocate display pixels */ - rgba.resize(width, height); + rgba.resize(params.width, params.height); device->pixels_alloc(rgba); } -void DisplayBuffer::draw_set(int width_, int height_) +void DisplayBuffer::draw_set(int width, int height) { - assert(width_ <= width && height_ <= height); + assert(width <= params.width && height <= params.height); - draw_width = width_; - draw_height = height_; + draw_width = width; + draw_height = height; } void DisplayBuffer::draw_transparency_grid() @@ -175,11 +169,11 @@ void DisplayBuffer::draw_transparency_grid() }; glColor4ub(50, 50, 50, 255); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glEnable(GL_POLYGON_STIPPLE); glColor4ub(55, 55, 55, 255); glPolygonStipple(checker_stipple_sml); - glRectf(0, 0, width, height); + glRectf(0, 0, params.width, params.height); glDisable(GL_POLYGON_STIPPLE); } @@ -189,7 +183,7 @@ void DisplayBuffer::draw(Device *device) if(transparent) draw_transparency_grid(); - device->draw_pixels(rgba, 0, draw_width, draw_height, width, height, transparent); + device->draw_pixels(rgba, 0, draw_width, draw_height, params.width, params.height, transparent); } } diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index d5eb8d7fa2f..66bd03c8893 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -30,12 +30,49 @@ CCL_NAMESPACE_BEGIN class Device; struct float4; +/* Buffer Parameters + Size of render buffer and how it fits in the full image (border render). */ + +class BufferParams { +public: + /* width/height of the physical buffer */ + int width; + int height; + + /* offset into and width/height of the full buffer */ + int full_x; + int full_y; + int full_width; + int full_height; + + BufferParams() + { + width = 0; + height = 0; + + full_x = 0; + full_y = 0; + full_width = 0; + full_height = 0; + } + + bool modified(const BufferParams& params) + { + return !(full_x == params.full_x + && full_y == params.full_y + && width == params.width + && height == params.height + && full_width == params.full_width + && full_height == params.full_height); + } +}; + /* Render Buffers */ class RenderBuffers { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* float buffer */ device_vector buffer; /* random number generator state */ @@ -46,7 +83,7 @@ public: RenderBuffers(Device *device); ~RenderBuffers(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); float4 *copy_from_device(float exposure, int sample); protected: @@ -62,8 +99,8 @@ protected: class DisplayBuffer { public: - /* buffer dimensions */ - int width, height; + /* buffer parameters */ + BufferParams params; /* dimensions for how much of the buffer is actually ready for display. with progressive render we can be using only a subset of the buffer. if these are zero, it means nothing can be drawn yet */ @@ -78,7 +115,7 @@ public: DisplayBuffer(Device *device); ~DisplayBuffer(); - void reset(Device *device, int width, int height); + void reset(Device *device, BufferParams& params); void write(Device *device, const string& filename); void draw_set(int width, int height); diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index e88c0a388bc..d5fca87491d 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -72,8 +72,9 @@ void Camera::update() if(!need_update) return; + /* ndc to raster */ Transform screentocamera; - Transform ndctoraster = transform_scale((float)width, (float)height, 1.0f); + Transform ndctoraster = transform_scale(width, height, 1.0f); /* raster to screen */ Transform screentoraster = ndctoraster * @@ -148,10 +149,6 @@ void Camera::device_update(Device *device, DeviceScene *dscene) /* type */ kcam->ortho = ortho; - /* size */ - kcam->width = width; - kcam->height = height; - /* store differentials */ kcam->dx = dx; kcam->dy = dy; diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 42b4a2bb7e4..26c4dbfbb7a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -51,8 +51,6 @@ Session::Session(const SessionParams& params_) sample = 0; delayed_reset.do_reset = false; - delayed_reset.w = 0; - delayed_reset.h = 0; delayed_reset.samples = 0; display_outdated = false; @@ -108,7 +106,7 @@ bool Session::ready_to_reset() /* GPU Session */ -void Session::reset_gpu(int w, int h, int samples) +void Session::reset_gpu(BufferParams& buffer_params, int samples) { /* block for buffer acces and reset immediately. we can't do this in the thread, because we need to allocate an OpenGL buffer, and @@ -119,7 +117,7 @@ void Session::reset_gpu(int w, int h, int samples) display_outdated = true; reset_time = time_dt(); - reset_(w, h, samples); + reset_(buffer_params, samples); gpu_need_tonemap = false; gpu_need_tonemap_cond.notify_all(); @@ -127,7 +125,7 @@ void Session::reset_gpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_gpu(int w, int h) +bool Session::draw_gpu(BufferParams& buffer_params) { /* block for buffer access */ thread_scoped_lock display_lock(display->mutex); @@ -136,7 +134,7 @@ bool Session::draw_gpu(int w, int h) if(gpu_draw_ready) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { /* for CUDA we need to do tonemapping still, since we can only access GL buffers from the main thread */ if(gpu_need_tonemap) { @@ -261,15 +259,14 @@ void Session::run_gpu() /* CPU Session */ -void Session::reset_cpu(int w, int h, int samples) +void Session::reset_cpu(BufferParams& buffer_params, int samples) { thread_scoped_lock reset_lock(delayed_reset.mutex); display_outdated = true; reset_time = time_dt(); - delayed_reset.w = w; - delayed_reset.h = h; + delayed_reset.params = buffer_params; delayed_reset.samples = samples; delayed_reset.do_reset = true; device->task_cancel(); @@ -277,7 +274,7 @@ void Session::reset_cpu(int w, int h, int samples) pause_cond.notify_all(); } -bool Session::draw_cpu(int w, int h) +bool Session::draw_cpu(BufferParams& buffer_params) { thread_scoped_lock display_lock(display->mutex); @@ -285,7 +282,7 @@ bool Session::draw_cpu(int w, int h) if(display->draw_ready()) { /* then verify the buffers have the expected size, so we don't draw previous results in a resized window */ - if(w == display->width && h == display->height) { + if(!buffer_params.modified(display->params)) { display->draw(device); if(display_outdated && (time_dt() - reset_time) > params.text_timeout) @@ -306,7 +303,7 @@ void Session::run_cpu() thread_scoped_lock buffers_lock(buffers->mutex); thread_scoped_lock display_lock(display->mutex); - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); delayed_reset.do_reset = false; } @@ -389,7 +386,7 @@ void Session::run_cpu() if(delayed_reset.do_reset) { /* reset rendering if request from main thread */ delayed_reset.do_reset = false; - reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples); + reset_(delayed_reset.params, delayed_reset.samples); } else if(need_tonemap) { /* tonemap only if we do not reset, we don't we don't @@ -438,23 +435,23 @@ void Session::run() progress.set_update(); } -bool Session::draw(int w, int h) +bool Session::draw(BufferParams& buffer_params) { if(device_use_gl) - return draw_gpu(w, h); + return draw_gpu(buffer_params); else - return draw_cpu(w, h); + return draw_cpu(buffer_params); } -void Session::reset_(int w, int h, int samples) +void Session::reset_(BufferParams& buffer_params, int samples) { - if(w != buffers->width || h != buffers->height) { + if(buffer_params.modified(buffers->params)) { gpu_draw_ready = false; - buffers->reset(device, w, h); - display->reset(device, w, h); + buffers->reset(device, buffer_params); + display->reset(device, buffer_params); } - tile_manager.reset(w, h, samples); + tile_manager.reset(buffer_params, samples); start_time = time_dt(); preview_time = 0.0; @@ -462,12 +459,12 @@ void Session::reset_(int w, int h, int samples) sample = 0; } -void Session::reset(int w, int h, int samples) +void Session::reset(BufferParams& buffer_params, int samples) { if(device_use_gl) - reset_gpu(w, h, samples); + reset_gpu(buffer_params, samples); else - reset_cpu(w, h, samples); + reset_cpu(buffer_params, samples); } void Session::set_samples(int samples) @@ -514,14 +511,18 @@ void Session::update_scene() progress.set_status("Updating Scene"); - /* update camera if dimensions changed for progressive render */ + /* update camera if dimensions changed for progressive render. the camera + knows nothing about progressive or cropped rendering, it just gets the + image dimensions passed in */ Camera *cam = scene->camera; - int w = tile_manager.state.width; - int h = tile_manager.state.height; + float progressive_x = tile_manager.state.width/(float)tile_manager.params.width; + float progressive_y = tile_manager.state.height/(float)tile_manager.params.height; + int width = tile_manager.params.full_width*progressive_x; + int height = tile_manager.params.full_height*progressive_y; - if(cam->width != w || cam->height != h) { - cam->width = w; - cam->height = h; + if(width != cam->width || height != cam->height) { + cam->width = width; + cam->height = height; cam->tag_update(); } @@ -573,14 +574,16 @@ void Session::path_trace(Tile& tile) /* add path trace task */ DeviceTask task(DeviceTask::PATH_TRACE); - task.x = tile.x; - task.y = tile.y; + task.x = tile_manager.state.full_x + tile.x; + task.y = tile_manager.state.full_y + tile.y; task.w = tile.w; task.h = tile.h; task.buffer = buffers->buffer.device_pointer; task.rng_state = buffers->rng_state.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; device->task_add(task); } @@ -590,14 +593,16 @@ void Session::tonemap() /* add tonemap task */ DeviceTask task(DeviceTask::TONEMAP); - task.x = 0; - task.y = 0; + task.x = tile_manager.state.full_x; + task.y = tile_manager.state.full_y; task.w = tile_manager.state.width; task.h = tile_manager.state.height; task.rgba = display->rgba.device_pointer; task.buffer = buffers->buffer.device_pointer; task.sample = tile_manager.state.sample; task.resolution = tile_manager.state.resolution; + task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width); + task.stride = tile_manager.state.width; if(task.w > 0 && task.h > 0) { device->task_add(task); diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index ce7f420096a..89979b8c451 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -19,6 +19,7 @@ #ifndef __SESSION_H__ #define __SESSION_H__ +#include "buffers.h" #include "device.h" #include "tile.h" @@ -27,6 +28,7 @@ CCL_NAMESPACE_BEGIN +class BufferParams; class Device; class DeviceScene; class DisplayBuffer; @@ -106,11 +108,11 @@ public: ~Session(); void start(); - bool draw(int w, int h); + bool draw(BufferParams& params); void wait(); bool ready_to_reset(); - void reset(int w, int h, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); void set_pause(bool pause); @@ -118,7 +120,7 @@ protected: struct DelayedReset { thread_mutex mutex; bool do_reset; - int w, h; + BufferParams params; int samples; } delayed_reset; @@ -129,15 +131,15 @@ protected: void tonemap(); void path_trace(Tile& tile); - void reset_(int w, int h, int samples); + void reset_(BufferParams& params, int samples); void run_cpu(); - bool draw_cpu(int w, int h); - void reset_cpu(int w, int h, int samples); + bool draw_cpu(BufferParams& params); + void reset_cpu(BufferParams& params, int samples); void run_gpu(); - bool draw_gpu(int w, int h); - void reset_gpu(int w, int h, int samples); + bool draw_gpu(BufferParams& params); + void reset_gpu(BufferParams& params, int samples); TileManager tile_manager; bool device_use_gl; diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index ba437e74874..b118a7ba478 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -28,21 +28,21 @@ TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int mi tile_size = tile_size_; min_size = min_size_; - reset(0, 0, 0); + BufferParams buffer_params; + reset(buffer_params, 0); } TileManager::~TileManager() { } -void TileManager::reset(int width_, int height_, int samples_) +void TileManager::reset(BufferParams& params_, int samples_) { - full_width = width_; - full_height = height_; + params = params_; start_resolution = 1; - int w = width_, h = height_; + int w = params.width, h = params.height; if(min_size != INT_MAX) { while(w*h > min_size*min_size) { @@ -55,6 +55,8 @@ void TileManager::reset(int width_, int height_, int samples_) samples = samples_; + state.full_x = 0; + state.full_y = 0; state.width = 0; state.height = 0; state.sample = -1; @@ -70,8 +72,8 @@ void TileManager::set_samples(int samples_) void TileManager::set_tiles() { int resolution = state.resolution; - int image_w = max(1, full_width/resolution); - int image_h = max(1, full_height/resolution); + int image_w = max(1, params.width/resolution); + int image_h = max(1, params.height/resolution); int tile_w = (image_w + tile_size - 1)/tile_size; int tile_h = (image_h + tile_size - 1)/tile_size; int sub_w = image_w/tile_w; @@ -90,6 +92,8 @@ void TileManager::set_tiles() } } + state.full_x = params.full_x/resolution; + state.full_y = params.full_y/resolution; state.width = image_w; state.height = image_h; } diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 5cd16eb8afa..76863d23498 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -21,6 +21,7 @@ #include +#include "buffers.h" #include "util_list.h" CCL_NAMESPACE_BEGIN @@ -39,7 +40,10 @@ public: class TileManager { public: + BufferParams params; struct State { + int full_x; + int full_y; int width; int height; int sample; @@ -50,7 +54,7 @@ public: TileManager(bool progressive, int samples, int tile_size, int min_size); ~TileManager(); - void reset(int width, int height, int samples); + void reset(BufferParams& params, int samples); void set_samples(int samples); bool next(); bool done(); @@ -63,8 +67,6 @@ protected: int tile_size; int min_size; - int full_width; - int full_height; int start_resolution; }; From 40259cfe7bf8ab3fa844d87b61096562c9ea2e42 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 20 Dec 2011 12:25:45 +0000 Subject: [PATCH 4/7] Cycles: avoid using float3 in kernel constant memory, just so we're sure alignment is working compatible between cpu and gpu. --- intern/cycles/kernel/kernel_camera.h | 8 ++++---- intern/cycles/kernel/kernel_types.h | 20 +++++++------------- intern/cycles/render/camera.cpp | 4 ++-- intern/cycles/render/nodes.cpp | 1 - intern/cycles/util/util_math.h | 5 +++++ 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h index 9cdc2f1f865..2dbdd076891 100644 --- a/intern/cycles/kernel/kernel_camera.h +++ b/intern/cycles/kernel/kernel_camera.h @@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f); - ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff); - ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff); + ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff); + ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff); #endif #ifdef __CAMERA_CLIPPING__ @@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa #ifdef __RAY_DIFFERENTIALS__ /* ray differential */ - ray->dP.dx = kernel_data.cam.dx; - ray->dP.dy = kernel_data.cam.dy; + ray->dP.dx = float4_to_float3(kernel_data.cam.dx); + ray->dP.dy = float4_to_float3(kernel_data.cam.dy); ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f); ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f); diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 72ebfefbd90..ea73f87a8a5 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -295,7 +295,11 @@ typedef struct ShaderData { #endif } ShaderData; -/* Constrant Kernel Data */ +/* Constrant Kernel Data + * + * These structs are passed from CPU to various devices, and the struct layout + * must match exactly. Structs are padded to ensure 16 byte alignment, and we + * do not use float3 because its size may not be the same on all devices. */ typedef struct KernelCamera { /* type */ @@ -307,14 +311,8 @@ typedef struct KernelCamera { Transform rastertocamera; /* differentials */ - float3 dx; -#ifndef WITH_OPENCL - float pad1; -#endif - float3 dy; -#ifndef WITH_OPENCL - float pad2; -#endif + float4 dx; + float4 dy; /* depth of field */ float aperturesize; @@ -355,10 +353,6 @@ typedef struct KernelBackground { typedef struct KernelSunSky { /* sun direction in spherical and cartesian */ float theta, phi, pad3, pad4; - float3 dir; -#ifndef WITH_OPENCL - float pad; -#endif /* perez function parameters */ float zenith_Y, zenith_x, zenith_y, pad2; diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index d5fca87491d..a83ae81844c 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -150,8 +150,8 @@ void Camera::device_update(Device *device, DeviceScene *dscene) kcam->ortho = ortho; /* store differentials */ - kcam->dx = dx; - kcam->dy = dy; + kcam->dx = float3_to_float4(dx); + kcam->dy = float3_to_float4(dy); /* clipping */ kcam->nearclip = nearclip; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 7d873221cd6..81d156a079d 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb ksunsky->theta = theta; ksunsky->phi = phi; - ksunsky->dir = dir; float theta2 = theta*theta; float theta3 = theta*theta*theta; diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 7c56f0fbb12..0a1d8ff4555 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a) return make_float3(a.x, a.y, a.z); } +__device_inline float4 float3_to_float4(const float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + #ifndef __KERNEL_GPU__ __device_inline void print_float3(const char *label, const float3& a) From 738fdc7b6f43c3e1e838bd4239b36340fa4c2e0f Mon Sep 17 00:00:00 2001 From: Bastien Montagne Date: Tue, 20 Dec 2011 14:15:59 +0000 Subject: [PATCH 5/7] New modifier type: eModifierTypeType_NonGeometrical, for modifiers affecting CustomData layers only (e.g. UVProject and WeightVG ones). Also, allow applying to obdata those modifiers, even with shapekeys, but do not allow applying them *as* shapekey (as shapekeys do not have CD layers). Fix [#29636] Vertex Weight Mix modifier "apply" button don't work. Note: applying whit shape keys currently always uses base shape, not current one (for apply to obdata as well as apply to shapekey), but this is another topic... --- source/blender/blenkernel/BKE_modifier.h | 6 ++++++ source/blender/blenkernel/intern/modifier.c | 9 ++++++++- source/blender/editors/interface/interface_templates.c | 4 ++-- source/blender/editors/object/object_modifier.c | 4 ++-- source/blender/modifiers/intern/MOD_uvproject.c | 2 +- source/blender/modifiers/intern/MOD_weightvgedit.c | 2 +- source/blender/modifiers/intern/MOD_weightvgmix.c | 2 +- source/blender/modifiers/intern/MOD_weightvgproximity.c | 2 +- 8 files changed, 22 insertions(+), 9 deletions(-) diff --git a/source/blender/blenkernel/BKE_modifier.h b/source/blender/blenkernel/BKE_modifier.h index 84f8995b480..23073a2d8eb 100644 --- a/source/blender/blenkernel/BKE_modifier.h +++ b/source/blender/blenkernel/BKE_modifier.h @@ -65,6 +65,11 @@ typedef enum { * unless it's a mesh and can be exploded -> curve can also emit particles */ eModifierTypeType_DeformOrConstruct, + + /* Like eModifierTypeType_Nonconstructive, but does not affect the geometry + * of the object, rather some of its CustomData layers. + * E.g. UVProject and WeightVG modifiers. */ + eModifierTypeType_NonGeometrical, } ModifierTypeType; typedef enum { @@ -311,6 +316,7 @@ int modifier_supportsMapping(struct ModifierData *md); int modifier_couldBeCage(struct Scene *scene, struct ModifierData *md); int modifier_isCorrectableDeformed(struct ModifierData *md); int modifier_sameTopology(ModifierData *md); +int modifier_nonGeometrical(ModifierData *md); int modifier_isEnabled(struct Scene *scene, struct ModifierData *md, int required_mode); void modifier_setError(struct ModifierData *md, const char *format, ...) #ifdef __GNUC__ diff --git a/source/blender/blenkernel/intern/modifier.c b/source/blender/blenkernel/intern/modifier.c index f09be8c34ad..5a389019519 100644 --- a/source/blender/blenkernel/intern/modifier.c +++ b/source/blender/blenkernel/intern/modifier.c @@ -239,7 +239,14 @@ int modifier_couldBeCage(struct Scene *scene, ModifierData *md) int modifier_sameTopology(ModifierData *md) { ModifierTypeInfo *mti = modifierType_getInfo(md->type); - return ( mti->type == eModifierTypeType_OnlyDeform || mti->type == eModifierTypeType_Nonconstructive); + return ELEM3(mti->type, eModifierTypeType_OnlyDeform, eModifierTypeType_Nonconstructive, + eModifierTypeType_NonGeometrical); +} + +int modifier_nonGeometrical(ModifierData *md) +{ + ModifierTypeInfo *mti = modifierType_getInfo(md->type); + return (mti->type == eModifierTypeType_NonGeometrical); } void modifier_setError(ModifierData *md, const char *format, ...) diff --git a/source/blender/editors/interface/interface_templates.c b/source/blender/editors/interface/interface_templates.c index 7f01d4f031d..49e3ff10f06 100644 --- a/source/blender/editors/interface/interface_templates.c +++ b/source/blender/editors/interface/interface_templates.c @@ -845,7 +845,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif uiLayoutSetOperatorContext(row, WM_OP_INVOKE_DEFAULT); uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply"), 0, "apply_as", MODIFIER_APPLY_DATA); - if (modifier_sameTopology(md)) + if (modifier_sameTopology(md) && !modifier_nonGeometrical(md)) uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply as Shape"), 0, "apply_as", MODIFIER_APPLY_SHAPE); } @@ -853,7 +853,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif uiBlockSetButLock(block, ob && ob->id.lib, ERROR_LIBDATA_MESSAGE); if (!ELEM5(md->type, eModifierType_Fluidsim, eModifierType_Softbody, eModifierType_ParticleSystem, eModifierType_Cloth, eModifierType_Smoke)) - uiItemO(row, TIP_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy"); + uiItemO(row, IFACE_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy"); } /* result is the layout block inside the box, that we return so that modifier settings can be drawn */ diff --git a/source/blender/editors/object/object_modifier.c b/source/blender/editors/object/object_modifier.c index 240fb8017f2..913e5893a77 100644 --- a/source/blender/editors/object/object_modifier.c +++ b/source/blender/editors/object/object_modifier.c @@ -452,7 +452,7 @@ static int modifier_apply_shape(ReportList *reports, Scene *scene, Object *ob, M Key *key=me->key; KeyBlock *kb; - if(!modifier_sameTopology(md)) { + if(!modifier_sameTopology(md) || mti->type == eModifierTypeType_NonGeometrical) { BKE_report(reports, RPT_ERROR, "Only deforming modifiers can be applied to Shapes"); return 0; } @@ -500,7 +500,7 @@ static int modifier_apply_obdata(ReportList *reports, Scene *scene, Object *ob, Mesh *me = ob->data; MultiresModifierData *mmd= find_multires_modifier_before(scene, md); - if( me->key) { + if(me->key && mti->type != eModifierTypeType_NonGeometrical) { BKE_report(reports, RPT_ERROR, "Modifier cannot be applied to Mesh with Shape Keys"); return 0; } diff --git a/source/blender/modifiers/intern/MOD_uvproject.c b/source/blender/modifiers/intern/MOD_uvproject.c index 71292c6e521..38c2073b6a5 100644 --- a/source/blender/modifiers/intern/MOD_uvproject.c +++ b/source/blender/modifiers/intern/MOD_uvproject.c @@ -407,7 +407,7 @@ ModifierTypeInfo modifierType_UVProject = { /* name */ "UVProject", /* structName */ "UVProjectModifierData", /* structSize */ sizeof(UVProjectModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh | eModifierTypeFlag_SupportsMapping | eModifierTypeFlag_SupportsEditmode diff --git a/source/blender/modifiers/intern/MOD_weightvgedit.c b/source/blender/modifiers/intern/MOD_weightvgedit.c index 9721ee042bf..bdd7ab7486b 100644 --- a/source/blender/modifiers/intern/MOD_weightvgedit.c +++ b/source/blender/modifiers/intern/MOD_weightvgedit.c @@ -254,7 +254,7 @@ ModifierTypeInfo modifierType_WeightVGEdit = { /* name */ "VertexWeightEdit", /* structName */ "WeightVGEditModifierData", /* structSize */ sizeof(WeightVGEditModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, diff --git a/source/blender/modifiers/intern/MOD_weightvgmix.c b/source/blender/modifiers/intern/MOD_weightvgmix.c index fd7e47cc4bf..17316d891da 100644 --- a/source/blender/modifiers/intern/MOD_weightvgmix.c +++ b/source/blender/modifiers/intern/MOD_weightvgmix.c @@ -386,7 +386,7 @@ ModifierTypeInfo modifierType_WeightVGMix = { /* name */ "VertexWeightMix", /* structName */ "WeightVGMixModifierData", /* structSize */ sizeof(WeightVGMixModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, diff --git a/source/blender/modifiers/intern/MOD_weightvgproximity.c b/source/blender/modifiers/intern/MOD_weightvgproximity.c index 9b5678a3f3b..f67fd907d02 100644 --- a/source/blender/modifiers/intern/MOD_weightvgproximity.c +++ b/source/blender/modifiers/intern/MOD_weightvgproximity.c @@ -520,7 +520,7 @@ ModifierTypeInfo modifierType_WeightVGProximity = { /* name */ "VertexWeightProximity", /* structName */ "WeightVGProximityModifierData", /* structSize */ sizeof(WeightVGProximityModifierData), - /* type */ eModifierTypeType_Nonconstructive, + /* type */ eModifierTypeType_NonGeometrical, /* flags */ eModifierTypeFlag_AcceptsMesh /* |eModifierTypeFlag_SupportsMapping*/ |eModifierTypeFlag_SupportsEditmode, From 690de795803c345dc4916148f016b661c2e634e7 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 20 Dec 2011 17:36:56 +0000 Subject: [PATCH 6/7] Cycles: some tweaks for apple opencl with ATI cards, to get it working up to the level of ambient occlusion render, shaders still fail. Fixes found with much help from Jens and Dalai. --- intern/cycles/device/device.cpp | 31 +++++++++++++-- intern/cycles/device/device.h | 4 ++ intern/cycles/device/device_opencl.cpp | 43 ++++++++++++++------- intern/cycles/kernel/CMakeLists.txt | 2 +- intern/cycles/kernel/kernel_compat_opencl.h | 19 +++++++-- 5 files changed, 78 insertions(+), 21 deletions(-) diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp index f43ccffe461..6ebc359fdb3 100644 --- a/intern/cycles/device/device.cpp +++ b/intern/cycles/device/device.cpp @@ -24,6 +24,7 @@ #include "util_cuda.h" #include "util_debug.h" +#include "util_foreach.h" #include "util_math.h" #include "util_opencl.h" #include "util_opengl.h" @@ -41,7 +42,31 @@ DeviceTask::DeviceTask(Type type_) { } -void DeviceTask::split(ThreadQueue& tasks, int num) +void DeviceTask::split_max_size(list& tasks, int max_size) +{ + int num; + + if(type == DISPLACE) { + num = (displace_w + max_size - 1)/max_size; + } + else { + max_size = max(1, max_size/w); + num = (h + max_size - 1)/max_size; + } + + split(tasks, num); +} + +void DeviceTask::split(ThreadQueue& queue, int num) +{ + list tasks; + split(tasks, num); + + foreach(DeviceTask& task, tasks) + queue.push(task); +} + +void DeviceTask::split(list& tasks, int num) { if(type == DISPLACE) { num = min(displace_w, num); @@ -55,7 +80,7 @@ void DeviceTask::split(ThreadQueue& tasks, int num) task.displace_x = tx; task.displace_w = tw; - tasks.push(task); + tasks.push_back(task); } } else { @@ -70,7 +95,7 @@ void DeviceTask::split(ThreadQueue& tasks, int num) task.y = ty; task.h = th; - tasks.push(task); + tasks.push_back(task); } } } diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index be6a3f144ed..a6a81e7b326 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -23,6 +23,7 @@ #include "device_memory.h" +#include "util_list.h" #include "util_string.h" #include "util_thread.h" #include "util_types.h" @@ -67,7 +68,10 @@ public: int displace_x, displace_w; DeviceTask(Type type = PATH_TRACE); + + void split(list& tasks, int num); void split(ThreadQueue& tasks, int num); + void split_max_size(list& tasks, int max_size); }; /* Device */ diff --git a/intern/cycles/device/device_opencl.cpp b/intern/cycles/device/device_opencl.cpp index 3a1d3032d6e..6014dd0fdb7 100644 --- a/intern/cycles/device/device_opencl.cpp +++ b/intern/cycles/device/device_opencl.cpp @@ -25,6 +25,7 @@ #include "device.h" #include "device_intern.h" +#include "util_foreach.h" #include "util_map.h" #include "util_math.h" #include "util_md5.h" @@ -52,6 +53,7 @@ public: map mem_map; device_ptr null_mem; bool device_initialized; + string platform_name; const char *opencl_error_string(cl_int err) { @@ -175,6 +177,10 @@ public: if(opencl_error(ciErr)) return; + char name[256]; + clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL); + platform_name = name; + cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr); if(opencl_error(ciErr)) return; @@ -191,7 +197,7 @@ public: { char version[256]; - int major, minor, req_major = 1, req_minor = 0; + int major, minor, req_major = 1, req_minor = 1; clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL); @@ -277,14 +283,11 @@ public: { string build_options = " -cl-fast-relaxed-math "; - /* Full Shading only on NVIDIA cards at the moment */ - char vendor[256]; - - clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL); - string name = vendor; - - if(name == "NVIDIA CUDA") - build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ "; + /* full shading only on NVIDIA cards at the moment */ + if(platform_name == "NVIDIA CUDA") + build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose "; + if(platform_name == "Apple") + build_options += " -D__CL_NO_FLOAT3__ "; return build_options; } @@ -657,12 +660,24 @@ public: opencl_assert(clFinish(cqCommandQueue)); } - void task_add(DeviceTask& task) + void task_add(DeviceTask& maintask) { - if(task.type == DeviceTask::TONEMAP) - tonemap(task); - else if(task.type == DeviceTask::PATH_TRACE) - path_trace(task); + list tasks; + + /* arbitrary limit to work around apple ATI opencl issue */ + if(platform_name == "Apple") + maintask.split_max_size(tasks, 76800); + else + tasks.push_back(maintask); + + DeviceTask task; + + foreach(DeviceTask& task, tasks) { + if(task.type == DeviceTask::TONEMAP) + tonemap(task); + else if(task.type == DeviceTask::PATH_TRACE) + path_trace(task); + } } void task_wait() diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index e17544bf7af..939a74660a1 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -143,7 +143,7 @@ endif() #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl) #add_custom_command( # OUTPUT ${KERNEL_PREPROCESSED} -# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED} +# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED} # DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS}) #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED}) #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel) diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h index 5515966807b..9fbd8566ecd 100644 --- a/intern/cycles/kernel/kernel_compat_opencl.h +++ b/intern/cycles/kernel/kernel_compat_opencl.h @@ -25,12 +25,21 @@ /* no namespaces in opencl */ #define CCL_NAMESPACE_BEGIN #define CCL_NAMESPACE_END -#define WITH_OPENCL + +#ifdef __CL_NO_FLOAT3__ +#define float3 float4 +#endif + +#ifdef __CL_NOINLINE__ +#define __noinline __attribute__((noinline)) +#else +#define __noinline +#endif /* in opencl all functions are device functions, so leave this empty */ #define __device -#define __device_inline -#define __device_noinline +#define __device_inline __device +#define __device_noinline __device __noinline /* no assert in opencl */ #define kernel_assert(cond) @@ -68,7 +77,11 @@ __device float kernel_tex_interp_(__global float *data, int width, float x) #endif #define make_float2(x, y) ((float2)(x, y)) +#ifdef __CL_NO_FLOAT3__ +#define make_float3(x, y, z) ((float4)(x, y, z, 0.0)) +#else #define make_float3(x, y, z) ((float3)(x, y, z)) +#endif #define make_float4(x, y, z, w) ((float4)(x, y, z, w)) #define make_int2(x, y) ((int2)(x, y)) #define make_int3(x, y, z) ((int3)(x, y, z)) From 2d1de2e78d619ffd999876768a9466d083984a9f Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Tue, 20 Dec 2011 18:59:10 +0000 Subject: [PATCH 7/7] Cycles/CUDA: * Rename shader model to compute capability in error messages. --- intern/cycles/device/device_cuda.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index dfa2fcb2322..2a49d4fae4c 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -221,7 +221,7 @@ public: cuDeviceComputeCapability(&major, &minor, cuDevId); if(major <= 1 && minor <= 2) { - cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor)); + cuda_error(string_printf("CUDA device supported only with compute capability 1.3 or up, found %d.%d.", major, minor)); return false; } } @@ -253,9 +253,9 @@ public: #if defined(WITH_CUDA_BINARIES) && defined(_WIN32) if(major <= 1 && minor <= 2) - cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor)); + cuda_error(string_printf("CUDA device supported only compute capability 1.3 or up, found %d.%d.", major, minor)); else - cuda_error(string_printf("CUDA binary kernel for this graphics card shader model (%d.%d) not found.", major, minor)); + cuda_error(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor)); return ""; #else /* if not, find CUDA compiler */