svn merge ^/trunk/blender -r42761:42776
authorCampbell Barton <ideasman42@gmail.com>
Tue, 20 Dec 2011 22:08:24 +0000 (22:08 +0000)
committerCampbell Barton <ideasman42@gmail.com>
Tue, 20 Dec 2011 22:08:24 +0000 (22:08 +0000)
41 files changed:
intern/cycles/app/cycles_test.cpp
intern/cycles/blender/addon/engine.py
intern/cycles/blender/blender_camera.cpp
intern/cycles/blender/blender_session.cpp
intern/cycles/blender/blender_sync.h
intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernel.cl
intern/cycles/kernel/kernel.cpp
intern/cycles/kernel/kernel.cu
intern/cycles/kernel/kernel.h
intern/cycles/kernel/kernel_camera.h
intern/cycles/kernel/kernel_compat_opencl.h
intern/cycles/kernel/kernel_film.h
intern/cycles/kernel/kernel_optimized.cpp
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_random.h
intern/cycles/kernel/kernel_types.h
intern/cycles/render/buffers.cpp
intern/cycles/render/buffers.h
intern/cycles/render/camera.cpp
intern/cycles/render/nodes.cpp
intern/cycles/render/session.cpp
intern/cycles/render/session.h
intern/cycles/render/tile.cpp
intern/cycles/render/tile.h
intern/cycles/util/util_math.h
source/blender/blenkernel/BKE_modifier.h
source/blender/blenkernel/intern/modifier.c
source/blender/editors/interface/interface_templates.c
source/blender/editors/object/object_modifier.c
source/blender/modifiers/intern/MOD_uvproject.c
source/blender/modifiers/intern/MOD_weightvgedit.c
source/blender/modifiers/intern/MOD_weightvgmix.c
source/blender/modifiers/intern/MOD_weightvgproximity.c
source/blender/python/mathutils/mathutils_Vector.c
source/blender/render/intern/source/external_engine.c

index 27e53ded6db18775f89b5db3ef241994ebc0afa3..838167274048a0689678d74d26608274080a7018 100644 (file)
@@ -82,10 +82,21 @@ static void session_print_status()
        session_print(status);
 }
 
+static BufferParams session_buffer_params()
+{
+       BufferParams buffer_params;
+       buffer_params.width = options.width;
+       buffer_params.height = options.height;
+       buffer_params.full_width = options.width;
+       buffer_params.full_height = options.height;
+
+       return buffer_params;
+}
+
 static void session_init()
 {
        options.session = new Session(options.session_params);
-       options.session->reset(options.width, options.height, options.session_params.samples);
+       options.session->reset(session_buffer_params(), options.session_params.samples);
        options.session->scene = options.scene;
        
        if(options.session_params.background && !options.quiet)
@@ -151,7 +162,7 @@ static void display_info(Progress& progress)
 
 static void display()
 {
-       options.session->draw(options.width, options.height);
+       options.session->draw(session_buffer_params());
 
        display_info(options.session->progress);
 }
@@ -162,13 +173,13 @@ static void resize(int width, int height)
        options.height= height;
 
        if(options.session)
-               options.session->reset(options.width, options.height, options.session_params.samples);
+               options.session->reset(session_buffer_params(), options.session_params.samples);
 }
 
 void keyboard(unsigned char key)
 {
        if(key == 'r')
-               options.session->reset(options.width, options.height, options.session_params.samples);
+               options.session->reset(session_buffer_params(), options.session_params.samples);
        else if(key == 27) // escape
                options.session->progress.set_cancel("Cancelled");
 }
index 2fedd2c0afa060675fa73e928d1dcd046e783016..60b77b23f25c89ee5921cc57773d8ad8220d68f2 100644 (file)
@@ -62,11 +62,7 @@ def render(engine):
 
 def update(engine, data, scene):
     import bcycles
-    if scene.render.use_border:
-        engine.report({'ERROR'}, "Border rendering not supported yet")
-        free(engine)
-    else:
-        bcycles.sync(engine.session)
+    bcycles.sync(engine.session)
 
 
 def draw(engine, region, v3d, rv3d):
index 442a8f62bfd44f6ed20027c183e475032fc585e0..9777de14b1e98801c8572fa7cc596277bd3ef41a 100644 (file)
@@ -287,5 +287,29 @@ void BlenderSync::sync_view(BL::SpaceView3D b_v3d, BL::RegionView3D b_rv3d, int
        blender_camera_sync(scene->camera, &bcam, width, height);
 }
 
+BufferParams BlenderSync::get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height)
+{
+       BufferParams params;
+
+       params.full_width = width;
+       params.full_height = height;
+
+       /* border render */
+       BL::RenderSettings r = b_scene.render();
+
+       if(!b_rv3d && r.use_border()) {
+               params.full_x = r.border_min_x()*width;
+               params.full_y = r.border_min_y()*height;
+               params.width = (int)(r.border_max_x()*width) - params.full_x;
+               params.height = (int)(r.border_max_y()*height) - params.full_y;
+       }
+       else {
+               params.width = width;
+               params.height = height;
+       }
+
+       return params;
+}
+
 CCL_NAMESPACE_END
 
index 4433b1e24f9149f65e6a0cdc5a1ce6bddb74d376..1803dd36beb43a650a548578be3c75137c19edb2 100644 (file)
@@ -100,7 +100,9 @@ void BlenderSession::create_session()
        session->set_pause(BlenderSync::get_session_pause(b_scene, background));
 
        /* start rendering */
-       session->reset(width, height, session_params.samples);
+       BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height);
+
+       session->reset(buffer_params, session_params.samples);
        session->start();
 }
 
@@ -135,7 +137,10 @@ void BlenderSession::write_render_result()
        if(!pixels)
                return;
 
-       struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, width, height);
+       BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height);
+       int w = buffer_params.width, h = buffer_params.height;
+
+       struct RenderResult *rrp = RE_engine_begin_result((RenderEngine*)b_engine.ptr.data, 0, 0, w, h);
        PointerRNA rrptr;
        RNA_pointer_create(NULL, &RNA_RenderResult, rrp, &rrptr);
        BL::RenderResult rr(rrptr);
@@ -188,8 +193,10 @@ void BlenderSession::synchronize()
        session->scene->mutex.unlock();
 
        /* reset if needed */
-       if(scene->need_reset())
-               session->reset(width, height, session_params.samples);
+       if(scene->need_reset()) {
+               BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height);
+               session->reset(buffer_params, session_params.samples);
+       }
 }
 
 bool BlenderSession::draw(int w, int h)
@@ -225,7 +232,9 @@ bool BlenderSession::draw(int w, int h)
                /* reset if requested */
                if(reset) {
                        SessionParams session_params = BlenderSync::get_session_params(b_scene, background);
-                       session->reset(width, height, session_params.samples);
+                       BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height);
+
+                       session->reset(buffer_params, session_params.samples);
                }
        }
 
@@ -233,7 +242,9 @@ bool BlenderSession::draw(int w, int h)
        update_status_progress();
 
        /* draw */
-       return !session->draw(width, height);
+       BufferParams buffer_params = BlenderSync::get_buffer_params(b_scene, b_rv3d, width, height);
+
+       return !session->draw(buffer_params);
 }
 
 void BlenderSession::get_status(string& status, string& substatus)
index 83c7f70fd596876f07af39426189e9d312e0ac9b..824904cd81d2a64cbd2a87654a4a36ffc33a8c96 100644 (file)
@@ -62,6 +62,7 @@ public:
        static SceneParams get_scene_params(BL::Scene b_scene, bool background);
        static SessionParams get_session_params(BL::Scene b_scene, bool background);
        static bool get_session_pause(BL::Scene b_scene, bool background);
+       static BufferParams get_buffer_params(BL::Scene b_scene, BL::RegionView3D b_rv3d, int width, int height);
 
 private:
        /* sync */
index f43ccffe4614afb420fbda639b99c090d530f269..6ebc359fdb3ae2be4bd33298667fdd44699884e3 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "util_cuda.h"
 #include "util_debug.h"
+#include "util_foreach.h"
 #include "util_math.h"
 #include "util_opencl.h"
 #include "util_opengl.h"
@@ -41,7 +42,31 @@ DeviceTask::DeviceTask(Type type_)
 {
 }
 
-void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
+void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
+{
+       int num;
+
+       if(type == DISPLACE) {
+               num = (displace_w + max_size - 1)/max_size;
+       }
+       else {
+               max_size = max(1, max_size/w);
+               num = (h + max_size - 1)/max_size;
+       }
+
+       split(tasks, num);
+}
+
+void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
+{
+       list<DeviceTask> tasks;
+       split(tasks, num);
+
+       foreach(DeviceTask& task, tasks)
+               queue.push(task);
+}
+
+void DeviceTask::split(list<DeviceTask>& tasks, int num)
 {
        if(type == DISPLACE) {
                num = min(displace_w, num);
@@ -55,7 +80,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
                        task.displace_x = tx;
                        task.displace_w = tw;
 
-                       tasks.push(task);
+                       tasks.push_back(task);
                }
        }
        else {
@@ -70,7 +95,7 @@ void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
                        task.y = ty;
                        task.h = th;
 
-                       tasks.push(task);
+                       tasks.push_back(task);
                }
        }
 }
index 5b87b11b6b87602b4167533432ab8ab018d20107..a6a81e7b3268d10359f4e1d8e321f41873c5036c 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "device_memory.h"
 
+#include "util_list.h"
 #include "util_string.h"
 #include "util_thread.h"
 #include "util_types.h"
@@ -60,13 +61,17 @@ public:
        device_ptr buffer;
        int sample;
        int resolution;
+       int offset, stride;
 
        device_ptr displace_input;
        device_ptr displace_offset;
        int displace_x, displace_w;
 
        DeviceTask(Type type = PATH_TRACE);
+
+       void split(list<DeviceTask>& tasks, int num);
        void split(ThreadQueue<DeviceTask>& tasks, int num);
+       void split_max_size(list<DeviceTask>& tasks, int max_size);
 };
 
 /* Device */
index 990b7cb94b0a64d2909c73d89b3c3c7bb3a40fbb..a45a4fb69f627edd05900e355436715d39bccfb7 100644 (file)
@@ -162,7 +162,8 @@ public:
                if(system_cpu_support_optimized()) {
                        for(int y = task.y; y < task.y + task.h; y++) {
                                for(int x = task.x; x < task.x + task.w; x++)
-                                       kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
+                                       kernel_cpu_optimized_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
+                                               task.sample, x, y, task.offset, task.stride);
 
                                if(tasks.worker_cancel())
                                        break;
@@ -173,7 +174,8 @@ public:
                {
                        for(int y = task.y; y < task.y + task.h; y++) {
                                for(int x = task.x; x < task.x + task.w; x++)
-                                       kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state, task.sample, x, y);
+                                       kernel_cpu_path_trace(kg, (float4*)task.buffer, (unsigned int*)task.rng_state,
+                                               task.sample, x, y, task.offset, task.stride);
 
                                if(tasks.worker_cancel())
                                        break;
@@ -192,14 +194,16 @@ public:
                if(system_cpu_support_optimized()) {
                        for(int y = task.y; y < task.y + task.h; y++)
                                for(int x = task.x; x < task.x + task.w; x++)
-                                       kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+                                       kernel_cpu_optimized_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
+                                               task.sample, task.resolution, x, y, task.offset, task.stride);
                }
                else
 #endif
                {
                        for(int y = task.y; y < task.y + task.h; y++)
                                for(int x = task.x; x < task.x + task.w; x++)
-                                       kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer, task.sample, task.resolution, x, y);
+                                       kernel_cpu_tonemap(kg, (uchar4*)task.rgba, (float4*)task.buffer,
+                                               task.sample, task.resolution, x, y, task.offset, task.stride);
                }
        }
 
index 177c90ba2dff3e4d72299bc99685c61d3150f2eb..2a49d4fae4cd11685394126668b7c88fc9df56fa 100644 (file)
@@ -221,7 +221,7 @@ public:
                        cuDeviceComputeCapability(&major, &minor, cuDevId);
 
                        if(major <= 1 && minor <= 2) {
-                               cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor));
+                               cuda_error(string_printf("CUDA device supported only with compute capability 1.3 or up, found %d.%d.", major, minor));
                                return false;
                        }
                }
@@ -253,9 +253,9 @@ public:
 
 #if defined(WITH_CUDA_BINARIES) && defined(_WIN32)
                if(major <= 1 && minor <= 2)
-                       cuda_error(string_printf("CUDA device supported only with shader model 1.3 or up, found %d.%d.", major, minor));
+                       cuda_error(string_printf("CUDA device supported only compute capability 1.3 or up, found %d.%d.", major, minor));
                else
-                       cuda_error(string_printf("CUDA binary kernel for this graphics card shader model (%d.%d) not found.", major, minor));
+                       cuda_error(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor));
                return "";
 #else
                /* if not, find CUDA compiler */
@@ -520,6 +520,12 @@ public:
                cuda_assert(cuParamSeti(cuPathTrace, offset, task.h))
                offset += sizeof(task.h);
 
+               cuda_assert(cuParamSeti(cuPathTrace, offset, task.offset))
+               offset += sizeof(task.offset);
+
+               cuda_assert(cuParamSeti(cuPathTrace, offset, task.stride))
+               offset += sizeof(task.stride);
+
                cuda_assert(cuParamSetSize(cuPathTrace, offset))
 
                /* launch kernel: todo find optimal size, cache config for fermi */
@@ -581,6 +587,12 @@ public:
                cuda_assert(cuParamSeti(cuFilmConvert, offset, task.h))
                offset += sizeof(task.h);
 
+               cuda_assert(cuParamSeti(cuFilmConvert, offset, task.offset))
+               offset += sizeof(task.offset);
+
+               cuda_assert(cuParamSeti(cuFilmConvert, offset, task.stride))
+               offset += sizeof(task.stride);
+
                cuda_assert(cuParamSetSize(cuFilmConvert, offset))
 
                /* launch kernel: todo find optimal size, cache config for fermi */
index 8eaaebc6629c30f8c6e4f44cd5fc481e572ae9c6..6014dd0fdb7bf9d98a5592490d20969cd1f913c8 100644 (file)
@@ -25,6 +25,7 @@
 #include "device.h"
 #include "device_intern.h"
 
+#include "util_foreach.h"
 #include "util_map.h"
 #include "util_math.h"
 #include "util_md5.h"
@@ -52,6 +53,7 @@ public:
        map<string, device_memory*> mem_map;
        device_ptr null_mem;
        bool device_initialized;
+       string platform_name;
 
        const char *opencl_error_string(cl_int err)
        {
@@ -175,6 +177,10 @@ public:
                if(opencl_error(ciErr))
                        return;
 
+               char name[256];
+               clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+               platform_name = name;
+
                cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
                if(opencl_error(ciErr))
                        return;
@@ -277,14 +283,11 @@ public:
        {
                string build_options = " -cl-fast-relaxed-math ";
                
-               /* Full Shading only on NVIDIA cards at the moment */
-               char vendor[256];
-
-               clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
-               string name = vendor;
-               
-               if(name == "NVIDIA CUDA")
-                       build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
+               /* full shading only on NVIDIA cards at the moment */
+               if(platform_name == "NVIDIA CUDA")
+                       build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+               if(platform_name == "Apple")
+                       build_options += " -D__CL_NO_FLOAT3__ ";
 
                return build_options;
        }
@@ -541,6 +544,8 @@ public:
                cl_int d_w = task.w;
                cl_int d_h = task.h;
                cl_int d_sample = task.sample;
+               cl_int d_offset = task.offset;
+               cl_int d_stride = task.stride;
 
                /* sample arguments */
                int narg = 0;
@@ -559,6 +564,8 @@ public:
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_w), (void*)&d_w);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_h), (void*)&d_h);
+               ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_offset), (void*)&d_offset);
+               ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_stride), (void*)&d_stride);
 
                opencl_assert(ciErr);
 
@@ -611,6 +618,8 @@ public:
                cl_int d_h = task.h;
                cl_int d_sample = task.sample;
                cl_int d_resolution = task.resolution;
+               cl_int d_offset = task.offset;
+               cl_int d_stride = task.stride;
 
                /* sample arguments */
                int narg = 0;
@@ -630,6 +639,8 @@ public:
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_y), (void*)&d_y);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_w), (void*)&d_w);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_h), (void*)&d_h);
+               ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_offset), (void*)&d_offset);
+               ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_stride), (void*)&d_stride);
 
                opencl_assert(ciErr);
 
@@ -649,12 +660,24 @@ public:
                opencl_assert(clFinish(cqCommandQueue));
        }
 
-       void task_add(DeviceTask& task)
+       void task_add(DeviceTask& maintask)
        {
-               if(task.type == DeviceTask::TONEMAP)
-                       tonemap(task);
-               else if(task.type == DeviceTask::PATH_TRACE)
-                       path_trace(task);
+               list<DeviceTask> tasks;
+
+               /* arbitrary limit to work around apple ATI opencl issue */
+               if(platform_name == "Apple")
+                       maintask.split_max_size(tasks, 76800);
+               else
+                       tasks.push_back(maintask);
+
+               DeviceTask task;
+
+               foreach(DeviceTask& task, tasks) {
+                       if(task.type == DeviceTask::TONEMAP)
+                               tonemap(task);
+                       else if(task.type == DeviceTask::PATH_TRACE)
+                               path_trace(task);
+               }
        }
 
        void task_wait()
index e17544bf7afbcd1b2b2adc80df9aa09de0fcb573..939a74660a1c807df9a5872b57fc19f5ea16d39d 100644 (file)
@@ -143,7 +143,7 @@ endif()
 #set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
 #add_custom_command(
 #      OUTPUT ${KERNEL_PREPROCESSED}
-#      COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED}
+#      COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
 #      DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
 #add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
 #delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
index c00bc3fe9579317130eaf0db0da164ba771864cf..90eb7a2513f2c34b30c48106bdb5a36498cb5dc8 100644 (file)
@@ -36,7 +36,7 @@ __kernel void kernel_ocl_path_trace(
 #include "kernel_textures.h"
 
        int sample,
-       int sx, int sy, int sw, int sh)
+       int sx, int sy, int sw, int sh, int offset, int stride)
 {
        KernelGlobals kglobals, *kg = &kglobals;
 
@@ -50,7 +50,7 @@ __kernel void kernel_ocl_path_trace(
        int y = sy + get_global_id(1);
 
        if(x < sx + sw && y < sy + sh)
-               kernel_path_trace(kg, buffer, rng_state, sample, x, y);
+               kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 __kernel void kernel_ocl_tonemap(
@@ -63,7 +63,7 @@ __kernel void kernel_ocl_tonemap(
 #include "kernel_textures.h"
 
        int sample, int resolution,
-       int sx, int sy, int sw, int sh)
+       int sx, int sy, int sw, int sh, int offset, int stride)
 {
        KernelGlobals kglobals, *kg = &kglobals;
 
@@ -77,7 +77,7 @@ __kernel void kernel_ocl_tonemap(
        int y = sy + get_global_id(1);
 
        if(x < sx + sw && y < sy + sh)
-               kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y);
+               kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
 }
 
 /*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
index 52a3852aa01793fb46f51b3871917e0ccadaf21c..b4c3839dbd09824bb96327236ee4efa741d0ffef 100644 (file)
@@ -204,16 +204,16 @@ void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t
 
 /* Path Tracing */
 
-void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y)
+void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
 {
-       kernel_path_trace(kg, buffer, rng_state, sample, x, y);
+       kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 /* Tonemapping */
 
-void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y)
+void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride)
 {
-       kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y);
+       kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
 }
 
 /* Displacement */
index 75415a00b000065111c6b61cac9e5c02ce5d5986..71fc7ac31971f9ea5c3d3bf4e1ef72efd674ac46 100644 (file)
 #include "kernel_path.h"
 #include "kernel_displace.h"
 
-extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh)
+extern "C" __global__ void kernel_cuda_path_trace(float4 *buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int offset, int stride)
 {
        int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
        int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
 
        if(x < sx + sw && y < sy + sh)
-               kernel_path_trace(NULL, buffer, rng_state, sample, x, y);
+               kernel_path_trace(NULL, buffer, rng_state, sample, x, y, offset, stride);
 }
 
-extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh)
+extern "C" __global__ void kernel_cuda_tonemap(uchar4 *rgba, float4 *buffer, int sample, int resolution, int sx, int sy, int sw, int sh, int offset, int stride)
 {
        int x = sx + blockDim.x*blockIdx.x + threadIdx.x;
        int y = sy + blockDim.y*blockIdx.y + threadIdx.y;
 
        if(x < sx + sw && y < sy + sh)
-               kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y);
+               kernel_film_tonemap(NULL, rgba, buffer, sample, resolution, x, y, offset, stride);
 }
 
 extern "C" __global__ void kernel_cuda_displace(uint4 *input, float3 *offset, int sx)
index 700ee49c5f2f98cc33dccf3683dd00a57bf4c585..78247504b39b36b791e3edbf88417a907ca8ce11 100644 (file)
@@ -36,13 +36,17 @@ bool kernel_osl_use(KernelGlobals *kg);
 void kernel_const_copy(KernelGlobals *kg, const char *name, void *host, size_t size);
 void kernel_tex_copy(KernelGlobals *kg, const char *name, device_ptr mem, size_t width, size_t height);
 
-void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
-void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
+void kernel_cpu_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state,
+       int sample, int x, int y, int offset, int stride);
+void kernel_cpu_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
+       int sample, int resolution, int x, int y, int offset, int stride);
 void kernel_cpu_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
 
 #ifdef WITH_OPTIMIZED_KERNEL
-void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y);
-void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y);
+void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state,
+       int sample, int x, int y, int offset, int stride);
+void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer,
+       int sample, int resolution, int x, int y, int offset, int stride);
 void kernel_cpu_optimized_displace(KernelGlobals *kg, uint4 *input, float3 *offset, int i);
 #endif
 
index 9cdc2f1f8659f56cb6396175b531fafcca99847c..2dbdd07689190cea7262e065f6169369497a3580 100644 (file)
@@ -74,8 +74,8 @@ __device void camera_sample_perspective(KernelGlobals *kg, float raster_x, float
        ray->dP.dx = make_float3(0.0f, 0.0f, 0.0f);
        ray->dP.dy = make_float3(0.0f, 0.0f, 0.0f);
 
-       ray->dD.dx = normalize(Ddiff + kernel_data.cam.dx) - normalize(Ddiff);
-       ray->dD.dy = normalize(Ddiff + kernel_data.cam.dy) - normalize(Ddiff);
+       ray->dD.dx = normalize(Ddiff + float4_to_float3(kernel_data.cam.dx)) - normalize(Ddiff);
+       ray->dD.dy = normalize(Ddiff + float4_to_float3(kernel_data.cam.dy)) - normalize(Ddiff);
 #endif
 
 #ifdef __CAMERA_CLIPPING__
@@ -107,8 +107,8 @@ __device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, floa
 
 #ifdef __RAY_DIFFERENTIALS__
        /* ray differential */
-       ray->dP.dx = kernel_data.cam.dx;
-       ray->dP.dy = kernel_data.cam.dy;
+       ray->dP.dx = float4_to_float3(kernel_data.cam.dx);
+       ray->dP.dy = float4_to_float3(kernel_data.cam.dy);
 
        ray->dD.dx = make_float3(0.0f, 0.0f, 0.0f);
        ray->dD.dy = make_float3(0.0f, 0.0f, 0.0f);
index 5515966807b399059a7a7f9b13efb54ac7d6479f..9fbd8566ecda2ae82c8282d11c06c56e8d76937c 100644 (file)
 /* no namespaces in opencl */
 #define CCL_NAMESPACE_BEGIN
 #define CCL_NAMESPACE_END
-#define WITH_OPENCL
+
+#ifdef __CL_NO_FLOAT3__
+#define float3 float4
+#endif
+
+#ifdef __CL_NOINLINE__
+#define __noinline __attribute__((noinline))
+#else
+#define __noinline
+#endif
 
 /* in opencl all functions are device functions, so leave this empty */
 #define __device
-#define __device_inline
-#define __device_noinline
+#define __device_inline __device
+#define __device_noinline  __device __noinline
 
 /* no assert in opencl */
 #define kernel_assert(cond)
@@ -68,7 +77,11 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
 #endif
 
 #define make_float2(x, y) ((float2)(x, y))
+#ifdef __CL_NO_FLOAT3__
+#define make_float3(x, y, z) ((float4)(x, y, z, 0.0))
+#else
 #define make_float3(x, y, z) ((float3)(x, y, z))
+#endif
 #define make_float4(x, y, z, w) ((float4)(x, y, z, w))
 #define make_int2(x, y) ((int2)(x, y))
 #define make_int3(x, y, z) ((int3)(x, y, z))
index 4373701452ee080a4e85f666f526c9af8880b848..cd8acc9647abf76b5ef38b0586940b82b66f876a 100644 (file)
@@ -48,10 +48,9 @@ __device uchar4 film_float_to_byte(float4 color)
        return result;
 }
 
-__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y)
+__device void kernel_film_tonemap(KernelGlobals *kg, __global uchar4 *rgba, __global float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride)
 {
-       int w = kernel_data.cam.width;
-       int index = x + y*w;
+       int index = offset + x + y*stride;
        float4 irradiance = buffer[index];
 
        float4 float_result = film_map(kg, irradiance, sample);
index 85a2b798a628cc4beb7d162286bce3508eea4540..ea43e01ab58a65e9893c8920028b34ecbdb37f64 100644 (file)
@@ -35,16 +35,16 @@ CCL_NAMESPACE_BEGIN
 
 /* Path Tracing */
 
-void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y)
+void kernel_cpu_optimized_path_trace(KernelGlobals *kg, float4 *buffer, unsigned int *rng_state, int sample, int x, int y, int offset, int stride)
 {
-       kernel_path_trace(kg, buffer, rng_state, sample, x, y);
+       kernel_path_trace(kg, buffer, rng_state, sample, x, y, offset, stride);
 }
 
 /* Tonemapping */
 
-void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y)
+void kernel_cpu_optimized_tonemap(KernelGlobals *kg, uchar4 *rgba, float4 *buffer, int sample, int resolution, int x, int y, int offset, int stride)
 {
-       kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y);
+       kernel_film_tonemap(kg, rgba, buffer, sample, resolution, x, y, offset, stride);
 }
 
 /* Displacement */
index c609f6f13fe3722b13beebeb1f631243f3b83747..05707f31352e38fa778f53db6504dd522c4a1381 100644 (file)
@@ -34,7 +34,7 @@
 CCL_NAMESPACE_BEGIN
 
 #ifdef __MODIFY_TP__
-__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int sample)
+__device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global float3 *buffer, int x, int y, int offset, int stride, int sample)
 {
        /* modify throughput to influence path termination probability, to avoid
           darker regions receiving fewer samples than lighter regions. also RGB
@@ -45,7 +45,7 @@ __device float3 path_terminate_modified_throughput(KernelGlobals *kg, __global f
        const float minL = 0.1f;
 
        if(sample >= minsample) {
-               float3 L = buffer[x + y*kernel_data.cam.width];
+               float3 L = buffer[offset + x + y*stride];
                float3 Lmin = make_float3(minL, minL, minL);
                float correct = (float)(sample+1)/(float)sample;
 
@@ -379,7 +379,7 @@ __device float4 kernel_path_integrate(KernelGlobals *kg, RNG *rng, int sample, R
        return make_float4(L.x, L.y, L.z, 1.0f - Ltransparent);
 }
 
-__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y)
+__device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __global uint *rng_state, int sample, int x, int y, int offset, int stride)
 {
        /* initialize random numbers */
        RNG rng;
@@ -387,7 +387,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl
        float filter_u;
        float filter_v;
 
-       path_rng_init(kg, rng_state, sample, &rng, x, y, &filter_u, &filter_v);
+       path_rng_init(kg, rng_state, sample, &rng, x, y, offset, stride, &filter_u, &filter_v);
 
        /* sample camera ray */
        Ray ray;
@@ -399,7 +399,7 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl
 
        /* integrate */
 #ifdef __MODIFY_TP__
-       float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, sample);
+       float3 throughput = path_terminate_modified_throughput(kg, buffer, x, y, offset, stride, sample);
        float4 L = kernel_path_integrate(kg, &rng, sample, ray, throughput)/throughput;
 #else
        float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
@@ -407,14 +407,14 @@ __device void kernel_path_trace(KernelGlobals *kg, __global float4 *buffer, __gl
 #endif
 
        /* accumulate result in output buffer */
-       int index = x + y*kernel_data.cam.width;
+       int index = offset + x + y*stride;
 
        if(sample == 0)
                buffer[index] = L;
        else
                buffer[index] += L;
 
-       path_rng_end(kg, rng_state, rng, x, y);
+       path_rng_end(kg, rng_state, rng, x, y, offset, stride);
 }
 
 CCL_NAMESPACE_END
index ba97ab3e3b65a8b50382c4b8ffaeacc6fc8b9a02..41301ebd3dcee79ad4238005cad39a5616c48537 100644 (file)
@@ -123,7 +123,7 @@ __device_inline float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dime
 #endif
 }
 
-__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
        uint px, py;
@@ -138,7 +138,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state,
        *fx = size * (float)px * (1.0f/(float)0xFFFFFFFF) - x;
        *fy = size * (float)py * (1.0f/(float)0xFFFFFFFF) - y;
 #else
-       *rng = rng_state[x + y*kernel_data.cam.width];
+       *rng = rng_state[offset + x + y*stride];
 
        *rng ^= kernel_data.integrator.seed;
 
@@ -147,7 +147,7 @@ __device_inline void path_rng_init(KernelGlobals *kg, __global uint *rng_state,
 #endif
 }
 
-__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y)
+__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride)
 {
        /* nothing to do */
 }
@@ -163,10 +163,10 @@ __device float path_rng(KernelGlobals *kg, RNG *rng, int sample, int dimension)
        return (float)*rng * (1.0f/(float)0xFFFFFFFF);
 }
 
-__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, float *fx, float *fy)
+__device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sample, RNG *rng, int x, int y, int offset, int stride, float *fx, float *fy)
 {
        /* load state */
-       *rng = rng_state[x + y*kernel_data.cam.width];
+       *rng = rng_state[offset + x + y*stride];
 
        *rng ^= kernel_data.integrator.seed;
 
@@ -174,10 +174,10 @@ __device void path_rng_init(KernelGlobals *kg, __global uint *rng_state, int sam
        *fy = path_rng(kg, rng, sample, PRNG_FILTER_V);
 }
 
-__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y)
+__device void path_rng_end(KernelGlobals *kg, __global uint *rng_state, RNG rng, int x, int y, int offset, int stride)
 {
        /* store state for next sample */
-       rng_state[x + y*kernel_data.cam.width] = rng;
+       rng_state[offset + x + y*stride] = rng;
 }
 
 #endif
index d9bd645b16d35320f5b0841be4d29e6e3be7497f..ea73f87a8a56385e09e891f4a81e900612346b79 100644 (file)
@@ -295,29 +295,24 @@ typedef struct ShaderData {
 #endif
 } ShaderData;
 
-/* Constrant Kernel Data */
+/* Constrant Kernel Data
+ *
+ * These structs are passed from CPU to various devices, and the struct layout
+ * must match exactly. Structs are padded to ensure 16 byte alignment, and we
+ * do not use float3 because its size may not be the same on all devices. */
 
 typedef struct KernelCamera {
        /* type */
        int ortho;
-       int pad;
-
-       /* size */
-       int width, height;
+       int pad1, pad2, pad3;
 
        /* matrices */
        Transform cameratoworld;
        Transform rastertocamera;
 
        /* differentials */
-       float3 dx;
-#ifndef WITH_OPENCL
-       float pad1;
-#endif
-       float3 dy;
-#ifndef WITH_OPENCL
-       float pad2;
-#endif
+       float4 dx;
+       float4 dy;
 
        /* depth of field */
        float aperturesize;
@@ -358,10 +353,6 @@ typedef struct KernelBackground {
 typedef struct KernelSunSky {
        /* sun direction in spherical and cartesian */
        float theta, phi, pad3, pad4;
-       float3 dir;
-#ifndef WITH_OPENCL
-       float pad;
-#endif
 
        /* perez function parameters */
        float zenith_Y, zenith_x, zenith_y, pad2;
index acdddb475d0dbab492d162d9c1a07f2520a2302b..29141b25b59f5d70117089201b8eafdf797772d7 100644 (file)
@@ -36,8 +36,6 @@ CCL_NAMESPACE_BEGIN
 RenderBuffers::RenderBuffers(Device *device_)
 {
        device = device_;
-       width = 0;
-       height = 0;
 }
 
 RenderBuffers::~RenderBuffers()
@@ -58,24 +56,23 @@ void RenderBuffers::device_free()
        }
 }
 
-void RenderBuffers::reset(Device *device, int width_, int height_)
+void RenderBuffers::reset(Device *device, BufferParams& params_)
 {
-       width = width_;
-       height = height_;
+       params = params_;
 
        /* free existing buffers */
        device_free();
        
        /* allocate buffer */
-       buffer.resize(width, height);
+       buffer.resize(params.width, params.height);
        device->mem_alloc(buffer, MEM_READ_WRITE);
        device->mem_zero(buffer);
 
        /* allocate rng state */
-       rng_state.resize(width, height);
+       rng_state.resize(params.width, params.height);
 
-       uint *init_state = rng_state.resize(width, height);
-       int x, y;
+       uint *init_state = rng_state.resize(params.width, params.height);
+       int x, y, width = params.width, height = params.height;
        
        for(x=0; x<width; x++)
                for(y=0; y<height; y++)
@@ -92,11 +89,11 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample)
 
        device->mem_copy_from(buffer, 0, buffer.memory_size());
 
-       float4 *out = new float4[width*height];
+       float4 *out = new float4[params.width*params.height];
        float4 *in = (float4*)buffer.data_pointer;
        float scale = 1.0f/(float)sample;
        
-       for(int i = width*height - 1; i >= 0; i--) {
+       for(int i = params.width*params.height - 1; i >= 0; i--) {
                float4 rgba = in[i]*scale;
 
                rgba.x = rgba.x*exposure;
@@ -117,8 +114,6 @@ float4 *RenderBuffers::copy_from_device(float exposure, int sample)
 DisplayBuffer::DisplayBuffer(Device *device_)
 {
        device = device_;
-       width = 0;
-       height = 0;
        draw_width = 0;
        draw_height = 0;
        transparent = true; /* todo: determine from background */
@@ -137,28 +132,27 @@ void DisplayBuffer::device_free()
        }
 }
 
-void DisplayBuffer::reset(Device *device, int width_, int height_)
+void DisplayBuffer::reset(Device *device, BufferParams& params_)
 {
        draw_width = 0;
        draw_height = 0;
 
-       width = width_;
-       height = height_;
+       params = params_;
 
        /* free existing buffers */
        device_free();
 
        /* allocate display pixels */
-       rgba.resize(width, height);
+       rgba.resize(params.width, params.height);
        device->pixels_alloc(rgba);
 }
 
-void DisplayBuffer::draw_set(int width_, int height_)
+void DisplayBuffer::draw_set(int width, int height)
 {
-       assert(width_ <= width && height_ <= height);
+       assert(width <= params.width && height <= params.height);
 
-       draw_width = width_;
-       draw_height = height_;
+       draw_width = width;
+       draw_height = height;
 }
 
 void DisplayBuffer::draw_transparency_grid()
@@ -175,11 +169,11 @@ void DisplayBuffer::draw_transparency_grid()
        };
 
        glColor4ub(50, 50, 50, 255);
-       glRectf(0, 0, width, height);
+       glRectf(0, 0, params.width, params.height);
        glEnable(GL_POLYGON_STIPPLE);
        glColor4ub(55, 55, 55, 255);
        glPolygonStipple(checker_stipple_sml);
-       glRectf(0, 0, width, height);
+       glRectf(0, 0, params.width, params.height);
        glDisable(GL_POLYGON_STIPPLE);
 }
 
@@ -189,7 +183,7 @@ void DisplayBuffer::draw(Device *device)
                if(transparent)
                        draw_transparency_grid();
 
-               device->draw_pixels(rgba, 0, draw_width, draw_height, width, height, transparent);
+               device->draw_pixels(rgba, 0, draw_width, draw_height, params.width, params.height, transparent);
        }
 }
 
index d5eb8d7fa2feb92419d888463bc6acd488f614db..66bd03c889317f1c1f79b9035c8bf24d23d6df53 100644 (file)
@@ -30,12 +30,49 @@ CCL_NAMESPACE_BEGIN
 class Device;
 struct float4;
 
+/* Buffer Parameters
+   Size of render buffer and how it fits in the full image (border render). */
+
+class BufferParams {
+public:
+       /* width/height of the physical buffer */
+       int width;
+       int height;
+
+       /* offset into and width/height of the full buffer */
+       int full_x;
+       int full_y;
+       int full_width;
+       int full_height;
+
+       BufferParams()
+       {
+               width = 0;
+               height = 0;
+
+               full_x = 0;
+               full_y = 0;
+               full_width = 0;
+               full_height = 0;
+       }
+
+       bool modified(const BufferParams& params)
+       {
+               return !(full_x == params.full_x
+                       && full_y == params.full_y
+                       && width == params.width
+                       && height == params.height
+                       && full_width == params.full_width
+                       && full_height == params.full_height);
+       }
+};
+
 /* Render Buffers */
 
 class RenderBuffers {
 public:
-       /* buffer dimensions */
-       int width, height;
+       /* buffer parameters */
+       BufferParams params;
        /* float buffer */
        device_vector<float4> buffer;
        /* random number generator state */
@@ -46,7 +83,7 @@ public:
        RenderBuffers(Device *device);
        ~RenderBuffers();
 
-       void reset(Device *device, int width, int height);
+       void reset(Device *device, BufferParams& params);
        float4 *copy_from_device(float exposure, int sample);
 
 protected:
@@ -62,8 +99,8 @@ protected:
 
 class DisplayBuffer {
 public:
-       /* buffer dimensions */
-       int width, height;
+       /* buffer parameters */
+       BufferParams params;
        /* dimensions for how much of the buffer is actually ready for display.
           with progressive render we can be using only a subset of the buffer.
           if these are zero, it means nothing can be drawn yet */
@@ -78,7 +115,7 @@ public:
        DisplayBuffer(Device *device);
        ~DisplayBuffer();
 
-       void reset(Device *device, int width, int height);
+       void reset(Device *device, BufferParams& params);
        void write(Device *device, const string& filename);
 
        void draw_set(int width, int height);
index e88c0a388bc05e65b17d3bdcebee0313478ffb01..a83ae81844cbcb805cc734a0c289897671e13080 100644 (file)
@@ -72,8 +72,9 @@ void Camera::update()
        if(!need_update)
                return;
        
+       /* ndc to raster */
        Transform screentocamera;
-       Transform ndctoraster = transform_scale((float)width, (float)height, 1.0f);
+       Transform ndctoraster = transform_scale(width, height, 1.0f);
 
        /* raster to screen */
        Transform screentoraster = ndctoraster *
@@ -148,13 +149,9 @@ void Camera::device_update(Device *device, DeviceScene *dscene)
        /* type */
        kcam->ortho = ortho;
 
-       /* size */
-       kcam->width = width;
-       kcam->height = height;
-
        /* store differentials */
-       kcam->dx = dx;
-       kcam->dy = dy;
+       kcam->dx = float3_to_float4(dx);
+       kcam->dy = float3_to_float4(dy);
 
        /* clipping */
        kcam->nearclip = nearclip;
index 7d873221cd6ece18cda70d8052db7cd979444442..81d156a079dad1f8eb4e08cde493921f4a0264d5 100644 (file)
@@ -273,7 +273,6 @@ static void sky_texture_precompute(KernelSunSky *ksunsky, float3 dir, float turb
 
        ksunsky->theta = theta;
        ksunsky->phi = phi;
-       ksunsky->dir = dir;
 
        float theta2 = theta*theta;
        float theta3 = theta*theta*theta;
index 42b4a2bb7e480de4ec35928aebc579f94dac46f2..26c4dbfbb7a7066e2819ff1b6b9970561a0f2409 100644 (file)
@@ -51,8 +51,6 @@ Session::Session(const SessionParams& params_)
        sample = 0;
 
        delayed_reset.do_reset = false;
-       delayed_reset.w = 0;
-       delayed_reset.h = 0;
        delayed_reset.samples = 0;
 
        display_outdated = false;
@@ -108,7 +106,7 @@ bool Session::ready_to_reset()
 
 /* GPU Session */
 
-void Session::reset_gpu(int w, int h, int samples)
+void Session::reset_gpu(BufferParams& buffer_params, int samples)
 {
        /* block for buffer acces and reset immediately. we can't do this
           in the thread, because we need to allocate an OpenGL buffer, and
@@ -119,7 +117,7 @@ void Session::reset_gpu(int w, int h, int samples)
        display_outdated = true;
        reset_time = time_dt();
 
-       reset_(w, h, samples);
+       reset_(buffer_params, samples);
 
        gpu_need_tonemap = false;
        gpu_need_tonemap_cond.notify_all();
@@ -127,7 +125,7 @@ void Session::reset_gpu(int w, int h, int samples)
        pause_cond.notify_all();
 }
 
-bool Session::draw_gpu(int w, int h)
+bool Session::draw_gpu(BufferParams& buffer_params)
 {
        /* block for buffer access */
        thread_scoped_lock display_lock(display->mutex);
@@ -136,7 +134,7 @@ bool Session::draw_gpu(int w, int h)
        if(gpu_draw_ready) {
                /* then verify the buffers have the expected size, so we don't
                   draw previous results in a resized window */
-               if(w == display->width && h == display->height) {
+               if(!buffer_params.modified(display->params)) {
                        /* for CUDA we need to do tonemapping still, since we can
                           only access GL buffers from the main thread */
                        if(gpu_need_tonemap) {
@@ -261,15 +259,14 @@ void Session::run_gpu()
 
 /* CPU Session */
 
-void Session::reset_cpu(int w, int h, int samples)
+void Session::reset_cpu(BufferParams& buffer_params, int samples)
 {
        thread_scoped_lock reset_lock(delayed_reset.mutex);
 
        display_outdated = true;
        reset_time = time_dt();
 
-       delayed_reset.w = w;
-       delayed_reset.h = h;
+       delayed_reset.params = buffer_params;
        delayed_reset.samples = samples;
        delayed_reset.do_reset = true;
        device->task_cancel();
@@ -277,7 +274,7 @@ void Session::reset_cpu(int w, int h, int samples)
        pause_cond.notify_all();
 }
 
-bool Session::draw_cpu(int w, int h)
+bool Session::draw_cpu(BufferParams& buffer_params)
 {
        thread_scoped_lock display_lock(display->mutex);
 
@@ -285,7 +282,7 @@ bool Session::draw_cpu(int w, int h)
        if(display->draw_ready()) {
                /* then verify the buffers have the expected size, so we don't
                   draw previous results in a resized window */
-               if(w == display->width && h == display->height) {
+               if(!buffer_params.modified(display->params)) {
                        display->draw(device);
 
                        if(display_outdated && (time_dt() - reset_time) > params.text_timeout)
@@ -306,7 +303,7 @@ void Session::run_cpu()
                thread_scoped_lock buffers_lock(buffers->mutex);
                thread_scoped_lock display_lock(display->mutex);
 
-               reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples);
+               reset_(delayed_reset.params, delayed_reset.samples);
                delayed_reset.do_reset = false;
        }
 
@@ -389,7 +386,7 @@ void Session::run_cpu()
                        if(delayed_reset.do_reset) {
                                /* reset rendering if request from main thread */
                                delayed_reset.do_reset = false;
-                               reset_(delayed_reset.w, delayed_reset.h, delayed_reset.samples);
+                               reset_(delayed_reset.params, delayed_reset.samples);
                        }
                        else if(need_tonemap) {
                                /* tonemap only if we do not reset, we don't we don't
@@ -438,23 +435,23 @@ void Session::run()
                progress.set_update();
 }
 
-bool Session::draw(int w, int h)
+bool Session::draw(BufferParams& buffer_params)
 {
        if(device_use_gl)
-               return draw_gpu(w, h);
+               return draw_gpu(buffer_params);
        else
-               return draw_cpu(w, h);
+               return draw_cpu(buffer_params);
 }
 
-void Session::reset_(int w, int h, int samples)
+void Session::reset_(BufferParams& buffer_params, int samples)
 {
-       if(w != buffers->width || h != buffers->height) {
+       if(buffer_params.modified(buffers->params)) {
                gpu_draw_ready = false;
-               buffers->reset(device, w, h);
-               display->reset(device, w, h);
+               buffers->reset(device, buffer_params);
+               display->reset(device, buffer_params);
        }
 
-       tile_manager.reset(w, h, samples);
+       tile_manager.reset(buffer_params, samples);
 
        start_time = time_dt();
        preview_time = 0.0;
@@ -462,12 +459,12 @@ void Session::reset_(int w, int h, int samples)
        sample = 0;
 }
 
-void Session::reset(int w, int h, int samples)
+void Session::reset(BufferParams& buffer_params, int samples)
 {
        if(device_use_gl)
-               reset_gpu(w, h, samples);
+               reset_gpu(buffer_params, samples);
        else
-               reset_cpu(w, h, samples);
+               reset_cpu(buffer_params, samples);
 }
 
 void Session::set_samples(int samples)
@@ -514,14 +511,18 @@ void Session::update_scene()
 
        progress.set_status("Updating Scene");
 
-       /* update camera if dimensions changed for progressive render */
+       /* update camera if dimensions changed for progressive render. the camera
+          knows nothing about progressive or cropped rendering, it just gets the
+          image dimensions passed in */
        Camera *cam = scene->camera;
-       int w = tile_manager.state.width;
-       int h = tile_manager.state.height;
-
-       if(cam->width != w || cam->height != h) {
-               cam->width = w;
-               cam->height = h;
+       float progressive_x = tile_manager.state.width/(float)tile_manager.params.width;
+       float progressive_y = tile_manager.state.height/(float)tile_manager.params.height;
+       int width = tile_manager.params.full_width*progressive_x;
+       int height = tile_manager.params.full_height*progressive_y;
+
+       if(width != cam->width || height != cam->height) {
+               cam->width = width;
+               cam->height = height;
                cam->tag_update();
        }
 
@@ -573,14 +574,16 @@ void Session::path_trace(Tile& tile)
        /* add path trace task */
        DeviceTask task(DeviceTask::PATH_TRACE);
 
-       task.x = tile.x;
-       task.y = tile.y;
+       task.x = tile_manager.state.full_x + tile.x;
+       task.y = tile_manager.state.full_y + tile.y;
        task.w = tile.w;
        task.h = tile.h;
        task.buffer = buffers->buffer.device_pointer;
        task.rng_state = buffers->rng_state.device_pointer;
        task.sample = tile_manager.state.sample;
        task.resolution = tile_manager.state.resolution;
+       task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width);
+       task.stride = tile_manager.state.width;
 
        device->task_add(task);
 }
@@ -590,14 +593,16 @@ void Session::tonemap()
        /* add tonemap task */
        DeviceTask task(DeviceTask::TONEMAP);
 
-       task.x = 0;
-       task.y = 0;
+       task.x = tile_manager.state.full_x;
+       task.y = tile_manager.state.full_y;
        task.w = tile_manager.state.width;
        task.h = tile_manager.state.height;
        task.rgba = display->rgba.device_pointer;
        task.buffer = buffers->buffer.device_pointer;
        task.sample = tile_manager.state.sample;
        task.resolution = tile_manager.state.resolution;
+       task.offset = -(tile_manager.state.full_x + tile_manager.state.full_y*tile_manager.state.width);
+       task.stride = tile_manager.state.width;
 
        if(task.w > 0 && task.h > 0) {
                device->task_add(task);
index ce7f420096ae9e7637a65208afed73080ef29b5e..89979b8c451068adb5cd0569b8ab6d242ddf2633 100644 (file)
@@ -19,6 +19,7 @@
 #ifndef __SESSION_H__
 #define __SESSION_H__
 
+#include "buffers.h"
 #include "device.h"
 #include "tile.h"
 
@@ -27,6 +28,7 @@
 
 CCL_NAMESPACE_BEGIN
 
+class BufferParams;
 class Device;
 class DeviceScene;
 class DisplayBuffer;
@@ -106,11 +108,11 @@ public:
        ~Session();
 
        void start();
-       bool draw(int w, int h);
+       bool draw(BufferParams& params);
        void wait();
 
        bool ready_to_reset();
-       void reset(int w, int h, int samples);
+       void reset(BufferParams& params, int samples);
        void set_samples(int samples);
        void set_pause(bool pause);
 
@@ -118,7 +120,7 @@ protected:
        struct DelayedReset {
                thread_mutex mutex;
                bool do_reset;
-               int w, h;
+               BufferParams params;
                int samples;
        } delayed_reset;
 
@@ -129,15 +131,15 @@ protected:
 
        void tonemap();
        void path_trace(Tile& tile);
-       void reset_(int w, int h, int samples);
+       void reset_(BufferParams& params, int samples);
 
        void run_cpu();
-       bool draw_cpu(int w, int h);
-       void reset_cpu(int w, int h, int samples);
+       bool draw_cpu(BufferParams& params);
+       void reset_cpu(BufferParams& params, int samples);
 
        void run_gpu();
-       bool draw_gpu(int w, int h);
-       void reset_gpu(int w, int h, int samples);
+       bool draw_gpu(BufferParams& params);
+       void reset_gpu(BufferParams& params, int samples);
 
        TileManager tile_manager;
        bool device_use_gl;
index ba437e7487414d176e6144f76333c32717b2ff82..b118a7ba4781a276fe24ba480a72bdc688209a0c 100644 (file)
@@ -28,21 +28,21 @@ TileManager::TileManager(bool progressive_, int samples_, int tile_size_, int mi
        tile_size = tile_size_;
        min_size = min_size_;
 
-       reset(0, 0, 0);
+       BufferParams buffer_params;
+       reset(buffer_params, 0);
 }
 
 TileManager::~TileManager()
 {
 }
 
-void TileManager::reset(int width_, int height_, int samples_)
+void TileManager::reset(BufferParams& params_, int samples_)
 {
-       full_width = width_;
-       full_height = height_;
+       params = params_;
 
        start_resolution = 1;
 
-       int w = width_, h = height_;
+       int w = params.width, h = params.height;
 
        if(min_size != INT_MAX) {
                while(w*h > min_size*min_size) {
@@ -55,6 +55,8 @@ void TileManager::reset(int width_, int height_, int samples_)
 
        samples = samples_;
 
+       state.full_x = 0;
+       state.full_y = 0;
        state.width = 0;
        state.height = 0;
        state.sample = -1;
@@ -70,8 +72,8 @@ void TileManager::set_samples(int samples_)
 void TileManager::set_tiles()
 {
        int resolution = state.resolution;
-       int image_w = max(1, full_width/resolution);
-       int image_h = max(1, full_height/resolution);
+       int image_w = max(1, params.width/resolution);
+       int image_h = max(1, params.height/resolution);
        int tile_w = (image_w + tile_size - 1)/tile_size;
        int tile_h = (image_h + tile_size - 1)/tile_size;
        int sub_w = image_w/tile_w;
@@ -90,6 +92,8 @@ void TileManager::set_tiles()
                }
        }
 
+       state.full_x = params.full_x/resolution;
+       state.full_y = params.full_y/resolution;
        state.width = image_w;
        state.height = image_h;
 }
index 5cd16eb8afa9bed547a34277c1ea3e196d50fc15..76863d23498935abcf6b773925b626a2a04cd9c6 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <limits.h>
 
+#include "buffers.h"
 #include "util_list.h"
 
 CCL_NAMESPACE_BEGIN
@@ -39,7 +40,10 @@ public:
 
 class TileManager {
 public:
+       BufferParams params;
        struct State {
+               int full_x;
+               int full_y;
                int width;
                int height;
                int sample;
@@ -50,7 +54,7 @@ public:
        TileManager(bool progressive, int samples, int tile_size, int min_size);
        ~TileManager();
 
-       void reset(int width, int height, int samples);
+       void reset(BufferParams& params, int samples);
        void set_samples(int samples);
        bool next();
        bool done();
@@ -63,8 +67,6 @@ protected:
        int tile_size;
        int min_size;
 
-       int full_width;
-       int full_height;
        int start_resolution;
 };
 
index 7c56f0fbb124fcc10eb435766998044fe07842d7..0a1d8ff4555c48b5454b97c7b4eab719870c76b4 100644 (file)
@@ -536,6 +536,11 @@ __device_inline float3 float4_to_float3(const float4 a)
        return make_float3(a.x, a.y, a.z);
 }
 
+__device_inline float4 float3_to_float4(const float3 a)
+{
+       return make_float4(a.x, a.y, a.z, 1.0f);
+}
+
 #ifndef __KERNEL_GPU__
 
 __device_inline void print_float3(const char *label, const float3& a)
index 3063d9bc40a9d1d349ca3b902eb3df5fd6bdec33..d6c48fcb4587c332817f0f965c7b67aede5a533c 100644 (file)
@@ -66,6 +66,11 @@ typedef enum {
         * unless it's a mesh and can be exploded -> curve can also emit particles
         */
        eModifierTypeType_DeformOrConstruct,
+
+       /* Like eModifierTypeType_Nonconstructive, but does not affect the geometry
+        * of the object, rather some of its CustomData layers.
+        * E.g. UVProject and WeightVG modifiers. */
+       eModifierTypeType_NonGeometrical,
 } ModifierTypeType;
 
 typedef enum {
@@ -312,6 +317,7 @@ int           modifier_supportsMapping(struct ModifierData *md);
 int           modifier_couldBeCage(struct Scene *scene, struct ModifierData *md);
 int           modifier_isCorrectableDeformed(struct ModifierData *md);
 int                      modifier_sameTopology(ModifierData *md);
+int           modifier_nonGeometrical(ModifierData *md);
 int           modifier_isEnabled(struct Scene *scene, struct ModifierData *md, int required_mode);
 void          modifier_setError(struct ModifierData *md, const char *format, ...)
 #ifdef __GNUC__
index f09be8c34ad220bcd0af4ed2c358c01823ca28f3..5a389019519b64ed8acd39c43dfe6bb130bb3683 100644 (file)
@@ -239,7 +239,14 @@ int modifier_couldBeCage(struct Scene *scene, ModifierData *md)
 int modifier_sameTopology(ModifierData *md)
 {
        ModifierTypeInfo *mti = modifierType_getInfo(md->type);
-       return ( mti->type == eModifierTypeType_OnlyDeform || mti->type == eModifierTypeType_Nonconstructive);
+       return ELEM3(mti->type, eModifierTypeType_OnlyDeform, eModifierTypeType_Nonconstructive,
+                    eModifierTypeType_NonGeometrical);
+}
+
+int modifier_nonGeometrical(ModifierData *md)
+{
+       ModifierTypeInfo *mti = modifierType_getInfo(md->type);
+       return (mti->type == eModifierTypeType_NonGeometrical);
 }
 
 void modifier_setError(ModifierData *md, const char *format, ...)
index 8afd4e64b03b1193a9605131d8261f6ddc074212..b6c1606ec6baf62aa21d39dbcae830b14446ca8e 100644 (file)
@@ -845,7 +845,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif
                                uiLayoutSetOperatorContext(row, WM_OP_INVOKE_DEFAULT);
                                uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply"), 0, "apply_as", MODIFIER_APPLY_DATA);
                                
-                               if (modifier_sameTopology(md))
+                               if (modifier_sameTopology(md) && !modifier_nonGeometrical(md))
                                        uiItemEnumO(row, "OBJECT_OT_modifier_apply", IFACE_("Apply as Shape"), 0, "apply_as", MODIFIER_APPLY_SHAPE);
                        }
                        
@@ -853,7 +853,7 @@ static uiLayout *draw_modifier(uiLayout *layout, Scene *scene, Object *ob, Modif
                        uiBlockSetButLock(block, ob && ob->id.lib, ERROR_LIBDATA_MESSAGE);
                        
                        if (!ELEM5(md->type, eModifierType_Fluidsim, eModifierType_Softbody, eModifierType_ParticleSystem, eModifierType_Cloth, eModifierType_Smoke))
-                               uiItemO(row, TIP_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy");
+                               uiItemO(row, IFACE_("Copy"), ICON_NONE, "OBJECT_OT_modifier_copy");
                }
                
                /* result is the layout block inside the box, that we return so that modifier settings can be drawn */
index ec96bde13a047b563062b8c41fea7f1d9634cd55..a140888a602a9450c6ba62d5ed3fa26ca799cec4 100644 (file)
@@ -464,7 +464,7 @@ static int modifier_apply_shape(ReportList *reports, Scene *scene, Object *ob, M
                Key *key=me->key;
                KeyBlock *kb;
                
-               if(!modifier_sameTopology(md)) {
+               if(!modifier_sameTopology(md) || mti->type == eModifierTypeType_NonGeometrical) {
                        BKE_report(reports, RPT_ERROR, "Only deforming modifiers can be applied to Shapes");
                        return 0;
                }
@@ -512,7 +512,7 @@ static int modifier_apply_obdata(ReportList *reports, Scene *scene, Object *ob,
                Mesh *me = ob->data;
                MultiresModifierData *mmd= find_multires_modifier_before(scene, md);
 
-               if( me->key) {
+               if(me->key && mti->type != eModifierTypeType_NonGeometrical) {
                        BKE_report(reports, RPT_ERROR, "Modifier cannot be applied to Mesh with Shape Keys");
                        return 0;
                }
index bcbb0ab82e84d9b4d96d65fdd15cae133cadbd2f..d7c78492f927851326f91e6dd0e4089be5437e37 100644 (file)
@@ -408,7 +408,7 @@ ModifierTypeInfo modifierType_UVProject = {
        /* name */              "UVProject",
        /* structName */        "UVProjectModifierData",
        /* structSize */        sizeof(UVProjectModifierData),
-       /* type */              eModifierTypeType_Nonconstructive,
+       /* type */              eModifierTypeType_NonGeometrical,
        /* flags */             eModifierTypeFlag_AcceptsMesh
                                                        | eModifierTypeFlag_SupportsMapping
                                                        | eModifierTypeFlag_SupportsEditmode
index 95862f5f0022743ca4ad6d505402153f5f553d67..473692f123d40772a1b98f6dc0e86f5a9fdf45d0 100644 (file)
@@ -254,7 +254,7 @@ ModifierTypeInfo modifierType_WeightVGEdit = {
        /* name */              "VertexWeightEdit",
        /* structName */        "WeightVGEditModifierData",
        /* structSize */        sizeof(WeightVGEditModifierData),
-       /* type */              eModifierTypeType_Nonconstructive,
+       /* type */              eModifierTypeType_NonGeometrical,
        /* flags */             eModifierTypeFlag_AcceptsMesh
 /*                            |eModifierTypeFlag_SupportsMapping*/
                               |eModifierTypeFlag_SupportsEditmode,
index 34e73adb4b43efcdf5f1a8c9e9337dcc9a65054d..316080ba4a7489a21bc12f45da487b932c37c229 100644 (file)
@@ -386,7 +386,7 @@ ModifierTypeInfo modifierType_WeightVGMix = {
        /* name */              "VertexWeightMix",
        /* structName */        "WeightVGMixModifierData",
        /* structSize */        sizeof(WeightVGMixModifierData),
-       /* type */              eModifierTypeType_Nonconstructive,
+       /* type */              eModifierTypeType_NonGeometrical,
        /* flags */             eModifierTypeFlag_AcceptsMesh
 /*                            |eModifierTypeFlag_SupportsMapping*/
                               |eModifierTypeFlag_SupportsEditmode,
index 2a8b639f2cf88ad38279e5d6c9a06edf48166158..2a4b700ce1d5420cc7e6cba13ecd7acaeacba703 100644 (file)
@@ -520,7 +520,7 @@ ModifierTypeInfo modifierType_WeightVGProximity = {
        /* name */              "VertexWeightProximity",
        /* structName */        "WeightVGProximityModifierData",
        /* structSize */        sizeof(WeightVGProximityModifierData),
-       /* type */              eModifierTypeType_Nonconstructive,
+       /* type */              eModifierTypeType_NonGeometrical,
        /* flags */             eModifierTypeFlag_AcceptsMesh
 /*                            |eModifierTypeFlag_SupportsMapping*/
                               |eModifierTypeFlag_SupportsEditmode,
index 25760910c4ee0d59f5724f7b958b338ff999f1e9..bd121b6177fbd0911177d6a92d4a2beea5cc9857 100644 (file)
@@ -1486,11 +1486,11 @@ static PyObject *Vector_isub(PyObject *v1, PyObject *v2)
  * note: vector/matrix multiplication IS NOT COMMUTATIVE!!!!
  * note: assume read callbacks have been done first.
  */
-int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat)
+int column_vector_multiplication(float r_vec[MAX_DIMENSIONS], VectorObject* vec, MatrixObject * mat)
 {
        float vec_cpy[MAX_DIMENSIONS];
        double dot = 0.0f;
-       int x, y, z = 0;
+       int row, col, z = 0;
 
        if (mat->num_col != vec->size) {
                if (mat->num_col == 4 && vec->size == 3) {
@@ -1507,13 +1507,13 @@ int column_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject* vec,
 
        memcpy(vec_cpy, vec->vec, vec->size * sizeof(float));
 
-       rvec[3] = 1.0f;
+       r_vec[3] = 1.0f;
 
-       for (x = 0; x < mat->num_row; x++) {
-               for (y = 0; y < mat->num_col; y++) {
-                       dot += (double)(MATRIX_ITEM(mat, y, x) * vec_cpy[y]);
+       for (row = 0; row < mat->num_row; row++) {
+               for (col = 0; col < mat->num_col; col++) {
+                       dot += (double)(MATRIX_ITEM(mat, row, col) * vec_cpy[col]);
                }
-               rvec[z++] = (float)dot;
+               r_vec[z++] = (float)dot;
                dot = 0.0f;
        }
 
@@ -2634,7 +2634,7 @@ static int row_vector_multiplication(float rvec[MAX_DIMENSIONS], VectorObject *v
        //muliplication
        for (x = 0; x < mat->num_col; x++) {
                for (y = 0; y < mat->num_row; y++) {
-                       dot += MATRIX_ITEM(mat, x, y) * vec_cpy[y];
+                       dot += MATRIX_ITEM(mat, y, x) * vec_cpy[y];
                }
                rvec[z++] = (float)dot;
                dot = 0.0f;
index 2b44bad82ab1418e3955b3d4c23da9a1d40cf5e1..dc2de5fb450ad674b798c4a508e0f610cbac70b3 100644 (file)
@@ -190,6 +190,11 @@ void RE_engine_end_result(RenderEngine *engine, RenderResult *result)
 
        if(!result)
                return;
+       
+       result->tilerect.xmin += re->disprect.xmin;
+       result->tilerect.xmax += re->disprect.xmin;
+       result->tilerect.ymin += re->disprect.ymin;
+       result->tilerect.ymax += re->disprect.ymin;
 
        /* merge. on break, don't merge in result for preview renders, looks nicer */
        if(!(re->test_break(re->tbh) && (re->r.scemode & R_PREVIEWBUTS)))