Cycles: SSS and Volume rendering in split kernel
authorHristo Gueorguiev <prem.nirved@gmail.com>
Wed, 8 Mar 2017 14:42:26 +0000 (15:42 +0100)
committerSergey Sharybin <sergey.vfx@gmail.com>
Thu, 9 Mar 2017 16:09:37 +0000 (17:09 +0100)
Decoupled ray marching is not supported yet.

Transparent shadows are always enabled for volume rendering.

Changes in kernel/bvh and kernel/geom are from Sergey.
This simiplifies code significantly, and prepares it for
record-all transparent shadow function in split kernel.

46 files changed:
intern/cycles/device/device.h
intern/cycles/device/device_split_kernel.cpp
intern/cycles/device/device_split_kernel.h
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/bvh/bvh_shadow_all.h
intern/cycles/kernel/bvh/bvh_subsurface.h
intern/cycles/kernel/bvh/bvh_traversal.h
intern/cycles/kernel/bvh/bvh_volume.h
intern/cycles/kernel/bvh/bvh_volume_all.h
intern/cycles/kernel/bvh/qbvh_shadow_all.h
intern/cycles/kernel/bvh/qbvh_subsurface.h
intern/cycles/kernel/bvh/qbvh_traversal.h
intern/cycles/kernel/bvh/qbvh_volume.h
intern/cycles/kernel/bvh/qbvh_volume_all.h
intern/cycles/kernel/geom/geom_object.h
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_subsurface.h [new file with mode: 0644]
intern/cycles/kernel/kernel_path_surface.h
intern/cycles/kernel/kernel_path_volume.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/kernel_shadow.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/kernel/kernels/cpu/kernel_cpu.h
intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl [moved from intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl with 83% similarity]
intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl [new file with mode: 0644]
intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl [new file with mode: 0644]
intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl [new file with mode: 0644]
intern/cycles/kernel/kernels/opencl/kernel_split.cl [new file with mode: 0644]
intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl [new file with mode: 0644]
intern/cycles/kernel/split/kernel_buffer_update.h [moved from intern/cycles/kernel/split/kernel_background_buffer_update.h with 92% similarity]
intern/cycles/kernel/split/kernel_do_volume.h [new file with mode: 0644]
intern/cycles/kernel/split/kernel_indirect_background.h [new file with mode: 0644]
intern/cycles/kernel/split/kernel_indirect_subsurface.h [new file with mode: 0644]
intern/cycles/kernel/split/kernel_lamp_emission.h
intern/cycles/kernel/split/kernel_path_init.h
intern/cycles/kernel/split/kernel_queue_enqueue.h
intern/cycles/kernel/split/kernel_scene_intersect.h
intern/cycles/kernel/split/kernel_shader_eval.h
intern/cycles/kernel/split/kernel_shadow_blocked.h
intern/cycles/kernel/split/kernel_split_common.h
intern/cycles/kernel/split/kernel_split_data.h
intern/cycles/kernel/split/kernel_split_data_types.h
intern/cycles/kernel/split/kernel_subsurface_scatter.h [new file with mode: 0644]

index c740cada98b72d496b2f7bd3490c7862e6886e94..18e8e27417227241f7988fe6b94d4787cea0ea18 100644 (file)
@@ -194,7 +194,7 @@ public:
                if(!use_patch_evaluation) {
                        build_options += " -D__NO_PATCH_EVAL__";
                }
-               if(!use_transparent) {
+               if(!use_transparent && !use_volume) {
                        build_options += " -D__NO_TRANSPARENT__";
                }
                return build_options;
index b9705077fbf1f646976eee031c00090d243cc96c..6ab0b3c577796279eecddcbf7a49639a88e1d95b 100644 (file)
@@ -35,13 +35,17 @@ DeviceSplitKernel::DeviceSplitKernel(Device *device) : device(device)
        kernel_path_init = NULL;
        kernel_scene_intersect = NULL;
        kernel_lamp_emission = NULL;
+       kernel_do_volume = NULL;
        kernel_queue_enqueue = NULL;
-       kernel_background_buffer_update = NULL;
+       kernel_indirect_background = NULL;
        kernel_shader_eval = NULL;
        kernel_holdout_emission_blurring_pathtermination_ao = NULL;
+       kernel_subsurface_scatter = NULL;
        kernel_direct_lighting = NULL;
        kernel_shadow_blocked = NULL;
        kernel_next_iteration_setup = NULL;
+       kernel_indirect_subsurface = NULL;
+       kernel_buffer_update = NULL;
 }
 
 DeviceSplitKernel::~DeviceSplitKernel()
@@ -55,13 +59,17 @@ DeviceSplitKernel::~DeviceSplitKernel()
        delete kernel_path_init;
        delete kernel_scene_intersect;
        delete kernel_lamp_emission;
+       delete kernel_do_volume;
        delete kernel_queue_enqueue;
-       delete kernel_background_buffer_update;
+       delete kernel_indirect_background;
        delete kernel_shader_eval;
        delete kernel_holdout_emission_blurring_pathtermination_ao;
+       delete kernel_subsurface_scatter;
        delete kernel_direct_lighting;
        delete kernel_shadow_blocked;
        delete kernel_next_iteration_setup;
+       delete kernel_indirect_subsurface;
+       delete kernel_buffer_update;
 }
 
 bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_features)
@@ -75,13 +83,17 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
        LOAD_KERNEL(path_init);
        LOAD_KERNEL(scene_intersect);
        LOAD_KERNEL(lamp_emission);
+       LOAD_KERNEL(do_volume);
        LOAD_KERNEL(queue_enqueue);
-       LOAD_KERNEL(background_buffer_update);
+       LOAD_KERNEL(indirect_background);
        LOAD_KERNEL(shader_eval);
        LOAD_KERNEL(holdout_emission_blurring_pathtermination_ao);
+       LOAD_KERNEL(subsurface_scatter);
        LOAD_KERNEL(direct_lighting);
        LOAD_KERNEL(shadow_blocked);
        LOAD_KERNEL(next_iteration_setup);
+       LOAD_KERNEL(indirect_subsurface);
+       LOAD_KERNEL(buffer_update);
 
 #undef LOAD_KERNEL
 
@@ -220,13 +232,18 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
                        for(int PathIter = 0; PathIter < 16; PathIter++) {
                                ENQUEUE_SPLIT_KERNEL(scene_intersect, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(lamp_emission, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(do_volume, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
-                               ENQUEUE_SPLIT_KERNEL(background_buffer_update, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(indirect_background, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(shader_eval, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(holdout_emission_blurring_pathtermination_ao, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(subsurface_scatter, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(direct_lighting, global_size, local_size);
                                ENQUEUE_SPLIT_KERNEL(shadow_blocked, global_size_shadow_blocked, local_size);
                                ENQUEUE_SPLIT_KERNEL(next_iteration_setup, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(indirect_subsurface, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(queue_enqueue, global_size, local_size);
+                               ENQUEUE_SPLIT_KERNEL(buffer_update, global_size, local_size);
 
                                if(task->get_cancel()) {
                                        return true;
index cc3e1aa26ae216ac2c7081da323e484829f7008e..abaf350cbbbfbab58c28d0eaab5aeb5b812a0678 100644 (file)
@@ -58,13 +58,17 @@ private:
        SplitKernelFunction *kernel_path_init;
        SplitKernelFunction *kernel_scene_intersect;
        SplitKernelFunction *kernel_lamp_emission;
+       SplitKernelFunction *kernel_do_volume;
        SplitKernelFunction *kernel_queue_enqueue;
-       SplitKernelFunction *kernel_background_buffer_update;
+       SplitKernelFunction *kernel_indirect_background;
        SplitKernelFunction *kernel_shader_eval;
        SplitKernelFunction *kernel_holdout_emission_blurring_pathtermination_ao;
+       SplitKernelFunction *kernel_subsurface_scatter;
        SplitKernelFunction *kernel_direct_lighting;
        SplitKernelFunction *kernel_shadow_blocked;
        SplitKernelFunction *kernel_next_iteration_setup;
+       SplitKernelFunction *kernel_indirect_subsurface;
+       SplitKernelFunction *kernel_buffer_update;
 
        /* Global memory variables [porting]; These memory is used for
         * co-operation between different kernels; Data written by one
index 1c740b5c6ebb4c11f81b8cc52e29b2e2d7aea27a..30b3a6b52f7f4343d85139683c6eaf68c410f337 100644 (file)
@@ -21,12 +21,16 @@ set(SRC
        kernels/opencl/kernel_queue_enqueue.cl
        kernels/opencl/kernel_scene_intersect.cl
        kernels/opencl/kernel_lamp_emission.cl
-       kernels/opencl/kernel_background_buffer_update.cl
+       kernels/opencl/kernel_do_volume.cl
+       kernels/opencl/kernel_indirect_background.cl
        kernels/opencl/kernel_shader_eval.cl
        kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl
+       kernels/opencl/kernel_subsurface_scatter.cl
        kernels/opencl/kernel_direct_lighting.cl
        kernels/opencl/kernel_shadow_blocked.cl
        kernels/opencl/kernel_next_iteration_setup.cl
+       kernels/opencl/kernel_indirect_subsurface.cl
+       kernels/opencl/kernel_buffer_update.cl
        kernels/cuda/kernel.cu
        kernels/cuda/kernel_split.cu
 )
@@ -71,6 +75,7 @@ set(SRC_HEADERS
        kernel_path_common.h
        kernel_path_state.h
        kernel_path_surface.h
+       kernel_path_subsurface.h
        kernel_path_volume.h
        kernel_projection.h
        kernel_queues.h
@@ -196,10 +201,13 @@ set(SRC_UTIL_HEADERS
 )
 
 set(SRC_SPLIT_HEADERS
-       split/kernel_background_buffer_update.h
+       split/kernel_buffer_update.h
        split/kernel_data_init.h
        split/kernel_direct_lighting.h
+       split/kernel_do_volume.h
        split/kernel_holdout_emission_blurring_pathtermination_ao.h
+       split/kernel_indirect_background.h
+       split/kernel_indirect_subsurface.h
        split/kernel_lamp_emission.h
        split/kernel_next_iteration_setup.h
        split/kernel_path_init.h
@@ -210,6 +218,7 @@ set(SRC_SPLIT_HEADERS
        split/kernel_split_common.h
        split/kernel_split_data.h
        split/kernel_split_data_types.h
+       split/kernel_subsurface_scatter.h
 )
 
 # CUDA module
@@ -407,12 +416,16 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl"
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
-delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_background_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_do_volume.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_background.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shader_eval.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_holdout_emission_blurring_pathtermination_ao.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_subsurface_scatter.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_direct_lighting.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_shadow_blocked.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_next_iteration_setup.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_indirect_subsurface.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_buffer_update.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/cuda/kernel_split.cu" ${CYCLES_INSTALL_PATH}/kernel/kernels/cuda)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
index df33a86bb18d6400b32bca498fe223a930a22ccd..b4f65bc8efd34f9bbd70d600ce02effd63223ebb 100644 (file)
@@ -309,9 +309,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                        object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-                                       bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
+                                       isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-                                       bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+                                       isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
                                        triangle_intersect_precalc(dir, &isect_precalc);
@@ -362,12 +362,10 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                }
                        }
                        else {
-                               float ignore_t = FLT_MAX;
-
 #  if BVH_FEATURE(BVH_MOTION)
-                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
+                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
                                triangle_intersect_precalc(dir, &isect_precalc);
                        }
index 889bbca21e2f47f0e3a59208addd831348a709df..583f7f7c469030edadc0d2c619b3daafbac5171e 100644 (file)
@@ -75,16 +75,16 @@ void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
        if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
                Transform ob_itfm;
-               bvh_instance_motion_push(kg,
-                                        subsurface_object,
-                                        ray,
-                                        &P,
-                                        &dir,
-                                        &idir,
-                                        &isect_t,
-                                        &ob_itfm);
+               isect_t = bvh_instance_motion_push(kg,
+                                                  subsurface_object,
+                                                  ray,
+                                                  &P,
+                                                  &dir,
+                                                  &idir,
+                                                  isect_t,
+                                                  &ob_itfm);
 #else
-               bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
+               isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
 #endif
                object = subsurface_object;
        }
index 80c8f31473a6629c838c42f5a9da4131eaba3393..0eca0c8e38b51a374b3718b94a45aee5fae03b06 100644 (file)
@@ -354,9 +354,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                        object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-                                       bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                                       isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                                       bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+                                       isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
                                        triangle_intersect_precalc(dir, &isect_precalc);
 
@@ -391,9 +391,9 @@ ccl_device_noinline bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 
                        /* instance pop */
 #  if BVH_FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+                       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
                        triangle_intersect_precalc(dir, &isect_precalc);
 
index 57e5b8d736d6b1bb2982359adf04459baa2a903f..136034aa4844cbd218ff1a3902e9429048bc5aac 100644 (file)
@@ -238,9 +238,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                        int object_flag = kernel_tex_fetch(__object_flag, object);
                                        if(object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                                               isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                                               bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+                                               isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
                                                triangle_intersect_precalc(dir, &isect_precalc);
@@ -281,9 +281,9 @@ bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
 
                        /* instance pop */
 #  if BVH_FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+                       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
                        triangle_intersect_precalc(dir, &isect_precalc);
index 5a1accebaa04a59b893906b0c63ddf9b04dda85c..6f3346e76347369372f95559aad04b614c408b89 100644 (file)
@@ -288,11 +288,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                        object = kernel_tex_fetch(__prim_object, -prim_addr-1);
                                        int object_flag = kernel_tex_fetch(__object_flag, object);
                                        if(object_flag & SD_OBJECT_HAS_VOLUME) {
-
 #  if BVH_FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
+                                               isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-                                               bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+                                               isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
                                                triangle_intersect_precalc(dir, &isect_precalc);
@@ -348,11 +347,10 @@ uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
                                }
                        }
                        else {
-                               float ignore_t = FLT_MAX;
 #  if BVH_FEATURE(BVH_MOTION)
-                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
+                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
                                triangle_intersect_precalc(dir, &isect_precalc);
                        }
index 607295f9ed5b9de95ef03308b946ace7c19dec24..2a4da3eea8287bf97156a4230ed55ae24e6087e9 100644 (file)
@@ -390,9 +390,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                        object = kernel_tex_fetch(__prim_object, -prim_addr-1);
 
 #  if BVH_FEATURE(BVH_MOTION)
-                                       bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
+                                       isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-                                       bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+                                       isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
                                        num_hits_in_instance = 0;
@@ -445,11 +445,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                }
                        }
                        else {
-                               float ignore_t = FLT_MAX;
 #  if BVH_FEATURE(BVH_MOTION)
-                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
+                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
                        }
 
index 84dc4003133a19f2d4220343515ce6bf9161ca50..a6431a94e6e0ea5ad6fd5d9a0414a243d50300e9 100644 (file)
@@ -64,16 +64,16 @@ ccl_device void BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
        if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
 #if BVH_FEATURE(BVH_MOTION)
                Transform ob_itfm;
-               bvh_instance_motion_push(kg,
-                                        subsurface_object,
-                                        ray,
-                                        &P,
-                                        &dir,
-                                        &idir,
-                                        &isect_t,
-                                        &ob_itfm);
+               isect_t = bvh_instance_motion_push(kg,
+                                                  subsurface_object,
+                                                  ray,
+                                                  &P,
+                                                  &dir,
+                                                  &idir,
+                                                  isect_t,
+                                                  &ob_itfm);
 #else
-               bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, &isect_t);
+               isect_t = bvh_instance_push(kg, subsurface_object, ray, &P, &dir, &idir, isect_t);
 #endif
                object = subsurface_object;
        }
index 10ae7bee8524701adbc7f0b3ee096145550a8c3b..c20a8f3703fdd9fbea0258b20de1b3dc50f56f8d 100644 (file)
@@ -468,9 +468,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 
                        /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+                       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
                        qbvh_near_far_idx_calc(idir,
index dc6627e2dbb35a71e553553db26ea9db417a4cf7..859c5da808b61dcab08789afb97f24c92cd0021a 100644 (file)
@@ -295,9 +295,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                        int object_flag = kernel_tex_fetch(__object_flag, object);
                                        if(object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                                               isect->t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                                               bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
+                                               isect->t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
                                                qbvh_near_far_idx_calc(idir,
@@ -341,9 +341,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
 
                        /* Instance pop. */
 #  if BVH_FEATURE(BVH_MOTION)
-                       bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_itfm);
+                       isect->t = bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, isect->t, &ob_itfm);
 #  else
-                       bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &isect->t);
+                       isect->t = bvh_instance_pop(kg, object, ray, &P, &dir, &idir, isect->t);
 #  endif
 
                        qbvh_near_far_idx_calc(idir,
index ff1fa92af6ee56d5df0cfa7d60272f70998ff9ee..bbe588c878faaed7580aff109a981d14aa0de5e9 100644 (file)
@@ -346,9 +346,9 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                        int object_flag = kernel_tex_fetch(__object_flag, object);
                                        if(object_flag & SD_OBJECT_HAS_VOLUME) {
 #  if BVH_FEATURE(BVH_MOTION)
-                                               bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect_t, &ob_itfm);
+                                               isect_t = bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, isect_t, &ob_itfm);
 #  else
-                                               bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect_t);
+                                               isect_t = bvh_instance_push(kg, object, ray, &P, &dir, &idir, isect_t);
 #  endif
 
                                                qbvh_near_far_idx_calc(idir,
@@ -406,11 +406,10 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
                                }
                        }
                        else {
-                               float ignore_t = FLT_MAX;
 #  if BVH_FEATURE(BVH_MOTION)
-                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, &ignore_t, &ob_itfm);
+                               bvh_instance_motion_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX, &ob_itfm);
 #  else
-                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, &ignore_t);
+                               bvh_instance_pop(kg, object, ray, &P, &dir, &idir, FLT_MAX);
 #  endif
                        }
 
index 5a04be8b0bfe6c12b80e2387b1602c94294316ae..6ecdfe0173a0806cceec881fd1bffd97ac6db186 100644 (file)
@@ -425,7 +425,13 @@ ccl_device_inline float3 bvh_inverse_direction(float3 dir)
 
 /* Transform ray into object space to enter static object in BVH */
 
-ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
+ccl_device_inline float bvh_instance_push(KernelGlobals *kg,
+                                          int object,
+                                          const Ray *ray,
+                                          float3 *P,
+                                          float3 *dir,
+                                          float3 *idir,
+                                          float t)
 {
        Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
 
@@ -435,8 +441,11 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra
        *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
        *idir = bvh_inverse_direction(*dir);
 
-       if(*t != FLT_MAX)
-               *t *= len;
+       if(t != FLT_MAX) {
+               t *= len;
+       }
+
+       return t;
 }
 
 #ifdef __QBVH__
@@ -473,16 +482,24 @@ ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
 
 /* Transorm ray to exit static object in BVH */
 
-ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, ccl_addr_space float *t)
+ccl_device_inline float bvh_instance_pop(KernelGlobals *kg,
+                                         int object,
+                                         const Ray *ray,
+                                         float3 *P,
+                                         float3 *dir,
+                                         float3 *idir,
+                                         float t)
 {
-       if(*t != FLT_MAX) {
+       if(t != FLT_MAX) {
                Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
-               *t /= len(transform_direction(&tfm, ray->D));
+               t /= len(transform_direction(&tfm, ray->D));
        }
 
        *P = ray->P;
        *dir = bvh_clamp_direction(ray->D);
        *idir = bvh_inverse_direction(*dir);
+
+       return t;
 }
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
@@ -501,13 +518,13 @@ ccl_device_inline void bvh_instance_pop_factor(KernelGlobals *kg, int object, co
 #ifdef __OBJECT_MOTION__
 /* Transform ray into object space to enter motion blurred object in BVH */
 
-ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
+ccl_device_inline float bvh_instance_motion_push(KernelGlobals *kg,
                                                 int object,
                                                 const Ray *ray,
                                                 float3 *P,
                                                 float3 *dir,
                                                 float3 *idir,
-                                                ccl_addr_space float *t,
+                                                float t,
                                                 Transform *itfm)
 {
        object_fetch_transform_motion_test(kg, object, ray->time, itfm);
@@ -518,8 +535,11 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg,
        *dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
        *idir = bvh_inverse_direction(*dir);
 
-       if(*t != FLT_MAX)
-               *t *= len;
+       if(t != FLT_MAX) {
+               t *= len;
+       }
+
+       return t;
 }
 
 #ifdef __QBVH__
@@ -557,22 +577,24 @@ ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
 
 /* Transorm ray to exit motion blurred object in BVH */
 
-ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg,
-                                               int object,
-                                               const Ray *ray,
-                                               float3 *P,
-                                               float3 *dir,
-                                               float3 *idir,
-                                               ccl_addr_space float *t,
-                                               Transform *itfm)
-{
-       if(*t != FLT_MAX) {
-               *t /= len(transform_direction(itfm, ray->D));
+ccl_device_inline float bvh_instance_motion_pop(KernelGlobals *kg,
+                                                int object,
+                                                const Ray *ray,
+                                                float3 *P,
+                                                float3 *dir,
+                                                float3 *idir,
+                                                float t,
+                                                Transform *itfm)
+{
+       if(t != FLT_MAX) {
+               t /= len(transform_direction(itfm, ray->D));
        }
 
        *P = ray->P;
        *dir = bvh_clamp_direction(ray->D);
        *idir = bvh_inverse_direction(*dir);
+
+       return t;
 }
 
 /* Same as above, but returns scale factor to apply to multiple intersection distances */
index 95c27850513f5ce315bbcebbd1b6071cccdee360..ebf03ad9778f62504e37d920789e3acb31e8ae8c 100644 (file)
@@ -46,6 +46,7 @@
 #include "kernel_path_common.h"
 #include "kernel_path_surface.h"
 #include "kernel_path_volume.h"
+#include "kernel_path_subsurface.h"
 
 #ifdef __KERNEL_DEBUG__
 #  include "kernel_debug.h"
@@ -413,172 +414,6 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
        }
 }
 
-#ifdef __SUBSURFACE__
-#  ifndef __KERNEL_CUDA__
-ccl_device
-#  else
-ccl_device_inline
-#  endif
-bool kernel_path_subsurface_scatter(
-        KernelGlobals *kg,
-        ShaderData *sd,
-        ShaderData *emission_sd,
-        PathRadiance *L,
-        PathState *state,
-        RNG *rng,
-        Ray *ray,
-        float3 *throughput,
-        SubsurfaceIndirectRays *ss_indirect)
-{
-       float bssrdf_probability;
-       ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
-
-       /* modify throughput for picking bssrdf or bsdf */
-       *throughput *= bssrdf_probability;
-
-       /* do bssrdf scatter step if we picked a bssrdf closure */
-       if(sc) {
-               /* We should never have two consecutive BSSRDF bounces,
-                * the second one should be converted to a diffuse BSDF to
-                * avoid this.
-                */
-               kernel_assert(!ss_indirect->tracing);
-
-               uint lcg_state = lcg_state_init(rng, state, 0x68bc21eb);
-
-               SubsurfaceIntersection ss_isect;
-               float bssrdf_u, bssrdf_v;
-               path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
-               int num_hits = subsurface_scatter_multi_intersect(kg,
-                                                                 &ss_isect,
-                                                                 sd,
-                                                                 sc,
-                                                                 &lcg_state,
-                                                                 bssrdf_u, bssrdf_v,
-                                                                 false);
-#  ifdef __VOLUME__
-               ss_indirect->need_update_volume_stack =
-                       kernel_data.integrator.use_volumes &&
-                       sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
-#  endif  /* __VOLUME__ */
-
-               /* compute lighting with the BSDF closure */
-               for(int hit = 0; hit < num_hits; hit++) {
-                       /* NOTE: We reuse the existing ShaderData, we assume the path
-                        * integration loop stops when this function returns true.
-                        */
-                       subsurface_scatter_multi_setup(kg,
-                                                      &ss_isect,
-                                                      hit,
-                                                      sd,
-                                                      state,
-                                                      state->flag,
-                                                      sc,
-                                                      false);
-
-                       PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
-                       Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
-                       float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
-                       PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
-
-                       *hit_state = *state;
-                       *hit_ray = *ray;
-                       *hit_tp = *throughput;
-
-                       hit_state->rng_offset += PRNG_BOUNCE_NUM;
-
-                       path_radiance_init(hit_L, kernel_data.film.use_light_pass);
-                       hit_L->direct_throughput = L->direct_throughput;
-                       path_radiance_copy_indirect(hit_L, L);
-
-                       kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
-
-                       if(kernel_path_surface_bounce(kg,
-                                                     rng,
-                                                     sd,
-                                                     hit_tp,
-                                                     hit_state,
-                                                     hit_L,
-                                                     hit_ray))
-                       {
-#  ifdef __LAMP_MIS__
-                               hit_state->ray_t = 0.0f;
-#  endif  /* __LAMP_MIS__ */
-
-#  ifdef __VOLUME__
-                               if(ss_indirect->need_update_volume_stack) {
-                                       Ray volume_ray = *ray;
-                                       /* Setup ray from previous surface point to the new one. */
-                                       volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
-                                                                    &volume_ray.t);
-
-                                       kernel_volume_stack_update_for_subsurface(
-                                           kg,
-                                           emission_sd,
-                                           &volume_ray,
-                                           hit_state->volume_stack);
-                               }
-#  endif  /* __VOLUME__ */
-                               path_radiance_reset_indirect(L);
-                               ss_indirect->num_rays++;
-                       }
-                       else {
-                               path_radiance_accum_sample(L, hit_L, 1);
-                       }
-               }
-               return true;
-       }
-       return false;
-}
-
-ccl_device_inline void kernel_path_subsurface_init_indirect(
-        SubsurfaceIndirectRays *ss_indirect)
-{
-       ss_indirect->tracing = false;
-       ss_indirect->num_rays = 0;
-}
-
-ccl_device void kernel_path_subsurface_accum_indirect(
-        SubsurfaceIndirectRays *ss_indirect,
-        PathRadiance *L)
-{
-       if(ss_indirect->tracing) {
-               path_radiance_sum_indirect(L);
-               path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
-               if(ss_indirect->num_rays == 0) {
-                       *L = ss_indirect->direct_L;
-               }
-       }
-}
-
-ccl_device void kernel_path_subsurface_setup_indirect(
-        KernelGlobals *kg,
-        SubsurfaceIndirectRays *ss_indirect,
-        PathState *state,
-        Ray *ray,
-        PathRadiance *L,
-        float3 *throughput)
-{
-       if(!ss_indirect->tracing) {
-               ss_indirect->direct_L = *L;
-       }
-       ss_indirect->tracing = true;
-
-       /* Setup state, ray and throughput for indirect SSS rays. */
-       ss_indirect->num_rays--;
-
-       Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
-       PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
-
-       *state = ss_indirect->state[ss_indirect->num_rays];
-       *ray = *indirect_ray;
-       *L = *indirect_L;
-       *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
-
-       state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
-}
-
-#endif  /* __SUBSURFACE__ */
 
 ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
                                                RNG *rng,
diff --git a/intern/cycles/kernel/kernel_path_subsurface.h b/intern/cycles/kernel/kernel_path_subsurface.h
new file mode 100644 (file)
index 0000000..d22ec99
--- /dev/null
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#ifdef __SUBSURFACE__
+#  ifndef __KERNEL_CUDA__
+ccl_device
+#  else
+ccl_device_inline
+#  endif
+bool kernel_path_subsurface_scatter(
+        KernelGlobals *kg,
+        ShaderData *sd,
+        ShaderData *emission_sd,
+        PathRadiance *L,
+        ccl_addr_space PathState *state,
+        ccl_addr_space RNG *rng,
+        ccl_addr_space Ray *ray,
+        ccl_addr_space float3 *throughput,
+        ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+{
+       float bssrdf_probability;
+       ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
+
+       /* modify throughput for picking bssrdf or bsdf */
+       *throughput *= bssrdf_probability;
+
+       /* do bssrdf scatter step if we picked a bssrdf closure */
+       if(sc) {
+               /* We should never have two consecutive BSSRDF bounces,
+                * the second one should be converted to a diffuse BSDF to
+                * avoid this.
+                */
+               kernel_assert(!ss_indirect->tracing);
+
+               uint lcg_state = lcg_state_init_addrspace(rng, state, 0x68bc21eb);
+
+               SubsurfaceIntersection ss_isect;
+               float bssrdf_u, bssrdf_v;
+               path_state_rng_2D(kg, rng, state, PRNG_BSDF_U, &bssrdf_u, &bssrdf_v);
+               int num_hits = subsurface_scatter_multi_intersect(kg,
+                                                                 &ss_isect,
+                                                                 sd,
+                                                                 sc,
+                                                                 &lcg_state,
+                                                                 bssrdf_u, bssrdf_v,
+                                                                 false);
+#  ifdef __VOLUME__
+               ss_indirect->need_update_volume_stack =
+                       kernel_data.integrator.use_volumes &&
+                       sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
+#  endif  /* __VOLUME__ */
+
+               /* compute lighting with the BSDF closure */
+               for(int hit = 0; hit < num_hits; hit++) {
+                       /* NOTE: We reuse the existing ShaderData, we assume the path
+                        * integration loop stops when this function returns true.
+                        */
+                       subsurface_scatter_multi_setup(kg,
+                                                      &ss_isect,
+                                                      hit,
+                                                      sd,
+                                                      state,
+                                                      state->flag,
+                                                      sc,
+                                                      false);
+
+                       ccl_addr_space PathState *hit_state = &ss_indirect->state[ss_indirect->num_rays];
+                       ccl_addr_space Ray *hit_ray = &ss_indirect->rays[ss_indirect->num_rays];
+                       ccl_addr_space float3 *hit_tp = &ss_indirect->throughputs[ss_indirect->num_rays];
+                       PathRadiance *hit_L = &ss_indirect->L[ss_indirect->num_rays];
+
+                       *hit_state = *state;
+                       *hit_ray = *ray;
+                       *hit_tp = *throughput;
+
+                       hit_state->rng_offset += PRNG_BOUNCE_NUM;
+
+                       path_radiance_init(hit_L, kernel_data.film.use_light_pass);
+                       hit_L->direct_throughput = L->direct_throughput;
+                       path_radiance_copy_indirect(hit_L, L);
+
+                       kernel_path_surface_connect_light(kg, rng, sd, emission_sd, *hit_tp, state, hit_L);
+
+                       if(kernel_path_surface_bounce(kg,
+                                                     rng,
+                                                     sd,
+                                                     hit_tp,
+                                                     hit_state,
+                                                     hit_L,
+                                                     hit_ray))
+                       {
+#  ifdef __LAMP_MIS__
+                               hit_state->ray_t = 0.0f;
+#  endif  /* __LAMP_MIS__ */
+
+#  ifdef __VOLUME__
+                               if(ss_indirect->need_update_volume_stack) {
+                                       Ray volume_ray = *ray;
+                                       /* Setup ray from previous surface point to the new one. */
+                                       volume_ray.D = normalize_len(hit_ray->P - volume_ray.P,
+                                                                    &volume_ray.t);
+
+                                       kernel_volume_stack_update_for_subsurface(
+                                           kg,
+                                           emission_sd,
+                                           &volume_ray,
+                                           hit_state->volume_stack);
+                               }
+#  endif  /* __VOLUME__ */
+                               path_radiance_reset_indirect(L);
+                               ss_indirect->num_rays++;
+                       }
+                       else {
+                               path_radiance_accum_sample(L, hit_L, 1);
+                       }
+               }
+               return true;
+       }
+       return false;
+}
+
+ccl_device_inline void kernel_path_subsurface_init_indirect(
+        ccl_addr_space SubsurfaceIndirectRays *ss_indirect)
+{
+       ss_indirect->tracing = false;
+       ss_indirect->num_rays = 0;
+}
+
+ccl_device void kernel_path_subsurface_accum_indirect(
+        ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
+        PathRadiance *L)
+{
+       if(ss_indirect->tracing) {
+               path_radiance_sum_indirect(L);
+               path_radiance_accum_sample(&ss_indirect->direct_L, L, 1);
+               if(ss_indirect->num_rays == 0) {
+                       *L = ss_indirect->direct_L;
+               }
+       }
+}
+
+ccl_device void kernel_path_subsurface_setup_indirect(
+        KernelGlobals *kg,
+        ccl_addr_space SubsurfaceIndirectRays *ss_indirect,
+        ccl_addr_space PathState *state,
+        ccl_addr_space Ray *ray,
+        PathRadiance *L,
+        ccl_addr_space float3 *throughput)
+{
+       if(!ss_indirect->tracing) {
+               ss_indirect->direct_L = *L;
+       }
+       ss_indirect->tracing = true;
+
+       /* Setup state, ray and throughput for indirect SSS rays. */
+       ss_indirect->num_rays--;
+
+       ccl_addr_space Ray *indirect_ray = &ss_indirect->rays[ss_indirect->num_rays];
+       PathRadiance *indirect_L = &ss_indirect->L[ss_indirect->num_rays];
+
+       *state = ss_indirect->state[ss_indirect->num_rays];
+       *ray = *indirect_ray;
+       *L = *indirect_L;
+       *throughput = ss_indirect->throughputs[ss_indirect->num_rays];
+
+       state->rng_offset += ss_indirect->num_rays * PRNG_BOUNCE_NUM;
+}
+
+#endif  /* __SUBSURFACE__ */
+
+CCL_NAMESPACE_END
+
index 34a78552c1da8cea5f32ad26bbbc9047663886e4..efa23038089a1c4f4484545e798f51b071459518 100644 (file)
@@ -16,7 +16,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#if defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)
+#if (defined(__BRANCHED_PATH__) || defined(__SUBSURFACE__)) && !defined(__SPLIT_KERNEL__)
 
 /* branched path tracing: connect path directly to position on one or more lights and add it to L */
 ccl_device_noinline void kernel_branched_path_surface_connect_light(KernelGlobals *kg, RNG *rng,
@@ -188,7 +188,6 @@ ccl_device bool kernel_branched_path_surface_bounce(KernelGlobals *kg, RNG *rng,
 
 #endif
 
-#ifndef __SPLIT_KERNEL__
 /* path tracing: connect path directly to position on a light and add it to L */
 ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_addr_space RNG *rng,
        ShaderData *sd, ShaderData *emission_sd, float3 throughput, ccl_addr_space PathState *state,
@@ -226,7 +225,6 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
        }
 #endif
 }
-#endif
 
 /* path tracing: bounce off or through surface to with new direction stored in ray */
 ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
index 3d3b7385d8b7aaf8c707f3bcfc574517eb829966..28e1b5ba98e7ad8c5f758f7461bfc362efc41a3d 100644 (file)
@@ -20,11 +20,11 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline void kernel_path_volume_connect_light(
         KernelGlobals *kg,
-        RNG *rng,
+        ccl_addr_space RNG *rng,
         ShaderData *sd,
         ShaderData *emission_sd,
         float3 throughput,
-        PathState *state,
+        ccl_addr_space PathState *state,
         PathRadiance *L)
 {
 #ifdef __EMISSION__
@@ -59,7 +59,7 @@ ccl_device_inline void kernel_path_volume_connect_light(
                        }
                }
        }
-#endif
+#endif /* __EMISSION__ */
 }
 
 #ifdef __KERNEL_GPU__
@@ -67,8 +67,14 @@ ccl_device_noinline
 #else
 ccl_device
 #endif
-bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
-       ShaderData *sd, float3 *throughput, PathState *state, PathRadiance *L, Ray *ray)
+bool kernel_path_volume_bounce(
+    KernelGlobals *kg,
+    ccl_addr_space RNG *rng,
+    ShaderData *sd,
+    ccl_addr_space float3 *throughput,
+    ccl_addr_space PathState *state,
+    PathRadiance *L,
+    ccl_addr_space Ray *ray)
 {
        /* sample phase function */
        float phase_pdf;
@@ -111,6 +117,7 @@ bool kernel_path_volume_bounce(KernelGlobals *kg, RNG *rng,
        return true;
 }
 
+#ifdef __BRANCHED_PATH__
 ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
        ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L,
        bool sample_all_lights, Ray *ray, const VolumeSegment *segment)
@@ -261,10 +268,11 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg, RNG
                        }
                }
        }
-#endif
+#endif /* __EMISSION__ */
 }
+#endif /* __BRANCHED_PATH__ */
 
-#endif
+#endif /* __VOLUME_SCATTER__ */
 
 CCL_NAMESPACE_END
 
index a2ab96b35e23206904b23b1cd50837143ef78285..93a92c63a4087674050f4f94a88041c3d65b7b12 100644 (file)
@@ -203,11 +203,11 @@ void shader_setup_from_subsurface(
 #  ifdef __INSTANCING__
        if(isect->object != OBJECT_NONE) {
                /* instance transform */
-               object_normal_transform(kg, sd, &sd->N);
-               object_normal_transform(kg, sd, &sd->Ng);
+               object_normal_transform_auto(kg, sd, &sd->N);
+               object_normal_transform_auto(kg, sd, &sd->Ng);
 #    ifdef __DPDU__
-               object_dir_transform(kg, sd, &sd->dPdu);
-               object_dir_transform(kg, sd, &sd->dPdv);
+               object_dir_transform_auto(kg, sd, &sd->dPdu);
+               object_dir_transform_auto(kg, sd, &sd->dPdv);
 #    endif
        }
 #  endif
@@ -816,7 +816,7 @@ ccl_device float3 shader_bssrdf_sum(ShaderData *sd, float3 *N_, float *texture_b
                *N_ = (is_zero(N))? sd->N: normalize(N);
 
        if(texture_blur_)
-               *texture_blur_ = texture_blur/weight_sum;
+               *texture_blur_ = safe_divide(texture_blur, weight_sum);
        
        return eval;
 }
@@ -1036,8 +1036,8 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
 
 ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
                                           ShaderData *sd,
-                                          PathState *state,
-                                          VolumeStack *stack,
+                                          ccl_addr_space PathState *state,
+                                          ccl_addr_space VolumeStack *stack,
                                           int path_flag,
                                           ShaderContext ctx)
 {
index 2483c5f9ae121d155cd48499aeb4986373c0eca5..68a7ccfd90351a6854eadea7481bdbf7c2256300 100644 (file)
@@ -24,7 +24,7 @@ ccl_device_forceinline bool shadow_handle_transparent_isect(
         ShaderData *shadow_sd,
         ccl_addr_space PathState *state,
 #    ifdef __VOLUME__
-        struct PathState *volume_state,
+        ccl_addr_space struct PathState *volume_state,
 #    endif
         Intersection *isect,
         Ray *ray,
@@ -276,7 +276,13 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
                float3 Pend = ray->P + ray->D*ray->t;
                int bounce = state->transparent_bounce;
 #    ifdef __VOLUME__
-               PathState ps = *state;
+#      ifdef __SPLIT_KERNEL__
+               ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
+#      else
+               PathState ps_object;
+               PathState *ps = &ps_object;
+#      endif
+               *ps = *state;
 #    endif
                for(;;) {
                        if(bounce >= kernel_data.integrator.transparent_max_bounce) {
@@ -299,7 +305,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
                                                           shadow_sd,
                                                           state,
 #ifdef __VOLUME__
-                                                          &ps,
+                                                          ps,
 #endif
                                                           isect,
                                                           ray,
@@ -316,8 +322,8 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(
                }
 #    ifdef __VOLUME__
                /* Attenuation for last line segment towards light. */
-               if(ps.volume_stack[0].shader != SHADER_NONE) {
-                       kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+               if(ps->volume_stack[0].shader != SHADER_NONE) {
+                       kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
                }
 #    endif
                *shadow *= throughput;
@@ -365,21 +371,11 @@ ccl_device bool shadow_blocked_transparent_stepped(
 ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                       ShaderData *shadow_sd,
                                       ccl_addr_space PathState *state,
-                                      ccl_addr_space Ray *ray_input,
+                                      Ray *ray_input,
                                       float3 *shadow)
 {
-       /* Special trickery for split kernel: some data is coming from the
-        * global memory.
-        */
-#ifdef __SPLIT_KERNEL__
-       Ray private_ray = *ray_input;
-       Ray *ray = &private_ray;
-       Intersection *isect = &kernel_split_state.isect_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
-#else  /* __SPLIT_KERNEL__ */
        Ray *ray = ray_input;
-       Intersection isect_object;
-       Intersection *isect = &isect_object;
-#endif  /* __SPLIT_KERNEL__ */
+       Intersection isect;
        /* Some common early checks. */
        *shadow = make_float3(1.0f, 1.0f, 1.0f);
        if(ray->t == 0.0f) {
@@ -397,7 +393,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                             shadow_sd,
                                             state,
                                             ray,
-                                            isect,
+                                            &isect,
                                             shadow);
        }
 #ifdef __TRANSPARENT_SHADOWS__
@@ -423,11 +419,11 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
        const bool blocked = scene_intersect(kg,
                                             *ray,
                                             PATH_RAY_SHADOW_OPAQUE,
-                                            isect,
+                                            &isect,
                                             NULL,
                                             0.0f, 0.0f);
        const bool is_transparent_isect = blocked
-               ? shader_transparent_shadow(kg, isect)
+               ? shader_transparent_shadow(kg, &isect)
                : false;
        if(!blocked || !is_transparent_isect ||
           max_hits + 1 >= SHADOW_STACK_MAX_HITS)
@@ -436,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                                               shadow_sd,
                                                               state,
                                                               ray,
-                                                              isect,
+                                                              &isect,
                                                               blocked,
                                                               is_transparent_isect,
                                                               shadow);
@@ -454,7 +450,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
                                                  shadow_sd,
                                                  state,
                                                  ray,
-                                                 isect,
+                                                 &isect,
                                                  shadow);
 #  endif  /* __SHADOW_RECORD_ALL__ */
 #endif  /* __TRANSPARENT_SHADOWS__ */
index a8fa64325426a3aa672b3765572b33452ba949db..fe88ba4ff05faf6a95a7e45d61474410c5138fc1 100644 (file)
@@ -185,7 +185,7 @@ ccl_device float3 subsurface_color_pow(float3 color, float exponent)
 
 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
                                            ShaderData *sd,
-                                           PathState *state,
+                                           ccl_addr_space PathState *state,
                                            int state_flag,
                                            float3 *eval,
                                            float3 *N)
@@ -277,7 +277,12 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
        float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
 
        /* create ray */
+#ifdef __SPLIT_KERNEL__
+       Ray ray_object = ss_isect->ray;
+       Ray *ray = &ray_object;
+#else
        Ray *ray = &ss_isect->ray;
+#endif
        ray->P = sd->P + disk_N*disk_height + disk_P;
        ray->D = -disk_N;
        ray->t = 2.0f*disk_height;
@@ -351,6 +356,10 @@ ccl_device_inline int subsurface_scatter_multi_intersect(
                ss_isect->weight[hit] = eval;
        }
 
+#ifdef __SPLIT_KERNEL__
+       ss_isect->ray = *ray;
+#endif
+
        return num_eval_hits;
 }
 
@@ -359,13 +368,19 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
         SubsurfaceIntersection* ss_isect,
         int hit,
         ShaderData *sd,
-        PathState *state,
+        ccl_addr_space PathState *state,
         int state_flag,
         ShaderClosure *sc,
         bool all)
 {
+#ifdef __SPLIT_KERNEL__
+       Ray ray_object = ss_isect->ray;
+       Ray *ray = &ray_object;
+#else
+       Ray *ray = &ss_isect->ray;
+#endif
        /* Setup new shading point. */
-       shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
+       shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
 
        /* Optionally blur colors and bump mapping. */
        float3 weight = ss_isect->weight[hit];
@@ -376,6 +391,7 @@ ccl_device_noinline void subsurface_scatter_multi_setup(
        subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
 }
 
+#ifndef __SPLIT_KERNEL__
 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathState *state,
        int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
@@ -465,6 +481,7 @@ ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, PathS
        /* setup diffuse bsdf */
        subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
 }
+#endif /* ! __SPLIT_KERNEL__ */
 
 CCL_NAMESPACE_END
 
index a7faaef89ca3e4d6b87f876f4d954f6a527011c6..6c18cab6406bace6d3a2d299fcc0ed825bdc7019 100644 (file)
@@ -76,14 +76,12 @@ CCL_NAMESPACE_BEGIN
 #  ifdef WITH_OSL
 #    define __OSL__
 #  endif
-#  ifndef __SPLIT_KERNEL__
-#    define __SUBSURFACE__
-#  endif
+#  define __SUBSURFACE__
 #  define __CMJ__
+#  define __VOLUME__
+#  define __VOLUME_SCATTER__
 #  ifndef __SPLIT_KERNEL__
-#    define __VOLUME__
 #    define __VOLUME_DECOUPLED__
-#    define __VOLUME_SCATTER__
 #    define __SHADOW_RECORD_ALL__
 #    define __VOLUME_RECORD_ALL__
 #  endif
@@ -130,6 +128,9 @@ CCL_NAMESPACE_BEGIN
 #    define __CL_USE_NATIVE__
 #    define __KERNEL_SHADING__
 #    define __KERNEL_ADV_SHADING__
+#    define __SUBSURFACE__
+#    define __VOLUME__
+#    define __VOLUME_SCATTER__
 #  endif  /* __KERNEL_OPENCL_AMD__ */
 
 #  ifdef __KERNEL_OPENCL_INTEL_CPU__
@@ -552,7 +553,7 @@ typedef struct Ray {
 
 /* Intersection */
 
-typedef ccl_addr_space struct Intersection {
+typedef struct Intersection {
        float t, u, v;
        int prim;
        int object;
@@ -934,7 +935,7 @@ typedef struct PathState {
 /* Subsurface */
 
 /* Struct to gather multiple SSS hits. */
-struct SubsurfaceIntersection
+typedef struct SubsurfaceIntersection
 {
        Ray ray;
        float3 weight[BSSRDF_MAX_HITS];
@@ -942,10 +943,10 @@ struct SubsurfaceIntersection
        int num_hits;
        struct Intersection hits[BSSRDF_MAX_HITS];
        float3 Ng[BSSRDF_MAX_HITS];
-};
+} SubsurfaceIntersection;
 
 /* Struct to gather SSS indirect rays and delay tracing them. */
-struct SubsurfaceIndirectRays
+typedef struct SubsurfaceIndirectRays
 {
        bool need_update_volume_stack;
        bool tracing;
@@ -956,7 +957,7 @@ struct SubsurfaceIndirectRays
        struct Ray rays[BSSRDF_MAX_HITS];
        float3 throughputs[BSSRDF_MAX_HITS];
        struct PathRadiance L[BSSRDF_MAX_HITS];
-};
+} SubsurfaceIndirectRays;
 
 /* Constant Kernel Data
  *
index 10d0d185345baaea16a22ad9bb2f9ee1ec98d086..608350a90386bd4a2124778b6c00ee6e08425dd1 100644 (file)
@@ -38,7 +38,7 @@ typedef struct VolumeShaderCoefficients {
 /* evaluate shader to get extinction coefficient at P */
 ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
                                                        ShaderData *sd,
-                                                       PathState *state,
+                                                       ccl_addr_space PathState *state,
                                                        float3 P,
                                                        float3 *extinction)
 {
@@ -64,7 +64,7 @@ ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
 /* evaluate shader to get absorption, scattering and emission at P */
 ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
                                             ShaderData *sd,
-                                            PathState *state,
+                                            ccl_addr_space PathState *state,
                                             float3 P,
                                             VolumeShaderCoefficients *coeff)
 {
@@ -112,7 +112,7 @@ ccl_device float kernel_volume_channel_get(float3 value, int channel)
        return (channel == 0)? value.x: ((channel == 1)? value.y: value.z);
 }
 
-ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, VolumeStack *stack)
+ccl_device bool volume_stack_is_heterogeneous(KernelGlobals *kg, ccl_addr_space VolumeStack *stack)
 {
        for(int i = 0; stack[i].shader != SHADER_NONE; i++) {
                int shader_flag = kernel_tex_fetch(__shader_flag, (stack[i].shader & SHADER_MASK)*SHADER_SIZE);
@@ -161,7 +161,11 @@ ccl_device int volume_stack_sampling_method(KernelGlobals *kg, VolumeStack *stac
 
 /* homogeneous volume: assume shader evaluation at the starts gives
  * the extinction coefficient for the entire line segment */
-ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
+ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
+                                                 ccl_addr_space PathState *state,
+                                                 Ray *ray,
+                                                 ShaderData *sd,
+                                                 float3 *throughput)
 {
        float3 sigma_t;
 
@@ -171,7 +175,11 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg, PathState *s
 
 /* heterogeneous volume: integrate stepping through the volume until we
  * reach the end, get absorbed entirely, or run out of iterations */
-ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState *state, Ray *ray, ShaderData *sd, float3 *throughput)
+ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
+                                                   ccl_addr_space PathState *state,
+                                                   Ray *ray,
+                                                   ShaderData *sd,
+                                                   float3 *throughput)
 {
        float3 tp = *throughput;
        const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -179,7 +187,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
        /* prepare for stepping */
        int max_steps = kernel_data.integrator.volume_max_steps;
        float step = kernel_data.integrator.volume_step_size;
-       float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step;
+       float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step;
 
        /* compute extinction at the start */
        float t = 0.0f;
@@ -193,7 +201,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
 
                /* use random position inside this segment to sample shader */
                if(new_t == ray->t)
-                       random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+                       random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
 
                float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
                float3 sigma_t;
@@ -227,7 +235,11 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg, PathState
 
 /* get the volume attenuation over line segment defined by ray, with the
  * assumption that there are no surfaces blocking light between the endpoints */
-ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg, ShaderData *shadow_sd, PathState *state, Ray *ray, float3 *throughput)
+ccl_device_noinline void kernel_volume_shadow(KernelGlobals *kg,
+                                              ShaderData *shadow_sd,
+                                              ccl_addr_space PathState *state,
+                                              Ray *ray,
+                                              float3 *throughput)
 {
        shader_setup_from_volume(kg, shadow_sd, ray);
 
@@ -341,9 +353,15 @@ ccl_device float3 kernel_volume_emission_integrate(VolumeShaderCoefficients *coe
 
 /* homogeneous volume: assume shader evaluation at the start gives
  * the volume shading coefficient for the entire line segment */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGlobals *kg,
-       PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput,
-       RNG *rng, bool probalistic_scatter)
+ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(
+    KernelGlobals *kg,
+    ccl_addr_space PathState *state,
+    Ray *ray,
+    ShaderData *sd,
+    PathRadiance *L,
+    ccl_addr_space float3 *throughput,
+       ccl_addr_space RNG *rng,
+    bool probalistic_scatter)
 {
        VolumeShaderCoefficients coeff;
 
@@ -444,8 +462,14 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_homogeneous(KernelGloba
  * volume until we reach the end, get absorbed entirely, or run out of
  * iterations. this does probabilistically scatter or get transmitted through
  * for path tracing where we don't want to branch. */
-ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
-       PathState *state, Ray *ray, ShaderData *sd, PathRadiance *L, float3 *throughput, RNG *rng)
+ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
+    KernelGlobals *kg,
+    ccl_addr_space PathState *state,
+    Ray *ray,
+    ShaderData *sd,
+    PathRadiance *L,
+    ccl_addr_space float3 *throughput,
+    ccl_addr_space RNG *rng)
 {
        float3 tp = *throughput;
        const float tp_eps = 1e-6f; /* todo: this is likely not the right value */
@@ -453,7 +477,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
        /* prepare for stepping */
        int max_steps = kernel_data.integrator.volume_max_steps;
        float step_size = kernel_data.integrator.volume_step_size;
-       float random_jitter_offset = lcg_step_float(&state->rng_congruential) * step_size;
+       float random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * step_size;
 
        /* compute coefficients at the start */
        float t = 0.0f;
@@ -474,7 +498,7 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
 
                /* use random position inside this segment to sample shader */
                if(new_t == ray->t)
-                       random_jitter_offset = lcg_step_float(&state->rng_congruential) * dt;
+                       random_jitter_offset = lcg_step_float_addrspace(&state->rng_congruential) * dt;
 
                float3 new_P = ray->P + ray->D * (t + random_jitter_offset);
                VolumeShaderCoefficients coeff;
@@ -579,8 +603,15 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
-ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
-       PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
+ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(
+    KernelGlobals *kg,
+    ccl_addr_space PathState *state,
+    ShaderData *sd,
+    Ray *ray,
+    PathRadiance *L,
+    ccl_addr_space float3 *throughput,
+    ccl_addr_space RNG *rng,
+    bool heterogeneous)
 {
        shader_setup_from_volume(kg, sd, ray);
 
@@ -590,6 +621,7 @@ ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals
                return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
 }
 
+#ifndef __SPLIT_KERNEL__
 /* Decoupled Volume Sampling
  *
  * VolumeSegment is list of coefficients and transmittance stored at all steps
@@ -990,6 +1022,7 @@ ccl_device VolumeIntegrateResult kernel_volume_decoupled_scatter(
 
        return VOLUME_PATH_SCATTERED;
 }
+#endif /* __SPLIT_KERNEL */
 
 /* decide if we need to use decoupled or not */
 ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneous, bool direct, int sampling_method)
@@ -1021,9 +1054,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
 
 ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
                                          ShaderData *stack_sd,
-                                         const PathState *state,
-                                         const Ray *ray,
-                                         VolumeStack *stack)
+                                         ccl_addr_space const PathState *state,
+                                         ccl_addr_space const Ray *ray,
+                                         ccl_addr_space VolumeStack *stack)
 {
        /* NULL ray happens in the baker, does it need proper initialization of
         * camera in volume?
@@ -1166,7 +1199,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
        }
 }
 
-ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, VolumeStack *stack)
+ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd, ccl_addr_space VolumeStack *stack)
 {
        /* todo: we should have some way for objects to indicate if they want the
         * world shader to work inside them. excluding it by default is problematic
@@ -1215,7 +1248,7 @@ ccl_device void kernel_volume_stack_enter_exit(KernelGlobals *kg, ShaderData *sd
 ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
                                                           ShaderData *stack_sd,
                                                           Ray *ray,
-                                                          VolumeStack *stack)
+                                                          ccl_addr_space VolumeStack *stack)
 {
        kernel_assert(kernel_data.integrator.use_volumes);
 
@@ -1277,7 +1310,7 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
  * the world's one after the last bounce to avoid render artifacts.
  */
 ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
-                                                 VolumeStack *volume_stack)
+                                                 ccl_addr_space VolumeStack *volume_stack)
 {
        if(kernel_data.background.volume_shader != SHADER_NONE) {
                /* Keep the world's volume in stack. */
index deb872444d0840d67d82dcb11bd2914577ebde21..8ce420d8a4802664338f4ea6f44d0ff8e076ce05 100644 (file)
@@ -74,13 +74,17 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
 DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DECLARE_SPLIT_KERNEL_FUNCTION(do_volume)
 DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
-DECLARE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
+DECLARE_SPLIT_KERNEL_FUNCTION(indirect_background)
 DECLARE_SPLIT_KERNEL_FUNCTION(shader_eval)
 DECLARE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
+DECLARE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
 DECLARE_SPLIT_KERNEL_FUNCTION(direct_lighting)
 DECLARE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
 DECLARE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DECLARE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
+DECLARE_SPLIT_KERNEL_FUNCTION(buffer_update)
 
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func));
 
index d6d0db4e034dc4ffd5c1eb70046359600a4712aa..8c519a21d95bb5ae171deee6cecfd27e6b522c39 100644 (file)
 #  include "split/kernel_path_init.h"
 #  include "split/kernel_scene_intersect.h"
 #  include "split/kernel_lamp_emission.h"
+#  include "split/kernel_do_volume.h"
 #  include "split/kernel_queue_enqueue.h"
-#  include "split/kernel_background_buffer_update.h"
+#  include "split/kernel_indirect_background.h"
 #  include "split/kernel_shader_eval.h"
 #  include "split/kernel_holdout_emission_blurring_pathtermination_ao.h"
+#  include "split/kernel_subsurface_scatter.h"
 #  include "split/kernel_direct_lighting.h"
 #  include "split/kernel_shadow_blocked.h"
 #  include "split/kernel_next_iteration_setup.h"
+#  include "split/kernel_indirect_subsurface.h"
+#  include "split/kernel_buffer_update.h"
 #endif
 
 CCL_NAMESPACE_BEGIN
@@ -166,13 +170,17 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
 DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
+DEFINE_SPLIT_KERNEL_FUNCTION(do_volume)
 DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
-DEFINE_SPLIT_KERNEL_FUNCTION(background_buffer_update)
+DEFINE_SPLIT_KERNEL_FUNCTION(indirect_background)
 DEFINE_SPLIT_KERNEL_FUNCTION(shader_eval)
 DEFINE_SPLIT_KERNEL_FUNCTION(holdout_emission_blurring_pathtermination_ao)
+DEFINE_SPLIT_KERNEL_FUNCTION(subsurface_scatter)
 DEFINE_SPLIT_KERNEL_FUNCTION(direct_lighting)
 DEFINE_SPLIT_KERNEL_FUNCTION(shadow_blocked)
 DEFINE_SPLIT_KERNEL_FUNCTION(next_iteration_setup)
+DEFINE_SPLIT_KERNEL_FUNCTION(indirect_subsurface)
+DEFINE_SPLIT_KERNEL_FUNCTION(buffer_update)
 
 void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name, void* func))
 {
@@ -189,13 +197,17 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
        REGISTER(path_init);
        REGISTER(scene_intersect);
        REGISTER(lamp_emission);
+       REGISTER(do_volume);
        REGISTER(queue_enqueue);
-       REGISTER(background_buffer_update);
+       REGISTER(indirect_background);
        REGISTER(shader_eval);
        REGISTER(holdout_emission_blurring_pathtermination_ao);
+       REGISTER(subsurface_scatter);
        REGISTER(direct_lighting);
        REGISTER(shadow_blocked);
        REGISTER(next_iteration_setup);
+       REGISTER(indirect_subsurface);
+       REGISTER(buffer_update);
 
 #undef REGISTER
 #undef REGISTER_EVAL_NAME
similarity index 83%
rename from intern/cycles/kernel/kernels/opencl/kernel_background_buffer_update.cl
rename to intern/cycles/kernel/kernels/opencl/kernel_buffer_update.cl
index 47e363f6e03568e627edfb024e3741ec9fe48bed..d3058501f27f6c7e0abb34cb1b0d17c659af6ab9 100644 (file)
 
 #include "kernel_compat_opencl.h"
 #include "split/kernel_split_common.h"
-#include "split/kernel_background_buffer_update.h"
+#include "split/kernel_buffer_update.h"
 
-__kernel void kernel_ocl_path_trace_background_buffer_update(
+__kernel void kernel_ocl_path_trace_buffer_update(
         KernelGlobals *kg,
         ccl_constant KernelData *data)
 {
-       kernel_background_buffer_update(kg);
+       kernel_buffer_update(kg);
 }
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl b/intern/cycles/kernel/kernels/opencl/kernel_do_volume.cl
new file mode 100644 (file)
index 0000000..6380e9c
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_do_volume.h"
+
+__kernel void kernel_ocl_path_trace_do_volume(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data)
+{
+       kernel_do_volume(kg);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_background.cl
new file mode 100644 (file)
index 0000000..671501b
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_indirect_background.h"
+
+__kernel void kernel_ocl_path_trace_indirect_background(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data)
+{
+       kernel_indirect_background(kg);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl b/intern/cycles/kernel/kernels/opencl/kernel_indirect_subsurface.cl
new file mode 100644 (file)
index 0000000..b5e52e8
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_indirect_subsurface.h"
+
+__kernel void kernel_ocl_path_trace_indirect_subsurface(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data)
+{
+       kernel_indirect_subsurface(kg);
+}
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_split.cl b/intern/cycles/kernel/kernels/opencl/kernel_split.cl
new file mode 100644 (file)
index 0000000..7a947c4
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_state_buffer_size.cl"
+#include "kernel_data_init.cl"
+#include "kernel_path_init.cl"
+
+#include "kernel_scene_intersect.cl"
+#include "kernel_lamp_emission.cl"
+#include "kernel_do_volume.cl"
+#include "kernel_indirect_background.cl"
+#include "kernel_queue_enqueue.cl"
+#include "kernel_shader_eval.cl"
+#include "kernel_holdout_emission_blurring_pathtermination_ao.cl"
+#include "kernel_subsurface_scatter.cl"
+#include "kernel_direct_lighting.cl"
+#include "kernel_shadow_blocked.cl"
+#include "kernel_next_iteration_setup.cl"
+#include "kernel_indirect_subsurface.cl"
+#include "kernel_buffer_update.cl"
+
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl b/intern/cycles/kernel/kernels/opencl/kernel_subsurface_scatter.cl
new file mode 100644 (file)
index 0000000..8dae79b
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_subsurface_scatter.h"
+
+__kernel void kernel_ocl_path_trace_subsurface_scatter(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data)
+{
+       kernel_subsurface_scatter(kg);
+}
similarity index 92%
rename from intern/cycles/kernel/split/kernel_background_buffer_update.h
rename to intern/cycles/kernel/split/kernel_buffer_update.h
index 04aaf1bbaad4d431688875aabe7310ad5d210496..e42605c88e797b3e0f1d06eef119257014395397 100644 (file)
@@ -69,7 +69,7 @@ CCL_NAMESPACE_BEGIN
  * QUEUE_ACTIVE_AND_REGENERATED_RAYS will be filled with RAY_ACTIVE and RAY_REGENERATED rays
  * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS will be empty
  */
-ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
+ccl_device void kernel_buffer_update(KernelGlobals *kg)
 {
        ccl_local unsigned int local_queue_atomics;
        if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@@ -141,26 +141,6 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
        rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
        buffer += (kernel_split_params.offset + pixel_x + pixel_y*stride) * kernel_data.film.pass_stride;
 
-       if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-               /* eval background shader if nothing hit */
-               if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
-                       *L_transparent = (*L_transparent) + average((*throughput));
-#ifdef __PASSES__
-                       if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
-#endif
-                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-               }
-
-               if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
-#ifdef __BACKGROUND__
-                       /* sample background shader */
-                       float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
-                       path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
-#endif
-                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
-               }
-       }
-
        if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
                float3 L_sum = path_radiance_clamp_and_sum(kg, L);
                kernel_write_light_passes(kg, buffer, L, sample);
@@ -207,6 +187,9 @@ ccl_device void kernel_background_buffer_update(KernelGlobals *kg)
                                *L_transparent = 0.0f;
                                path_radiance_init(L, kernel_data.film.use_light_pass);
                                path_state_init(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, rng, sample, ray);
+#ifdef __SUBSURFACE__
+                               kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+#endif
 #ifdef __KERNEL_DEBUG__
                                debug_data_init(debug_data);
 #endif
diff --git a/intern/cycles/kernel/split/kernel_do_volume.h b/intern/cycles/kernel/split/kernel_do_volume.h
new file mode 100644 (file)
index 0000000..18da6e8
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+
+ccl_device void kernel_do_volume(KernelGlobals *kg)
+{
+#ifdef __VOLUME__
+       /* We will empty this queue in this kernel. */
+       if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
+               kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+       }
+       /* Fetch use_queues_flag. */
+       ccl_local char local_use_queues_flag;
+       if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+               local_use_queues_flag = *kernel_split_params.use_queues_flag;
+       }
+       ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+       int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+       if(local_use_queues_flag) {
+               ray_index = get_ray_index(kg, ray_index,
+                                         QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                                         kernel_split_state.queue_data,
+                                         kernel_split_params.queue_size,
+                                         1);
+               if(ray_index == QUEUE_EMPTY_SLOT) {
+                       return;
+               }
+       }
+
+       if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+          IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+
+               bool hit = ! IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND);
+
+               PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+               ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+
+               ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+               ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+               ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+               ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
+               ShaderData *sd = &kernel_split_state.sd[ray_index];
+               ShaderData *sd_input = &kernel_split_state.sd_DL_shadow[ray_index];
+
+               /* Sanitize volume stack. */
+               if(!hit) {
+                       kernel_volume_clean_stack(kg, state->volume_stack);
+               }
+               /* volume attenuation, emission, scatter */
+               if(state->volume_stack[0].shader != SHADER_NONE) {
+                       Ray volume_ray = *ray;
+                       volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+                       bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+                       {
+                               /* integrate along volume segment with distance sampling */
+                               VolumeIntegrateResult result = kernel_volume_integrate(
+                                       kg, state, sd, &volume_ray, L, throughput, rng, heterogeneous);
+
+#  ifdef __VOLUME_SCATTER__
+                               if(result == VOLUME_PATH_SCATTERED) {
+                                       /* direct lighting */
+                                       kernel_path_volume_connect_light(kg, rng, sd, sd_input, *throughput, state, L);
+
+                                       /* indirect light bounce */
+                                       if(kernel_path_volume_bounce(kg, rng, sd, throughput, state, L, ray))
+                                               ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED);
+                                       else
+                                               ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER);
+                               }
+#  endif
+                       }
+               }
+       }
+
+#endif
+}
+
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_background.h b/intern/cycles/kernel/split/kernel_indirect_background.h
new file mode 100644 (file)
index 0000000..e314a98
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_indirect_background(KernelGlobals *kg)
+{
+       /*
+       ccl_local unsigned int local_queue_atomics;
+       if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+               local_queue_atomics = 0;
+       }
+       ccl_barrier(CCL_LOCAL_MEM_FENCE);
+       // */
+
+       int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+       ray_index = get_ray_index(kg, ray_index,
+                                 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                                 kernel_split_state.queue_data,
+                                 kernel_split_params.queue_size,
+                                 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+       /* If we are executing on a GPU device, we exit all threads that are not
+        * required.
+        *
+        * If we are executing on a CPU device, then we need to keep all threads
+        * active since we have barrier() calls later in the kernel. CPU devices,
+        * expect all threads to execute barrier statement.
+        */
+       if(ray_index == QUEUE_EMPTY_SLOT) {
+               return;
+       }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+       if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+
+
+       ccl_global char *ray_state = kernel_split_state.ray_state;
+       ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+       PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+       ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+       ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+       ccl_global float *L_transparent = &kernel_split_state.L_transparent[ray_index];
+
+       if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+               /* eval background shader if nothing hit */
+               if(kernel_data.background.transparent && (state->flag & PATH_RAY_CAMERA)) {
+                       *L_transparent = (*L_transparent) + average((*throughput));
+#ifdef __PASSES__
+                       if(!(kernel_data.film.pass_flag & PASS_BACKGROUND))
+#endif
+                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+               }
+
+               if(IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+#ifdef __BACKGROUND__
+                       /* sample background shader */
+                       float3 L_background = indirect_background(kg, &kernel_split_state.sd_DL_shadow[ray_index], state, ray);
+                       path_radiance_accum_background(L, (*throughput), L_background, state->bounce);
+#endif
+                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+               }
+       }
+
+#ifndef __COMPUTE_DEVICE_GPU__
+       }
+#endif
+
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_indirect_subsurface.h b/intern/cycles/kernel/split/kernel_indirect_subsurface.h
new file mode 100644 (file)
index 0000000..a56e85a
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device void kernel_indirect_subsurface(KernelGlobals *kg)
+{
+       int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+       if(thread_index == 0) {
+               /* We will empty both queues in this kernel. */
+               kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
+               kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
+       }
+
+       int ray_index;
+       get_ray_index(kg, thread_index,
+                     QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                     kernel_split_state.queue_data,
+                     kernel_split_params.queue_size,
+                     1);
+       ray_index = get_ray_index(kg, thread_index,
+                                 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                                 kernel_split_state.queue_data,
+                                 kernel_split_params.queue_size,
+                                 1);
+
+#ifdef __SUBSURFACE__
+
+       if(ray_index == QUEUE_EMPTY_SLOT) {
+               return;
+       }
+
+       ccl_global char *ray_state = kernel_split_state.ray_state;
+       ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+       PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+       ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+       ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+
+       if(IS_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER)) {
+               ccl_addr_space SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+               kernel_path_subsurface_accum_indirect(ss_indirect, L);
+
+               /* Trace indirect subsurface rays by restarting the loop. this uses less
+                * stack memory than invoking kernel_path_indirect.
+                */
+               if(ss_indirect->num_rays) {
+                       kernel_path_subsurface_setup_indirect(kg,
+                                                             ss_indirect,
+                                                             state,
+                                                             ray,
+                                                             L,
+                                                             throughput);
+                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
+               }
+               else {
+                       ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+               }
+       }
+
+#endif  /* __SUBSURFACE__ */
+
+}
+
+CCL_NAMESPACE_END
index 261625da31df6d2107e7c0d3aa78e4720fa464ca..84de231b78cf77bf6d14c9044d248f0f3fd31d00 100644 (file)
@@ -38,10 +38,12 @@ CCL_NAMESPACE_BEGIN
  */
 ccl_device void kernel_lamp_emission(KernelGlobals *kg)
 {
+#ifndef __VOLUME__
        /* We will empty this queue in this kernel. */
        if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
                kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
        }
+#endif
        /* Fetch use_queues_flag. */
        ccl_local char local_use_queues_flag;
        if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
@@ -55,7 +57,12 @@ ccl_device void kernel_lamp_emission(KernelGlobals *kg)
                                          QUEUE_ACTIVE_AND_REGENERATED_RAYS,
                                          kernel_split_state.queue_data,
                                          kernel_split_params.queue_size,
-                                         1);
+#ifndef __VOLUME__
+                                         1
+#else
+                                         0
+#endif
+                                         );
                if(ray_index == QUEUE_EMPTY_SLOT) {
                        return;
                }
index fe3c9e1e8a2e7c5480a857c99b02b2f77464e4e1..f44aff30fa9d9cf270f639ae0c65845295513286 100644 (file)
@@ -82,6 +82,10 @@ ccl_device void kernel_path_init(KernelGlobals *kg) {
                                &kernel_split_state.rng[ray_index],
                                my_sample,
                                &kernel_split_state.ray[ray_index]);
+#ifdef __SUBSURFACE__
+               kernel_path_subsurface_init_indirect(&kernel_split_state.ss_rays[ray_index]);
+#endif
+
 #ifdef __KERNEL_DEBUG__
                debug_data_init(&kernel_split_state.debug_data[ray_index]);
 #endif
index 66aad705bd43a0559789fdfe1a8fca6984d677d6..70ec92b394b6cf2c1b8947388fd80083cc182bd2 100644 (file)
@@ -63,10 +63,12 @@ ccl_device void kernel_queue_enqueue(KernelGlobals *kg)
 
        int queue_number = -1;
 
-       if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND)) {
+       if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_HIT_BACKGROUND) ||
+          IS_STATE(kernel_split_state.ray_state, ray_index, RAY_UPDATE_BUFFER)) {
                queue_number = QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS;
        }
-       else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
+       else if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE) ||
+               IS_STATE(kernel_split_state.ray_state, ray_index, RAY_REGENERATED)) {
                queue_number = QUEUE_ACTIVE_AND_REGENERATED_RAYS;
        }
 
index a7e0c7692a215119c8889ce90f3751f9fa63549d..144cba67e23ceea0a21aab92a1af61e6c9c60933 100644 (file)
@@ -93,7 +93,7 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
 #ifdef __KERNEL_DEBUG__
        DebugData *debug_data = &kernel_split_state.debug_data[ray_index];
 #endif
-       Intersection *isect = &kernel_split_state.isect[ray_index];
+       Intersection isect;
        PathState state = kernel_split_state.path_state[ray_index];
        Ray ray = kernel_split_state.ray[ray_index];
 
@@ -116,16 +116,17 @@ ccl_device void kernel_scene_intersect(KernelGlobals *kg)
                lcg_state = lcg_state_init(&rng, &state, 0x51633e2d);
        }
 
-       bool hit = scene_intersect(kg, ray, visibility, isect, &lcg_state, difl, extmax);
+       bool hit = scene_intersect(kg, ray, visibility, &isect, &lcg_state, difl, extmax);
 #else
-       bool hit = scene_intersect(kg, ray, visibility, isect, NULL, 0.0f, 0.0f);
+       bool hit = scene_intersect(kg, ray, visibility, &isect, NULL, 0.0f, 0.0f);
 #endif
+       kernel_split_state.isect[ray_index] = isect;
 
 #ifdef __KERNEL_DEBUG__
        if(state.flag & PATH_RAY_CAMERA) {
-               debug_data->num_bvh_traversed_nodes += isect->num_traversed_nodes;
-               debug_data->num_bvh_traversed_instances += isect->num_traversed_instances;
-               debug_data->num_bvh_intersections += isect->num_intersections;
+               debug_data->num_bvh_traversed_nodes += isect.num_traversed_nodes;
+               debug_data->num_bvh_traversed_instances += isect.num_traversed_instances;
+               debug_data->num_bvh_intersections += isect.num_intersections;
        }
        debug_data->num_ray_bounces++;
 #endif
index 35ee19ddf1bcbcde9f6649d2c66a74fc3ec0a73c..4bd5c8b6eb06ac5e755615e802ba476ee7e0c0fa 100644 (file)
@@ -76,14 +76,14 @@ ccl_device void kernel_shader_eval(KernelGlobals *kg)
 
        /* Continue on with shader evaluation. */
        if(IS_STATE(kernel_split_state.ray_state, ray_index, RAY_ACTIVE)) {
-               Intersection *isect = &kernel_split_state.isect[ray_index];
+               Intersection isect = kernel_split_state.isect[ray_index];
                ccl_global uint *rng = &kernel_split_state.rng[ray_index];
                ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
                Ray ray = kernel_split_state.ray[ray_index];
 
                shader_setup_from_ray(kg,
                                      &kernel_split_state.sd[ray_index],
-                                     isect,
+                                     &isect,
                                      &ray);
                float rbsdf = path_state_rng_1D_for_decision(kg, rng, state, PRNG_BSDF);
                shader_eval_surface(kg, &kernel_split_state.sd[ray_index], rng, state, rbsdf, state->flag, SHADER_CONTEXT_MAIN);
index d532c7cf55ba36128a6038b2ccfbe8a50b5b9c8d..52f7002acb3cb6eea6442828db4b9ecb8aa76010 100644 (file)
@@ -93,12 +93,14 @@ ccl_device void kernel_shadow_blocked(KernelGlobals *kg)
                                : light_ray_dl_global;
 
                float3 shadow;
+               Ray ray = *light_ray_global;
                update_path_radiance = !(shadow_blocked(kg,
                                                        &kernel_split_state.sd_DL_shadow[thread_index],
                                                        state,
-                                                       light_ray_global,
+                                                       &ray,
                                                        &shadow));
 
+               *light_ray_global = ray;
                /* We use light_ray_global's P and t to store shadow and
                 * update_path_radiance.
                 */
index dd0c3f9c94125758d026172059f020904312394c..5c12fe426ac747eade0ac8196bdf8e2e1d43aef8 100644 (file)
 #include "kernel_passes.h"
 
 #ifdef __SUBSURFACE__
-#include "kernel_subsurface.h"
+#  include "kernel_subsurface.h"
 #endif
 
 #ifdef __VOLUME__
-#include "kernel_volume.h"
+#  include "kernel_volume.h"
 #endif
 
 #include "kernel_path_state.h"
 #include "kernel_path_common.h"
 #include "kernel_path_surface.h"
 #include "kernel_path_volume.h"
+#include "kernel_path_subsurface.h"
 
 #ifdef __KERNEL_DEBUG__
-#include "kernel_debug.h"
+#  include "kernel_debug.h"
 #endif
 
 #include "kernel_queues.h"
index 5380c0c5de60d9c99271fbf3e69f6e95b50ef047..81dcdbaeddee8d291eed619129809368de642712 100644 (file)
@@ -31,6 +31,14 @@ ccl_device_inline size_t split_data_buffer_size(KernelGlobals *kg, size_t num_el
        size = size SPLIT_DATA_ENTRIES;
 #undef SPLIT_DATA_ENTRY
 
+#ifdef __SUBSURFACE__
+       size += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16); /* ss_rays */
+#endif
+
+#ifdef __VOLUME__
+       size += align_up(2 * num_elements * sizeof(PathState), 16); /* state_shadow */
+#endif
+
        return size;
 }
 
@@ -46,9 +54,19 @@ ccl_device_inline void split_data_init(KernelGlobals *kg,
 
 #define SPLIT_DATA_ENTRY(type, name, num) \
        split_data->name = (type*)p; p += align_up(num_elements * num * sizeof(type), 16);
-       SPLIT_DATA_ENTRIES
+       SPLIT_DATA_ENTRIES;
 #undef SPLIT_DATA_ENTRY
 
+#ifdef __SUBSURFACE__
+       split_data->ss_rays = (ccl_global SubsurfaceIndirectRays*)p;
+       p += align_up(num_elements * sizeof(SubsurfaceIndirectRays), 16);
+#endif
+
+#ifdef __VOLUME__
+       split_data->state_shadow = (ccl_global PathState*)p;
+       p += align_up(2 * num_elements * sizeof(PathState), 16);
+#endif
+
        split_data->ray_state = ray_state;
 }
 
index 62e3ea45ae276d8960b5305a590c37517a3e1552..b39ed4995dcce3fbe85becd079545981de3af4b3 100644 (file)
@@ -68,14 +68,13 @@ typedef struct SplitParams {
        SPLIT_DATA_ENTRY(PathRadiance, path_radiance, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, ray, 1) \
        SPLIT_DATA_ENTRY(ccl_global PathState, path_state, 1) \
-       SPLIT_DATA_ENTRY(Intersection, isect, 1) \
+       SPLIT_DATA_ENTRY(ccl_global Intersection, isect, 1) \
        SPLIT_DATA_ENTRY(ccl_global float3, ao_alpha, 1) \
        SPLIT_DATA_ENTRY(ccl_global float3, ao_bsdf, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, ao_light_ray, 1) \
        SPLIT_DATA_ENTRY(ccl_global BsdfEval, bsdf_eval, 1) \
        SPLIT_DATA_ENTRY(ccl_global int, is_lamp, 1) \
        SPLIT_DATA_ENTRY(ccl_global Ray, light_ray, 1) \
-       SPLIT_DATA_ENTRY(Intersection, isect_shadow, 2) \
        SPLIT_DATA_ENTRY(ccl_global int, queue_data, (NUM_QUEUES*2)) /* TODO(mai): this is too large? */ \
        SPLIT_DATA_ENTRY(ccl_global uint, work_array, 1) \
        SPLIT_DATA_ENTRY(ShaderData, sd, 1) \
@@ -88,6 +87,14 @@ typedef struct SplitData {
        SPLIT_DATA_ENTRIES
 #undef SPLIT_DATA_ENTRY
 
+#ifdef __SUBSURFACE__
+       ccl_global SubsurfaceIndirectRays *ss_rays;
+#endif
+
+#ifdef __VOLUME__
+       ccl_global PathState *state_shadow;
+#endif
+
        /* this is actually in a separate buffer from the rest of the split state data (so it can be read back from
         * the host easily) but is still used the same as the other data so we have it here in this struct as well
         */
diff --git a/intern/cycles/kernel/split/kernel_subsurface_scatter.h b/intern/cycles/kernel/split/kernel_subsurface_scatter.h
new file mode 100644 (file)
index 0000000..fcdd805
--- /dev/null
@@ -0,0 +1,86 @@
+
+
+CCL_NAMESPACE_BEGIN
+
+
+ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
+{
+#ifdef __SUBSURFACE__
+
+       ccl_local unsigned int local_queue_atomics;
+       if(ccl_local_id(0) == 0 && ccl_local_id(1) == 0) {
+               local_queue_atomics = 0;
+       }
+       ccl_barrier(CCL_LOCAL_MEM_FENCE);
+
+       int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
+       ray_index = get_ray_index(kg, ray_index,
+                                 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
+                                 kernel_split_state.queue_data,
+                                 kernel_split_params.queue_size,
+                                 0);
+
+#ifdef __COMPUTE_DEVICE_GPU__
+       /* If we are executing on a GPU device, we exit all threads that are not
+        * required.
+        *
+        * If we are executing on a CPU device, then we need to keep all threads
+        * active since we have barrier() calls later in the kernel. CPU devices,
+        * expect all threads to execute barrier statement.
+        */
+       if(ray_index == QUEUE_EMPTY_SLOT) {
+               return;
+       }
+#endif
+
+#ifndef __COMPUTE_DEVICE_GPU__
+       if(ray_index != QUEUE_EMPTY_SLOT) {
+#endif
+
+
+       char enqueue_flag = 0;
+       ccl_global char *ray_state = kernel_split_state.ray_state;
+       ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
+       PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
+       ccl_global RNG *rng = &kernel_split_state.rng[ray_index];
+       ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
+       ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
+       ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
+       ShaderData *sd = &kernel_split_state.sd[ray_index];
+       ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
+
+       if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
+               if(sd->flag & SD_BSSRDF) {
+                       if(kernel_path_subsurface_scatter(kg,
+                                                         sd,
+                                                         emission_sd,
+                                                         L,
+                                                         state,
+                                                         rng,
+                                                         ray,
+                                                         throughput,
+                                                         ss_indirect)) {
+                               ASSIGN_RAY_STATE(ray_state, ray_index, RAY_UPDATE_BUFFER);
+                               enqueue_flag = 1;
+                       }
+               }
+       }
+
+#ifndef __COMPUTE_DEVICE_GPU__
+       }
+#endif
+
+       /* Enqueue RAY_UPDATE_BUFFER rays. */
+       enqueue_ray_index_local(ray_index,
+                               QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
+                               enqueue_flag,
+                               kernel_split_params.queue_size,
+                               &local_queue_atomics,
+                               kernel_split_state.queue_data,
+                               kernel_split_params.queue_index);
+
+#endif  /* __SUBSURFACE__ */
+
+}
+
+CCL_NAMESPACE_END