Cycles: Split path initialization into own kernel
author Mai Lavelle <mai.lavelle@gmail.com>
Wed, 1 Mar 2017 06:05:55 +0000 (01:05 -0500)
committer Mai Lavelle <mai.lavelle@gmail.com>
Wed, 8 Mar 2017 06:30:43 +0000 (01:30 -0500)
This makes it easier to initialize things correctly in the data_init kernel
before they are needed by path tracing.

intern/cycles/device/device_split_kernel.cpp
intern/cycles/device/device_split_kernel.h
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernels/cpu/kernel_cpu.h
intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h
intern/cycles/kernel/kernels/cuda/kernel_split.cu
intern/cycles/kernel/kernels/opencl/kernel_path_init.cl [new file with mode: 0644]
intern/cycles/kernel/split/kernel_data_init.h
intern/cycles/kernel/split/kernel_path_init.h [new file with mode: 0644]

index c50afe85da5314a84a8c8aa2a4d942ae50e54016..85da7024a2ce1c4c1ceabc47eca9490ccecd0fbe 100644 (file)
@@ -41,6 +41,7 @@ DeviceSplitKernel::~DeviceSplitKernel()
        device->mem_free(queue_index);
        device->mem_free(work_pool_wgs);
 
+       delete kernel_path_init;
        delete kernel_scene_intersect;
        delete kernel_lamp_emission;
        delete kernel_queue_enqueue;
@@ -61,6 +62,7 @@ bool DeviceSplitKernel::load_kernels(const DeviceRequestedFeatures& requested_fe
                        return false; \
                }
 
+       LOAD_KERNEL(path_init);
        LOAD_KERNEL(scene_intersect);
        LOAD_KERNEL(lamp_emission);
        LOAD_KERNEL(queue_enqueue);
@@ -200,6 +202,8 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
                        return false;
                }
 
+               ENQUEUE_SPLIT_KERNEL(path_init, global_size, local_size);
+
                bool activeRaysAvailable = true;
 
                while(activeRaysAvailable) {
index b3106fd56329725ef13a00792c588decb0b7b187..1903574f0b53ec5202970786e1cbeffc4379b6ea 100644 (file)
@@ -55,6 +55,7 @@ class DeviceSplitKernel {
 private:
        Device *device;
 
+       SplitKernelFunction *kernel_path_init;
        SplitKernelFunction *kernel_scene_intersect;
        SplitKernelFunction *kernel_lamp_emission;
        SplitKernelFunction *kernel_queue_enqueue;
index 685955170b5b4b466f75a1e713695d93114cb427..d467e40b3e94f443829b903cbece889ae7aeba23 100644 (file)
@@ -16,6 +16,7 @@ set(SRC
        kernels/cpu/kernel_split.cpp
        kernels/opencl/kernel.cl
        kernels/opencl/kernel_data_init.cl
+       kernels/opencl/kernel_path_init.cl
        kernels/opencl/kernel_queue_enqueue.cl
        kernels/opencl/kernel_scene_intersect.cl
        kernels/opencl/kernel_lamp_emission.cl
@@ -201,6 +202,7 @@ set(SRC_SPLIT_HEADERS
        split/kernel_holdout_emission_blurring_pathtermination_ao.h
        split/kernel_lamp_emission.h
        split/kernel_next_iteration_setup.h
+       split/kernel_path_init.h
        split/kernel_queue_enqueue.h
        split/kernel_scene_intersect.h
        split/kernel_shader_eval.h
@@ -400,6 +402,7 @@ endif()
 
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_data_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
+delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_path_init.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_queue_enqueue.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_scene_intersect.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
 delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernels/opencl/kernel_lamp_emission.cl" ${CYCLES_INSTALL_PATH}/kernel/kernels/opencl)
index 1d7101578174007320f3156b0a38c5e96372a1de..8c1675665cb2df307a00e1ddc853545f3b8a6229 100644 (file)
@@ -71,6 +71,7 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
 #define DECLARE_SPLIT_KERNEL_FUNCTION(name) \
        void KERNEL_FUNCTION_FULL_NAME(name)(KernelGlobals *kg, KernelData *data);
 
+DECLARE_SPLIT_KERNEL_FUNCTION(path_init)
 DECLARE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DECLARE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DECLARE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
index c59f48925464d789c94dc40d1c503b139cc65731..f6e0591ef24bb658006d7cf41e50ce59ec0be0b3 100644 (file)
@@ -38,6 +38,7 @@
 #  include "split/kernel_split_common.h"
 
 #  include "split/kernel_data_init.h"
+#  include "split/kernel_path_init.h"
 #  include "split/kernel_scene_intersect.h"
 #  include "split/kernel_lamp_emission.h"
 #  include "split/kernel_queue_enqueue.h"
@@ -163,6 +164,7 @@ void KERNEL_FUNCTION_FULL_NAME(shader)(KernelGlobals *kg,
                kernel_##name(kg); \
        }
 
+DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
@@ -186,6 +188,7 @@ void KERNEL_FUNCTION_FULL_NAME(register_functions)(void(*reg)(const char* name,
        REGISTER(shader);
 
        REGISTER(data_init);
+       REGISTER(path_init);
        REGISTER(scene_intersect);
        REGISTER(lamp_emission);
        REGISTER(queue_enqueue);
index 441cd96fafa0283ae0154fa7a3644696683cae7e..3a883265157e1ec3cf9d1e859db2289e81ec6cae 100644 (file)
@@ -25,6 +25,7 @@
 
 #include "../../split/kernel_split_common.h"
 #include "../../split/kernel_data_init.h"
+#include "../../split/kernel_path_init.h"
 #include "../../split/kernel_scene_intersect.h"
 #include "../../split/kernel_lamp_emission.h"
 #include "../../split/kernel_queue_enqueue.h"
@@ -81,6 +82,7 @@ kernel_cuda_path_trace_data_init(
                kernel_##name(NULL); \
        }
 
+DEFINE_SPLIT_KERNEL_FUNCTION(path_init)
 DEFINE_SPLIT_KERNEL_FUNCTION(scene_intersect)
 DEFINE_SPLIT_KERNEL_FUNCTION(lamp_emission)
 DEFINE_SPLIT_KERNEL_FUNCTION(queue_enqueue)
diff --git a/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl b/intern/cycles/kernel/kernels/opencl/kernel_path_init.cl
new file mode 100644 (file)
index 0000000..7e9e4a0
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kernel_compat_opencl.h"
+#include "split/kernel_split_common.h"
+#include "split/kernel_path_init.h"
+
+__kernel void kernel_ocl_path_trace_path_init(
+        KernelGlobals *kg,
+        ccl_constant KernelData *data) /* Not referenced in this body; presumably kept for a uniform split-kernel argument list - confirm. */
+{
+       kernel_path_init(kg); /* OpenCL entry point only forwards to the implementation in split/kernel_path_init.h. */
+}
index 5604363dcd98da227d7f8e192309f5840fecd2bb..982c7be2008549aa5b8e15e8062e1406069736e7 100644 (file)
@@ -18,33 +18,6 @@ CCL_NAMESPACE_BEGIN
 
 /* Note on kernel_data_initialization kernel
  * This kernel Initializes structures needed in path-iteration kernels.
- * This is the first kernel in ray-tracing logic.
- *
- * Ray state of rays outside the tile-boundary will be marked RAY_INACTIVE
- *
- * Its input and output are as follows,
- *
- * Un-initialized rng---------------|--- kernel_data_initialization ---|--- Initialized rng
- * Un-initialized throughput -------|                                  |--- Initialized throughput
- * Un-initialized L_transparent ----|                                  |--- Initialized L_transparent
- * Un-initialized PathRadiance -----|                                  |--- Initialized PathRadiance
- * Un-initialized Ray --------------|                                  |--- Initialized Ray
- * Un-initialized PathState --------|                                  |--- Initialized PathState
- * Un-initialized QueueData --------|                                  |--- Initialized QueueData (to QUEUE_EMPTY_SLOT)
- * Un-initialized QueueIndex -------|                                  |--- Initialized QueueIndex (to 0)
- * Un-initialized use_queues_flag---|                                  |--- Initialized use_queues_flag (to false)
- * Un-initialized ray_state --------|                                  |--- Initialized ray_state
- * parallel_samples --------------- |                                  |--- Initialized per_sample_output_buffers
- * rng_state -----------------------|                                  |--- Initialized work_array
- * data ----------------------------|                                  |--- Initialized work_pool_wgs
- * start_sample --------------------|                                  |
- * sx ------------------------------|                                  |
- * sy ------------------------------|                                  |
- * sw ------------------------------|                                  |
- * sh ------------------------------|                                  |
- * stride --------------------------|                                  |
- * queuesize -----------------------|                                  |
- * num_samples ---------------------|                                  |
  *
  * Note on Queues :
  * All slots in queues are initialized to queue empty slot;
@@ -137,80 +110,6 @@ void KERNEL_FUNCTION_FULL_NAME(data_init)(
                 */
                *use_queues_flag = 0;
        }
-
-       int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
-
-       /* This is the first assignment to ray_state;
-        * So we dont use ASSIGN_RAY_STATE macro.
-        */
-       kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
-
-       unsigned int my_sample;
-       unsigned int pixel_x;
-       unsigned int pixel_y;
-       unsigned int tile_x;
-       unsigned int tile_y;
-       unsigned int my_sample_tile;
-
-       unsigned int work_index = 0;
-       /* Get work. */
-       if(!get_next_work(kg, &work_index, ray_index)) {
-               /* No more work, mark ray as inactive */
-               kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
-
-               return;
-       }
-
-       /* Get the sample associated with the work. */
-       my_sample = get_work_sample(kg, work_index, ray_index) + start_sample;
-
-       my_sample_tile = 0;
-
-       /* Get pixel and tile position associated with the work. */
-       get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
-                               &tile_x, &tile_y,
-                               work_index,
-                               ray_index);
-       kernel_split_state.work_array[ray_index] = work_index;
-
-       rng_state += kernel_split_params.offset + pixel_x + pixel_y*stride;
-
-       ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
-       per_sample_output_buffers += ((tile_x + (tile_y * stride)) + (my_sample_tile)) * kernel_data.film.pass_stride;
-
-       /* Initialize random numbers and ray. */
-       kernel_path_trace_setup(kg,
-                               rng_state,
-                               my_sample,
-                               pixel_x, pixel_y,
-                               &kernel_split_state.rng[ray_index],
-                               &kernel_split_state.ray[ray_index]);
-
-       if(kernel_split_state.ray[ray_index].t != 0.0f) {
-               /* Initialize throughput, L_transparent, Ray, PathState;
-                * These rays proceed with path-iteration.
-                */
-               kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
-               kernel_split_state.L_transparent[ray_index] = 0.0f;
-               path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
-               path_state_init(kg,
-                               &kernel_split_state.sd_DL_shadow[ray_index],
-                               &kernel_split_state.path_state[ray_index],
-                               &kernel_split_state.rng[ray_index],
-                               my_sample,
-                               &kernel_split_state.ray[ray_index]);
-#ifdef __KERNEL_DEBUG__
-               debug_data_init(&kernel_split_state.debug_data[ray_index]);
-#endif
-       }
-       else {
-               /* These rays do not participate in path-iteration. */
-               float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
-               /* Accumulate result in output buffer. */
-               kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
-               path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
-               ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
-       }
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/split/kernel_path_init.h b/intern/cycles/kernel/split/kernel_path_init.h
new file mode 100644 (file)
index 0000000..e613db2
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Per-ray setup for the split kernel: claims a work item for this ray index and
+ * initializes the RNG, ray and path state needed by the path-iteration kernels.
+ * This is the first kernel in ray-tracing logic.
+ * Rays that get no work are marked RAY_INACTIVE; rays with t == 0 are marked RAY_TO_REGENERATE.
+ */
+
+ccl_device void kernel_path_init(KernelGlobals *kg) {
+       int ray_index = ccl_global_id(0) + ccl_global_id(1) * ccl_global_size(0);
+
+       /* This is the first assignment to ray_state,
+        * so we don't use the ASSIGN_RAY_STATE macro.
+        */
+       kernel_split_state.ray_state[ray_index] = RAY_ACTIVE;
+
+       unsigned int my_sample;
+       unsigned int pixel_x;
+       unsigned int pixel_y;
+       unsigned int tile_x;
+       unsigned int tile_y;
+       unsigned int my_sample_tile;
+
+       unsigned int work_index = 0;
+       /* Get work. */
+       if(!get_next_work(kg, &work_index, ray_index)) {
+               /* No more work, mark ray as inactive and skip all further setup. */
+               kernel_split_state.ray_state[ray_index] = RAY_INACTIVE;
+
+               return;
+       }
+
+       /* Get the sample associated with the work, offset by the start sample. */
+       my_sample = get_work_sample(kg, work_index, ray_index) + kernel_split_params.start_sample;
+
+       my_sample_tile = 0; /* Always zero here; only feeds the output buffer offset below. */
+
+       /* Get pixel and tile position associated with the work. */
+       get_work_pixel_tile_position(kg, &pixel_x, &pixel_y,
+                               &tile_x, &tile_y,
+                               work_index,
+                               ray_index);
+       kernel_split_state.work_array[ray_index] = work_index;
+
+       ccl_global uint *rng_state = kernel_split_params.rng_state;
+       rng_state += kernel_split_params.offset + pixel_x + pixel_y*kernel_split_params.stride;
+
+       ccl_global float *per_sample_output_buffers = kernel_split_state.per_sample_output_buffers;
+       per_sample_output_buffers += (tile_x + tile_y * kernel_split_params.stride + my_sample_tile)
+                                    * kernel_data.film.pass_stride;
+
+       /* Initialize random numbers and ray. */
+       kernel_path_trace_setup(kg,
+                               rng_state,
+                               my_sample,
+                               pixel_x, pixel_y,
+                               &kernel_split_state.rng[ray_index],
+                               &kernel_split_state.ray[ray_index]);
+
+       if(kernel_split_state.ray[ray_index].t != 0.0f) {
+               /* Initialize throughput, L_transparent, PathRadiance and PathState;
+                * these rays proceed with path-iteration.
+                */
+               kernel_split_state.throughput[ray_index] = make_float3(1.0f, 1.0f, 1.0f);
+               kernel_split_state.L_transparent[ray_index] = 0.0f;
+               path_radiance_init(&kernel_split_state.path_radiance[ray_index], kernel_data.film.use_light_pass);
+               path_state_init(kg,
+                               &kernel_split_state.sd_DL_shadow[ray_index],
+                               &kernel_split_state.path_state[ray_index],
+                               &kernel_split_state.rng[ray_index],
+                               my_sample,
+                               &kernel_split_state.ray[ray_index]);
+#ifdef __KERNEL_DEBUG__
+               debug_data_init(&kernel_split_state.debug_data[ray_index]);
+#endif
+       }
+       else {
+               /* Ray has t == 0: it does not participate in path-iteration. */
+               float4 L_rad = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+               /* Accumulate the (all-zero) result in output buffer. */
+               kernel_write_pass_float4(per_sample_output_buffers, my_sample, L_rad);
+               path_rng_end(kg, rng_state, kernel_split_state.rng[ray_index]);
+               ASSIGN_RAY_STATE(kernel_split_state.ray_state, ray_index, RAY_TO_REGENERATE);
+       }
+}
+
+CCL_NAMESPACE_END
+