2 * Copyright 2011-2015 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "split/kernel_background_buffer_update.h"
19 __kernel void kernel_ocl_path_trace_background_buffer_update(
21 ccl_constant KernelData *data,
23 ccl_global float *per_sample_output_buffers,
24 ccl_global uint *rng_state,
25 ccl_global uint *rng_coop, /* Required for buffer Update */
26 ccl_global float3 *throughput_coop, /* Required for background hit processing */
27 PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */
28 ccl_global Ray *Ray_coop, /* Required for background hit processing */
29 ccl_global PathState *PathState_coop, /* Required for background hit processing */
30 ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */
31 ccl_global char *ray_state, /* Stores information on the current state of a ray */
32 int sw, int sh, int sx, int sy, int stride,
33 int rng_state_offset_x,
34 int rng_state_offset_y,
36 ccl_global unsigned int *work_array, /* Denotes work of each ray */
37 ccl_global int *Queue_data, /* Queues memory */
38 ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
39 int queuesize, /* Size (capacity) of each queue */
42 #ifdef __WORK_STEALING__
43 ccl_global unsigned int *work_pool_wgs,
44 unsigned int num_samples,
46 #ifdef __KERNEL_DEBUG__
47 DebugData *debugdata_coop,
49 int parallel_samples) /* Number of samples to be processed in parallel */
51 ccl_local unsigned int local_queue_atomics;
52 if(get_local_id(0) == 0 && get_local_id(1) == 0) {
53 local_queue_atomics = 0;
55 barrier(CLK_LOCAL_MEM_FENCE);
57 int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
59 /* We will empty this queue in this kernel. */
60 Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
62 char enqueue_flag = 0;
63 ray_index = get_ray_index(ray_index,
64 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
69 #ifdef __COMPUTE_DEVICE_GPU__
70 /* If we are executing on a GPU device, we exit all threads that are not
73 * If we are executing on a CPU device, then we need to keep all threads
74 * active since we have barrier() calls later in the kernel. CPU devices
75 * expect all threads to execute the barrier statement.
77 if(ray_index == QUEUE_EMPTY_SLOT) {
82 #ifndef __COMPUTE_DEVICE_GPU__
83 if(ray_index != QUEUE_EMPTY_SLOT) {
86 kernel_background_buffer_update((KernelGlobals *)kg,
88 per_sample_output_buffers,
97 sw, sh, sx, sy, stride,
104 #ifdef __WORK_STEALING__
108 #ifdef __KERNEL_DEBUG__
113 #ifndef __COMPUTE_DEVICE_GPU__
117 /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS.
118 * These rays will be made active during the next SceneIntersect kernel.
120 enqueue_ray_index_local(ray_index,
121 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
124 &local_queue_atomics,