2 * Copyright 2011-2015 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "split/kernel_background_buffer_update.h"
19 __kernel void kernel_ocl_path_trace_background_buffer_update(
21 ccl_constant KernelData *data,
22 ccl_global float *per_sample_output_buffers,
23 ccl_global uint *rng_state,
24 ccl_global uint *rng_coop, /* Required for buffer Update */
25 ccl_global float3 *throughput_coop, /* Required for background hit processing */
26 PathRadiance *PathRadiance_coop, /* Required for background hit processing and buffer Update */
27 ccl_global Ray *Ray_coop, /* Required for background hit processing */
28 ccl_global PathState *PathState_coop, /* Required for background hit processing */
29 ccl_global float *L_transparent_coop, /* Required for background hit processing and buffer Update */
30 ccl_global char *ray_state, /* Stores information on the current state of a ray */
31 int sw, int sh, int sx, int sy, int stride,
32 int rng_state_offset_x,
33 int rng_state_offset_y,
35 ccl_global unsigned int *work_array, /* Denotes work of each ray */
36 ccl_global int *Queue_data, /* Queues memory */
37 ccl_global int *Queue_index, /* Tracks the number of elements in each queue */
38 int queuesize, /* Size (capacity) of each queue */
41 #ifdef __WORK_STEALING__
42 ccl_global unsigned int *work_pool_wgs,
43 unsigned int num_samples,
45 #ifdef __KERNEL_DEBUG__
46 DebugData *debugdata_coop,
48 int parallel_samples) /* Number of samples to be processed in parallel */
50 ccl_local unsigned int local_queue_atomics;
51 if(get_local_id(0) == 0 && get_local_id(1) == 0) {
52 local_queue_atomics = 0;
54 barrier(CLK_LOCAL_MEM_FENCE);
56 int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);
58 /* We will empty this queue in this kernel. */
59 Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
61 char enqueue_flag = 0;
62 ray_index = get_ray_index(ray_index,
63 QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
68 #ifdef __COMPUTE_DEVICE_GPU__
69 /* If we are executing on a GPU device, we exit all threads that are not
72 * If we are executing on a CPU device, then we need to keep all threads
73 * active since we have barrier() calls later in the kernel. CPU devices
74 * expect all threads to execute the barrier statement.
76 if(ray_index == QUEUE_EMPTY_SLOT) {
81 #ifndef __COMPUTE_DEVICE_GPU__
82 if(ray_index != QUEUE_EMPTY_SLOT) {
85 kernel_background_buffer_update((KernelGlobals *)kg,
86 per_sample_output_buffers,
95 sw, sh, sx, sy, stride,
102 #ifdef __WORK_STEALING__
106 #ifdef __KERNEL_DEBUG__
111 #ifndef __COMPUTE_DEVICE_GPU__
115 /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
116 * these rays will be made active during the next SceneIntersect kernel.
118 enqueue_ray_index_local(ray_index,
119 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
122 &local_queue_atomics,