a3eecd3128b998d91f307d60e6927331667bc56e
[blender.git] / intern / cycles / kernel / kernels / opencl / kernel_background_buffer_update.cl
1 /*
2  * Copyright 2011-2015 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "split/kernel_background_buffer_update.h"
18
/* Split-kernel OpenCL entry point wrapping kernel_background_buffer_update()
 * (declared in split/kernel_background_buffer_update.h).
 *
 * Each work-item obtains a ray index through get_ray_index() from
 * QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS, passes it to
 * kernel_background_buffer_update(), and then hands that function's return
 * value to enqueue_ray_index_local() as the enqueue flag for
 * QUEUE_ACTIVE_AND_REGENERATED_RAYS.
 *
 * Notes on the arguments, as far as this file shows:
 *  - kg and sd arrive as raw char buffers and are cast to KernelGlobals*
 *    and ShaderData* at the call site below.
 *  - data is not referenced anywhere in this kernel body.
 *  - Queue_data, Queue_index and queuesize are used only for the queue
 *    bookkeeping in this kernel; every other argument is forwarded
 *    unchanged to kernel_background_buffer_update().
 *  - work_pool_wgs/num_samples exist only when __WORK_STEALING__ is
 *    defined, debugdata_coop only when __KERNEL_DEBUG__ is defined.
 */
19 __kernel void kernel_ocl_path_trace_background_buffer_update(
20         ccl_global char *kg,
21         ccl_constant KernelData *data,
22         ccl_global char *sd,
23         ccl_global float *per_sample_output_buffers,
24         ccl_global uint *rng_state,
25         ccl_global uint *rng_coop,             /* Required for buffer Update */
26         ccl_global float3 *throughput_coop,    /* Required for background hit processing */
27         PathRadiance *PathRadiance_coop,       /* Required for background hit processing and buffer Update */
28         ccl_global Ray *Ray_coop,              /* Required for background hit processing */
29         ccl_global PathState *PathState_coop,  /* Required for background hit processing */
30         ccl_global float *L_transparent_coop,  /* Required for background hit processing and buffer Update */
31         ccl_global char *ray_state,            /* Stores information on the current state of a ray */
32         int sw, int sh, int sx, int sy, int stride,
33         int rng_state_offset_x,
34         int rng_state_offset_y,
35         int rng_state_stride,
36         ccl_global unsigned int *work_array,   /* Denotes work of each ray */
37         ccl_global int *Queue_data,            /* Queues memory */
38         ccl_global int *Queue_index,           /* Tracks the number of elements in each queue */
39         int queuesize,                         /* Size (capacity) of each queue */
40         int end_sample,
41         int start_sample,
42 #ifdef __WORK_STEALING__
43         ccl_global unsigned int *work_pool_wgs,
44         unsigned int num_samples,
45 #endif
46 #ifdef __KERNEL_DEBUG__
47         DebugData *debugdata_coop,
48 #endif
49         int parallel_samples)                  /* Number of samples to be processed in parallel */
50 {
51         ccl_local unsigned int local_queue_atomics;  /* Per-work-group counter handed to enqueue_ray_index_local() */
52         if(get_local_id(0) == 0 && get_local_id(1) == 0) {  /* Only one work-item per group resets the counter */
53                 local_queue_atomics = 0;
54         }
55         barrier(CLK_LOCAL_MEM_FENCE);  /* Make the reset visible to the whole work-group before it is used */
56
57         int ray_index = get_global_id(1) * get_global_size(0) + get_global_id(0);  /* Flattened 2D global id */
58         if(ray_index == 0) {
59                 /* We will empty this queue in this kernel. */
60                 Queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
61         }
62         char enqueue_flag = 0;  /* Stays 0 unless kernel_background_buffer_update() returns otherwise */
63         ray_index = get_ray_index(ray_index,  /* From here on ray_index is the queue entry, not the thread id */
64                                   QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
65                                   Queue_data,
66                                   queuesize,
67                                   1);  /* NOTE(review): presumably asks get_ray_index() to clear the slot it reads - confirm */
68
69 #ifdef __COMPUTE_DEVICE_GPU__
70         /* On a GPU device, work-items that received no ray exit right away.
71          *
72          * On a CPU device all work-items must stay active, because barrier()
73          * calls follow later in the kernel and CPU devices expect every
74          * work-item to execute them. NOTE(review): those barriers are
75          * presumably inside enqueue_ray_index_local() - confirm.
76          */
77         if(ray_index == QUEUE_EMPTY_SLOT) {
78                 return;
79         }
80 #endif
81
82 #ifndef __COMPUTE_DEVICE_GPU__
83         if(ray_index != QUEUE_EMPTY_SLOT) {  /* Non-GPU path: skip the work but keep the work-item running */
84 #endif
85                 enqueue_flag =
86                         kernel_background_buffer_update((KernelGlobals *)kg,
87                                                         (ShaderData *)sd,
88                                                         per_sample_output_buffers,
89                                                         rng_state,
90                                                         rng_coop,
91                                                         throughput_coop,
92                                                         PathRadiance_coop,
93                                                         Ray_coop,
94                                                         PathState_coop,
95                                                         L_transparent_coop,
96                                                         ray_state,
97                                                         sw, sh, sx, sy, stride,
98                                                         rng_state_offset_x,
99                                                         rng_state_offset_y,
100                                                         rng_state_stride,
101                                                         work_array,
102                                                         end_sample,
103                                                         start_sample,
104 #ifdef __WORK_STEALING__
105                                                         work_pool_wgs,
106                                                         num_samples,
107 #endif
108 #ifdef __KERNEL_DEBUG__
109                                                         debugdata_coop,
110 #endif
111                                                         parallel_samples,
112                                                         ray_index);
113 #ifndef __COMPUTE_DEVICE_GPU__
114         }
115 #endif
116
117         /* Enqueue RAY_REGENERATED rays into QUEUE_ACTIVE_AND_REGENERATED_RAYS;
118          * these rays will be made active during the next SceneIntersect kernel.
119          */
120         enqueue_ray_index_local(ray_index,
121                                 QUEUE_ACTIVE_AND_REGENERATED_RAYS,
122                                 enqueue_flag,  /* 0 for work-items that had no ray on the non-GPU path */
123                                 queuesize,
124                                 &local_queue_atomics,
125                                 Queue_data,
126                                 Queue_index);
127 }