Cycles: Replace __MAX_CLOSURE__ build option with runtime integrator variable
[blender.git] / intern / cycles / kernel / split / kernel_do_volume.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 #if defined(__BRANCHED_PATH__) && defined(__VOLUME__)
20
/* Prepare ray `ray_index` for the branched-path volume indirect light loop.
 *
 * Calls kernel_split_branched_path_indirect_loop_init() for the ray and then
 * adds the RAY_BRANCHED_VOLUME_INDIRECT flag to the ray's entry in
 * kernel_split_state.ray_state.
 *
 * kg:        kernel globals, forwarded to the loop initialisation.
 * ray_index: index of the ray in the split state arrays.
 */
21 ccl_device_inline void kernel_split_branched_path_volume_indirect_light_init(KernelGlobals *kg, int ray_index)
22 {
23         kernel_split_branched_path_indirect_loop_init(kg, ray_index);
24
           /* Flag is added only after the loop state has been initialised. */
25         ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_VOLUME_INDIRECT);
26 }
27
/* Run, or resume, the per-sample volume loop of a branched path for ray
 * `ray_index`.
 *
 * Iteration starts at branched_state->next_sample and goes up to
 * kernel_data.integrator.volume_samples. For every sample the ray's working
 * path state, ray and throughput slots in kernel_split_state are reloaded
 * from the saved branched state (throughput scaled by 1/volume_samples)
 * before the volume segment is integrated.
 *
 * Returns true when the function exits with work still pending:
 *  - a scattered sample produced an indirect bounce and
 *    kernel_split_branched_indirect_start_shared() was either skipped or
 *    returned false; next_sample then holds the sample to resume from, or
 *  - all samples were visited but shared_sample_count is still non-zero.
 * The caller in this file, kernel_do_volume(), marks the ray RAY_REGENERATED
 * in that case.
 *
 * Returns false once the loop is finished: the indirect loop is ended via
 * kernel_split_branched_path_indirect_loop_end() and kernel_volume_shadow()
 * is applied to the ray's throughput.
 *
 * Without __VOLUME_SCATTER__ the scatter handling is compiled out and every
 * sample only runs kernel_volume_integrate().
 */
28 ccl_device_noinline bool kernel_split_branched_path_volume_indirect_light_iter(KernelGlobals *kg, int ray_index)
29 {
30         SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
31
32         ShaderData *sd = kernel_split_sd(sd, ray_index);
33         PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
34         ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
35
36         /* GPU: no decoupled ray marching, scatter probabilistically */
37         int num_samples = kernel_data.integrator.volume_samples;
38         float num_samples_inv = 1.0f/num_samples;
39
           /* The volume segment follows the saved ray. It ends at the saved
            * intersection distance unless the saved ray state is
            * RAY_HIT_BACKGROUND, in which case it is unbounded (FLT_MAX).
            * The single saved state is passed by address and read as element 0. */
40         Ray volume_ray = branched_state->ray;
41         volume_ray.t = (!IS_STATE(&branched_state->ray_state, 0, RAY_HIT_BACKGROUND)) ? branched_state->isect.t : FLT_MAX;
42
43         bool heterogeneous = volume_stack_is_heterogeneous(kg, branched_state->path_state.volume_stack);
44
45         for(int j = branched_state->next_sample; j < num_samples; j++) {
                   /* Reload the ray's working slots from the saved branched state,
                    * so every sample starts from the same path state, ray and
                    * (pre-scaled) throughput. */
46                 ccl_global PathState *ps = &kernel_split_state.path_state[ray_index];
47                 *ps = branched_state->path_state;
48
49                 ccl_global Ray *pray = &kernel_split_state.ray[ray_index];
50                 *pray = branched_state->ray;
51
52                 ccl_global float3 *tp = &kernel_split_state.throughput[ray_index];
53                 *tp = branched_state->throughput * num_samples_inv;
54
55                 /* branch RNG state */
56                 path_state_branch(ps, j, num_samples);
57
58                 /* integrate along volume segment with distance sampling */
59                 VolumeIntegrateResult result = kernel_volume_integrate(
60                         kg, ps, sd, &volume_ray, L, tp, heterogeneous);
61
62 #  ifdef __VOLUME_SCATTER__
63                 if(result == VOLUME_PATH_SCATTERED) {
64                         /* direct lighting */
                           /* NOTE(review): this passes the saved branched_state->path_state,
                            * not the branched working copy `ps` used by the integrate and
                            * bounce calls around it - confirm this is intentional. */
65                         kernel_path_volume_connect_light(kg, sd, emission_sd, *tp, &branched_state->path_state, L);
66
67                         /* indirect light bounce */
68                         if(!kernel_path_volume_bounce(kg, sd, tp, ps, &L->state, pray)) {
                                   /* No bounce for this sample: go on to the next one. */
69                                 continue;
70                         }
71
72                         /* start the indirect path */
                           /* Record where to resume when this function is called again. */
73                         branched_state->next_closure = 0;
74                         branched_state->next_sample = j+1;
75
76                         /* Attempting to share too many samples is slow for volumes as it causes us to
77                          * loop here more and have many calls to kernel_volume_integrate which evaluates
78                          * shaders. The many expensive shader evaluations cause the work load to become
79                          * unbalanced and many threads to become idle in this kernel. Limiting the
80                          * number of shared samples here helps quite a lot.
81                          */
82                         if(branched_state->shared_sample_count < 2) {
83                                 if(kernel_split_branched_indirect_start_shared(kg, ray_index)) {
                                           /* Keep looping over the remaining samples in this call. */
84                                         continue;
85                                 }
86                         }
87
                           /* Leave the loop with the bounce pending; caller regenerates the ray. */
88                         return true;
89                 }
90 #  endif
91         }
92
           /* All samples have been visited. */
93         branched_state->next_sample = num_samples;
94
           /* Report pending work for as long as shared_sample_count is non-zero. */
95         branched_state->waiting_on_shared_samples = (branched_state->shared_sample_count > 0);
96         if(branched_state->waiting_on_shared_samples) {
97                 return true;
98         }
99
100         kernel_split_branched_path_indirect_loop_end(kg, ray_index);
101
102         /* todo: avoid this calculation using decoupled ray marching */
            /* Work on a local copy of the throughput and write it back afterwards. */
103         float3 throughput = kernel_split_state.throughput[ray_index];
104         kernel_volume_shadow(kg, emission_sd, &kernel_split_state.path_state[ray_index], &volume_ray, &throughput);
105         kernel_split_state.throughput[ray_index] = throughput;
106
107         return false;
108 }
109
110 #endif  /* __BRANCHED_PATH__ && __VOLUME__ */
111
/* Volume stage of the split kernel.
 *
 * With __VOLUME__ defined, each work item:
 *  - (work item 0,0 only) resets to 0 the queue_index counters of the queues
 *    this kernel reads from;
 *  - resolves its ray index, going through QUEUE_ACTIVE_AND_REGENERATED_RAYS
 *    when use_queues_flag is set;
 *  - for a ray in state RAY_ACTIVE or RAY_HIT_BACKGROUND whose first volume
 *    stack entry has a shader, either integrates the volume segment once or,
 *    with the branched integrator, starts the multi-sample volume loop;
 *  - with __BRANCHED_PATH__, additionally resumes the multi-sample loop for a
 *    ray looked up in QUEUE_VOLUME_INDIRECT_ITER that is in state
 *    RAY_VOLUME_INDIRECT_NEXT_ITER.
 *
 * Without __VOLUME__ the function body is empty.
 */
112 ccl_device void kernel_do_volume(KernelGlobals *kg)
113 {
114 #ifdef __VOLUME__
115         /* We will empty this queue in this kernel. */
116         if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
117                 kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
118 #  ifdef __BRANCHED_PATH__
119                 kernel_split_params.queue_index[QUEUE_VOLUME_INDIRECT_ITER] = 0;
120 #  endif  /* __BRANCHED_PATH__ */
121         }
122
            /* Flatten the 2D global id into a single index: row * row size + column. */
123         int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
124
            /* With queues enabled the flat index is replaced by the result of
             * get_ray_index() for the active/regenerated queue.
             * NOTE(review): the meaning of the trailing argument `1` is not
             * visible in this file - confirm against get_ray_index(). */
125         if(*kernel_split_params.use_queues_flag) {
126                 ray_index = get_ray_index(kg, ray_index,
127                                           QUEUE_ACTIVE_AND_REGENERATED_RAYS,
128                                           kernel_split_state.queue_data,
129                                           kernel_split_params.queue_size,
130                                           1);
131         }
132
133         ccl_global char *ray_state = kernel_split_state.ray_state;
134
            /* Only addresses are formed here; they are dereferenced inside the
             * state check below. */
135         PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
136         ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
137
138         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE) ||
139            IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND)) {
140                 ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
141                 ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
142                 ccl_global Intersection *isect = &kernel_split_state.isect[ray_index];
143                 ShaderData *sd = kernel_split_sd(sd, ray_index);
144                 ShaderData *emission_sd = AS_SHADER_DATA(&kernel_split_state.sd_DL_shadow[ray_index]);
145
                    /* `hit` is false exactly when the ray state is RAY_HIT_BACKGROUND. */
146                 bool hit = ! IS_STATE(ray_state, ray_index, RAY_HIT_BACKGROUND);
147
148                 /* Sanitize volume stack. */
149                 if(!hit) {
150                         kernel_volume_clean_stack(kg, state->volume_stack);
151                 }
152                 /* volume attenuation, emission, scatter */
153                 if(state->volume_stack[0].shader != SHADER_NONE) {
                            /* Integrate over a copy of the ray, clipped to the hit distance,
                             * or unbounded (FLT_MAX) when nothing was hit. */
154                         Ray volume_ray = *ray;
155                         volume_ray.t = (hit)? isect->t: FLT_MAX;
156
157 #  ifdef __BRANCHED_PATH__
                            /* Single-step path: taken when the branched integrator is off,
                             * or when the ray carries the RAY_BRANCHED_INDIRECT flag. */
158                         if(!kernel_data.integrator.branched || IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
159 #  endif  /* __BRANCHED_PATH__ */
160                                 bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
161
162                                 {
163                                         /* integrate along volume segment with distance sampling */
164                                         VolumeIntegrateResult result = kernel_volume_integrate(
165                                                 kg, state, sd, &volume_ray, L, throughput, heterogeneous);
166
167 #  ifdef __VOLUME_SCATTER__
168                                         if(result == VOLUME_PATH_SCATTERED) {
169                                                 /* direct lighting */
170                                                 kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
171
172                                                 /* indirect light bounce */
                                                    /* A successful bounce marks the ray RAY_REGENERATED;
                                                     * otherwise the path is ended. */
173                                                 if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray)) {
174                                                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
175                                                 }
176                                                 else {
177                                                         kernel_split_path_end(kg, ray_index);
178                                                 }
179                                         }
180 #  endif  /* __VOLUME_SCATTER__ */
181                                 }
182
183 #  ifdef __BRANCHED_PATH__
184                         }
185                         else {
                                    /* Branched integrator without the RAY_BRANCHED_INDIRECT flag:
                                     * set up the multi-sample volume loop and run its first pass.
                                     * A true result means work is pending, so the ray is marked
                                     * RAY_REGENERATED. */
186                                 kernel_split_branched_path_volume_indirect_light_init(kg, ray_index);
187
188                                 if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
189                                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
190                                 }
191                         }
192 #  endif  /* __BRANCHED_PATH__ */
193                 }
194         }
195
196 #  ifdef __BRANCHED_PATH__
197         /* iter loop */
            /* Second lookup for this work item, now in QUEUE_VOLUME_INDIRECT_ITER.
             * Unlike the lookup above it is not gated on use_queues_flag. */
198         ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
199                                   QUEUE_VOLUME_INDIRECT_ITER,
200                                   kernel_split_state.queue_data,
201                                   kernel_split_params.queue_size,
202                                   1);
203
204         if(IS_STATE(ray_state, ray_index, RAY_VOLUME_INDIRECT_NEXT_ITER)) {
205                 /* for render passes, sum and reset indirect light pass variables
206                  * for the next samples */
207                 path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
208                 path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
209
                    /* Resume the multi-sample loop from its saved next_sample. */
210                 if(kernel_split_branched_path_volume_indirect_light_iter(kg, ray_index)) {
211                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
212                 }
213         }
214 #  endif  /* __BRANCHED_PATH__ */
215
216 #endif  /* __VOLUME__ */
217 }
218
219
220 CCL_NAMESPACE_END