Cycles: Branched path tracing for the split kernel
[blender.git] / intern / cycles / kernel / split / kernel_subsurface_scatter.h
1 /*
2  * Copyright 2011-2017 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 #if defined(__BRANCHED_PATH__) && defined(__SUBSURFACE__)
20
21 ccl_device_inline void kernel_split_branched_path_subsurface_indirect_light_init(KernelGlobals *kg, int ray_index)
22 {
23         kernel_split_branched_path_indirect_loop_init(kg, ray_index);
24
25         SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
26
27         branched_state->ss_next_closure = 0;
28         branched_state->ss_next_sample = 0;
29
30         branched_state->num_hits = 0;
31         branched_state->next_hit = 0;
32
33         ADD_RAY_FLAG(kernel_split_state.ray_state, ray_index, RAY_BRANCHED_SUBSURFACE_INDIRECT);
34 }
35
36 ccl_device_noinline bool kernel_split_branched_path_subsurface_indirect_light_iter(KernelGlobals *kg, int ray_index)
37 {
38         SplitBranchedState *branched_state = &kernel_split_state.branched_state[ray_index];
39
40         ShaderData *sd = &branched_state->sd;
41         RNG rng = kernel_split_state.rng[ray_index];
42         PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
43         ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
44
45         for(int i = branched_state->ss_next_closure; i < sd->num_closure; i++) {
46                 ShaderClosure *sc = &sd->closure[i];
47
48                 if(!CLOSURE_IS_BSSRDF(sc->type))
49                         continue;
50
51                 /* set up random number generator */
52                 if(branched_state->ss_next_sample == 0 && branched_state->next_hit == 0 &&
53                    branched_state->next_closure == 0 && branched_state->next_sample == 0)
54                 {
55                         branched_state->lcg_state = lcg_state_init(&rng,
56                                                                    branched_state->path_state.rng_offset,
57                                                                    branched_state->path_state.sample,
58                                                                    0x68bc21eb);
59                 }
60                 int num_samples = kernel_data.integrator.subsurface_samples;
61                 float num_samples_inv = 1.0f/num_samples;
62                 RNG bssrdf_rng = cmj_hash(rng, i);
63
64                 /* do subsurface scatter step with copy of shader data, this will
65                  * replace the BSSRDF with a diffuse BSDF closure */
66                 for(int j = branched_state->ss_next_sample; j < num_samples; j++) {
67                         ccl_global SubsurfaceIntersection *ss_isect = &branched_state->ss_isect;
68                         float bssrdf_u, bssrdf_v;
69                         path_branched_rng_2D(kg,
70                                              &bssrdf_rng,
71                                              &branched_state->path_state,
72                                              j,
73                                              num_samples,
74                                              PRNG_BSDF_U,
75                                              &bssrdf_u,
76                                              &bssrdf_v);
77
78                         /* intersection is expensive so avoid doing multiple times for the same input */
79                         if(branched_state->next_hit == 0 && branched_state->next_closure == 0 && branched_state->next_sample == 0) {
80                                 RNG lcg_state = branched_state->lcg_state;
81                                 SubsurfaceIntersection ss_isect_private;
82
83                                 branched_state->num_hits = subsurface_scatter_multi_intersect(kg,
84                                                                                               &ss_isect_private,
85                                                                                               sd,
86                                                                                               sc,
87                                                                                               &lcg_state,
88                                                                                               bssrdf_u, bssrdf_v,
89                                                                                               true);
90
91                                 branched_state->lcg_state = lcg_state;
92                                 *ss_isect = ss_isect_private;
93                         }
94
95 #ifdef __VOLUME__
96                         Ray volume_ray = branched_state->ray;
97                         bool need_update_volume_stack =
98                                 kernel_data.integrator.use_volumes &&
99                                 sd->object_flag & SD_OBJECT_INTERSECTS_VOLUME;
100 #endif  /* __VOLUME__ */
101
102                         /* compute lighting with the BSDF closure */
103                         for(int hit = branched_state->next_hit; hit < branched_state->num_hits; hit++) {
104                                 ShaderData *bssrdf_sd = &kernel_split_state.sd[ray_index];
105                                 *bssrdf_sd = *sd; /* note: copy happens each iteration of inner loop, this is
106                                                    * important as the indirect path will write into bssrdf_sd */
107
108                                 SubsurfaceIntersection ss_isect_private = *ss_isect;
109                                 subsurface_scatter_multi_setup(kg,
110                                                                &ss_isect_private,
111                                                                hit,
112                                                                bssrdf_sd,
113                                                                &branched_state->path_state,
114                                                                branched_state->path_state.flag,
115                                                                sc,
116                                                                true);
117                                 *ss_isect = ss_isect_private;
118
119                                 ccl_global PathState *hit_state = &kernel_split_state.path_state[ray_index];
120                                 *hit_state = branched_state->path_state;
121
122                                 path_state_branch(hit_state, j, num_samples);
123
124 #ifdef __VOLUME__
125                                 if(need_update_volume_stack) {
126                                         /* Setup ray from previous surface point to the new one. */
127                                         float3 P = ray_offset(bssrdf_sd->P, -bssrdf_sd->Ng);
128                                         volume_ray.D = normalize_len(P - volume_ray.P, &volume_ray.t);
129
130                                         /* this next part is expensive as it does scene intersection so only do once */
131                                         if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
132                                                 for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
133                                                         branched_state->volume_stack[k] = hit_state->volume_stack[k];
134                                                 }
135
136                                                 kernel_volume_stack_update_for_subsurface(kg,
137                                                                                           emission_sd,
138                                                                                           &volume_ray,
139                                                                                           branched_state->volume_stack);
140                                         }
141
142                                         for(int k = 0; k < VOLUME_STACK_SIZE; k++) {
143                                                 hit_state->volume_stack[k] = branched_state->volume_stack[k];
144                                         }
145                                 }
146 #endif  /* __VOLUME__ */
147
148 #ifdef __EMISSION__
149                                 if(branched_state->next_closure == 0 && branched_state->next_sample == 0) {
150                                         /* direct light */
151                                         if(kernel_data.integrator.use_direct_light) {
152                                                 int all = (kernel_data.integrator.sample_all_lights_direct) ||
153                                                               (branched_state->path_state.flag & PATH_RAY_SHADOW_CATCHER);
154                                                 kernel_branched_path_surface_connect_light(kg,
155                                                                                            &rng,
156                                                                                            bssrdf_sd,
157                                                                                            emission_sd,
158                                                                                            hit_state,
159                                                                                            branched_state->throughput,
160                                                                                            num_samples_inv,
161                                                                                            L,
162                                                                                            all);
163                                         }
164                                 }
165 #endif  /* __EMISSION__ */
166
167                                 /* indirect light */
168                                 if(kernel_split_branched_path_surface_indirect_light_iter(kg,
169                                                                                           ray_index,
170                                                                                           num_samples_inv,
171                                                                                           bssrdf_sd,
172                                                                                           false))
173                                 {
174                                         branched_state->ss_next_closure = i;
175                                         branched_state->ss_next_sample = j;
176                                         branched_state->next_hit = hit;
177
178                                         return true;
179                                 }
180
181                                 branched_state->next_closure = 0;
182                         }
183
184                         branched_state->next_hit = 0;
185                 }
186
187                 branched_state->ss_next_sample = 0;
188         }
189
190         kernel_split_branched_path_indirect_loop_end(kg, ray_index);
191
192         return false;
193 }
194
195 #endif  /* __BRANCHED_PATH__ && __SUBSURFACE__ */
196
197 ccl_device void kernel_subsurface_scatter(KernelGlobals *kg)
198 {
199         int thread_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
200         if(thread_index == 0) {
201                 /* We will empty both queues in this kernel. */
202                 kernel_split_params.queue_index[QUEUE_ACTIVE_AND_REGENERATED_RAYS] = 0;
203                 kernel_split_params.queue_index[QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS] = 0;
204         }
205
206         int ray_index = ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0);
207         ray_index = get_ray_index(kg, ray_index,
208                                   QUEUE_ACTIVE_AND_REGENERATED_RAYS,
209                                   kernel_split_state.queue_data,
210                                   kernel_split_params.queue_size,
211                                   1);
212         get_ray_index(kg, thread_index,
213                       QUEUE_HITBG_BUFF_UPDATE_TOREGEN_RAYS,
214                       kernel_split_state.queue_data,
215                       kernel_split_params.queue_size,
216                       1);
217
218 #ifdef __SUBSURFACE__
219         ccl_global char *ray_state = kernel_split_state.ray_state;
220         ccl_global PathState *state = &kernel_split_state.path_state[ray_index];
221         PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
222         RNG rng = kernel_split_state.rng[ray_index];
223         ccl_global Ray *ray = &kernel_split_state.ray[ray_index];
224         ccl_global float3 *throughput = &kernel_split_state.throughput[ray_index];
225         ccl_global SubsurfaceIndirectRays *ss_indirect = &kernel_split_state.ss_rays[ray_index];
226         ShaderData *sd = &kernel_split_state.sd[ray_index];
227         ShaderData *emission_sd = &kernel_split_state.sd_DL_shadow[ray_index];
228
229         if(IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
230                 if(sd->flag & SD_BSSRDF) {
231
232 #ifdef __BRANCHED_PATH__
233                         if(!kernel_data.integrator.branched) {
234 #endif
235                                 if(kernel_path_subsurface_scatter(kg,
236                                                                       sd,
237                                                                       emission_sd,
238                                                                       L,
239                                                                       state,
240                                                                       &rng,
241                                                                       ray,
242                                                                       throughput,
243                                                                       ss_indirect)) {
244                                         kernel_split_path_end(kg, ray_index);
245                                 }
246 #ifdef __BRANCHED_PATH__
247                         }
248                         else if(IS_FLAG(ray_state, ray_index, RAY_BRANCHED_INDIRECT)) {
249                                 float bssrdf_probability;
250                                 ShaderClosure *sc = subsurface_scatter_pick_closure(kg, sd, &bssrdf_probability);
251
252                                 /* modify throughput for picking bssrdf or bsdf */
253                                 *throughput *= bssrdf_probability;
254
255                                 /* do bssrdf scatter step if we picked a bssrdf closure */
256                                 if(sc) {
257                                         uint lcg_state = lcg_state_init(&rng, state->rng_offset, state->sample, 0x68bc21eb);
258
259                                         float bssrdf_u, bssrdf_v;
260                                         path_state_rng_2D(kg,
261                                                               &rng,
262                                                               state,
263                                                               PRNG_BSDF_U,
264                                                               &bssrdf_u, &bssrdf_v);
265                                         subsurface_scatter_step(kg,
266                                                                     sd,
267                                                                     state,
268                                                                     state->flag,
269                                                                     sc,
270                                                                     &lcg_state,
271                                                                     bssrdf_u, bssrdf_v,
272                                                                     false);
273                                 }
274                         }
275                         else {
276                                 kernel_split_branched_path_subsurface_indirect_light_init(kg, ray_index);
277
278                                 if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
279                                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
280                                 }
281                         }
282 #endif
283                 }
284                 kernel_split_state.rng[ray_index] = rng;
285         }
286
287 #  ifdef __BRANCHED_PATH__
288         if(ccl_global_id(0) == 0 && ccl_global_id(1) == 0) {
289                 kernel_split_params.queue_index[QUEUE_SUBSURFACE_INDIRECT_ITER] = 0;
290         }
291
292         /* iter loop */
293         ray_index = get_ray_index(kg, ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0),
294                                   QUEUE_SUBSURFACE_INDIRECT_ITER,
295                                   kernel_split_state.queue_data,
296                                   kernel_split_params.queue_size,
297                                   1);
298
299         if(IS_STATE(ray_state, ray_index, RAY_SUBSURFACE_INDIRECT_NEXT_ITER)) {
300                 /* for render passes, sum and reset indirect light pass variables
301                  * for the next samples */
302                 path_radiance_sum_indirect(&kernel_split_state.path_radiance[ray_index]);
303                 path_radiance_reset_indirect(&kernel_split_state.path_radiance[ray_index]);
304
305                 if(kernel_split_branched_path_subsurface_indirect_light_iter(kg, ray_index)) {
306                         ASSIGN_RAY_STATE(ray_state, ray_index, RAY_REGENERATED);
307                 }
308         }
309 #  endif  /* __BRANCHED_PATH__ */
310
311 #endif  /* __SUBSURFACE__ */
312
313 }
314
315 CCL_NAMESPACE_END