Cycles: Replace __MAX_CLOSURE__ build option with runtime integrator variable
[blender.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* BSSRDF using disk based importance sampling.
20  *
21  * BSSRDF Importance Sampling, SIGGRAPH 2013
22  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
23  *
24  */
25
26 /* TODO:
27  * - test using power heuristic for combing bssrdfs
28  * - try to reduce one sample model variance
29  */
30
31 ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
32                                                  const ShaderClosure *sc,
33                                                  float disk_r,
34                                                  float r,
35                                                  bool all)
36 {
37         /* this is the veach one-sample model with balance heuristic, some pdf
38          * factors drop out when using balance heuristic weighting */
39         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
40         float pdf_sum = 0.0f;
41         float sample_weight_inv = 0.0f;
42
43         if(!all) {
44                 float sample_weight_sum = 0.0f;
45
46                 for(int i = 0; i < sd->num_closure; i++) {
47                         sc = &sd->closure[i];
48
49                         if(CLOSURE_IS_BSSRDF(sc->type)) {
50                                 sample_weight_sum += sc->sample_weight;
51                         }
52                 }
53
54                 sample_weight_inv = 1.0f/sample_weight_sum;
55         }
56
57         for(int i = 0; i < sd->num_closure; i++) {
58                 sc = &sd->closure[i];
59                 
60                 if(CLOSURE_IS_BSSRDF(sc->type)) {
61                         /* in case of branched path integrate we sample all bssrdf's once,
62                          * for path trace we pick one, so adjust pdf for that */
63                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
64
65                         /* compute pdf */
66                         float pdf = bssrdf_pdf(sc, r);
67                         float disk_pdf = bssrdf_pdf(sc, disk_r);
68
69                         /* TODO power heuristic is not working correct here */
70                         eval_sum += sc->weight*pdf; //*sample_weight*disk_pdf;
71                         pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf;
72                 }
73         }
74
75         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
76 }
77
78 /* replace closures with a single diffuse bsdf closure after scatter step */
79 ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
80 {
81         sd->flag &= ~SD_CLOSURE_FLAGS;
82         sd->num_closure = 0;
83         sd->num_closure_left = kernel_data.integrator.max_closures;
84
85         if(hit) {
86                 Bssrdf *bssrdf = (Bssrdf *)sc;
87 #ifdef __PRINCIPLED__
88                 if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
89                         PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
90
91                         if(bsdf) {
92                                 bsdf->N = N;
93                                 bsdf->roughness = bssrdf->roughness;
94                                 sd->flag |= bsdf_principled_diffuse_setup(bsdf);
95
96                                 /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
97                                  * can recognize it as not being a regular Disney principled diffuse closure */
98                                 bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
99                         }
100                 }
101                 else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
102                         CLOSURE_IS_BSSRDF(bssrdf->type))
103 #endif  /* __PRINCIPLED__ */
104                 {
105                         DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
106
107                         if(bsdf) {
108                                 bsdf->N = N;
109                                 sd->flag |= bsdf_diffuse_setup(bsdf);
110
111                                 /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
112                                  * can recognize it as not being a regular diffuse closure */
113                                 bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
114                         }
115                 }
116         }
117 }
118
119 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
120 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
121 {
122         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
123
124         if(exponent == 1.0f) {
125                 /* nothing to do */
126         }
127         else if(exponent == 0.5f) {
128                 color.x = sqrtf(color.x);
129                 color.y = sqrtf(color.y);
130                 color.z = sqrtf(color.z);
131         }
132         else {
133                 color.x = powf(color.x, exponent);
134                 color.y = powf(color.y, exponent);
135                 color.z = powf(color.z, exponent);
136         }
137
138         return color;
139 }
140
141 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
142                                            ShaderData *sd,
143                                            ccl_addr_space PathState *state,
144                                            int state_flag,
145                                            float3 *eval,
146                                            float3 *N)
147 {
148         /* average color and texture blur at outgoing point */
149         float texture_blur;
150         float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
151
152         /* do we have bump mapping? */
153         bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
154
155         if(bump || texture_blur > 0.0f) {
156                 /* average color and normal at incoming point */
157                 shader_eval_surface(kg, sd, state, state_flag, kernel_data.integrator.max_closures);
158                 float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
159
160                 /* we simply divide out the average color and multiply with the average
161                  * of the other one. we could try to do this per closure but it's quite
162                  * tricky to match closures between shader evaluations, their number and
163                  * order may change, this is simpler */
164                 if(texture_blur > 0.0f) {
165                         out_color = subsurface_color_pow(out_color, texture_blur);
166                         in_color = subsurface_color_pow(in_color, texture_blur);
167
168                         *eval *= safe_divide_color(in_color, out_color);
169                 }
170         }
171 }
172
173 /* Subsurface scattering step, from a point on the surface to other
174  * nearby points on the same object.
175  */
176 ccl_device_inline int subsurface_scatter_multi_intersect(
177         KernelGlobals *kg,
178         LocalIntersection *ss_isect,
179         ShaderData *sd,
180         const ShaderClosure *sc,
181         uint *lcg_state,
182         float disk_u,
183         float disk_v,
184         bool all)
185 {
186         /* pick random axis in local frame and point on disk */
187         float3 disk_N, disk_T, disk_B;
188         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
189
190         disk_N = sd->Ng;
191         make_orthonormals(disk_N, &disk_T, &disk_B);
192
193         if(disk_u < 0.5f) {
194                 pick_pdf_N = 0.5f;
195                 pick_pdf_T = 0.25f;
196                 pick_pdf_B = 0.25f;
197                 disk_u *= 2.0f;
198         }
199         else if(disk_u < 0.75f) {
200                 float3 tmp = disk_N;
201                 disk_N = disk_T;
202                 disk_T = tmp;
203                 pick_pdf_N = 0.25f;
204                 pick_pdf_T = 0.5f;
205                 pick_pdf_B = 0.25f;
206                 disk_u = (disk_u - 0.5f)*4.0f;
207         }
208         else {
209                 float3 tmp = disk_N;
210                 disk_N = disk_B;
211                 disk_B = tmp;
212                 pick_pdf_N = 0.25f;
213                 pick_pdf_T = 0.25f;
214                 pick_pdf_B = 0.5f;
215                 disk_u = (disk_u - 0.75f)*4.0f;
216         }
217
218         /* sample point on disk */
219         float phi = M_2PI_F * disk_u;
220         float disk_r = disk_v;
221         float disk_height;
222
223         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
224
225         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
226
227         /* create ray */
228 #ifdef __SPLIT_KERNEL__
229         Ray ray_object = ss_isect->ray;
230         Ray *ray = &ray_object;
231 #else
232         Ray *ray = &ss_isect->ray;
233 #endif
234         ray->P = sd->P + disk_N*disk_height + disk_P;
235         ray->D = -disk_N;
236         ray->t = 2.0f*disk_height;
237         ray->dP = sd->dP;
238         ray->dD = differential3_zero();
239         ray->time = sd->time;
240
241         /* intersect with the same object. if multiple intersections are found it
242          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
243         scene_intersect_local(kg,
244                               *ray,
245                               ss_isect,
246                               sd->object,
247                               lcg_state,
248                               BSSRDF_MAX_HITS);
249         int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
250
251         for(int hit = 0; hit < num_eval_hits; hit++) {
252                 /* Quickly retrieve P and Ng without setting up ShaderData. */
253                 float3 hit_P;
254                 if(sd->type & PRIMITIVE_TRIANGLE) {
255                         hit_P = triangle_refine_local(kg,
256                                                       sd,
257                                                       &ss_isect->hits[hit],
258                                                       ray);
259                 }
260 #ifdef __OBJECT_MOTION__
261                 else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
262                         float3 verts[3];
263                         motion_triangle_vertices(
264                                 kg,
265                                 sd->object,
266                                 kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
267                                 sd->time,
268                                 verts);
269                         hit_P = motion_triangle_refine_local(kg,
270                                                              sd,
271                                                              &ss_isect->hits[hit],
272                                                              ray,
273                                                              verts);
274                 }
275 #endif  /* __OBJECT_MOTION__ */
276                 else {
277                         ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
278                         continue;
279                 }
280
281                 float3 hit_Ng = ss_isect->Ng[hit];
282                 if(ss_isect->hits[hit].object != OBJECT_NONE) {
283                         object_normal_transform(kg, sd, &hit_Ng);
284                 }
285
286                 /* probability densities for local frame axes */
287                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
288                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
289                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
290
291                 /* multiple importance sample between 3 axes, power heuristic
292                  * found to be slightly better than balance heuristic */
293                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
294
295                 /* real distance to sampled point */
296                 float r = len(hit_P - sd->P);
297
298                 /* evaluate */
299                 float w = mis_weight / pdf_N;
300                 if(ss_isect->num_hits > BSSRDF_MAX_HITS)
301                         w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
302                 float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
303
304                 ss_isect->weight[hit] = eval;
305         }
306
307 #ifdef __SPLIT_KERNEL__
308         ss_isect->ray = *ray;
309 #endif
310
311         return num_eval_hits;
312 }
313
314 ccl_device_noinline void subsurface_scatter_multi_setup(
315         KernelGlobals *kg,
316         LocalIntersection* ss_isect,
317         int hit,
318         ShaderData *sd,
319         ccl_addr_space PathState *state,
320         int state_flag,
321         const ShaderClosure *sc,
322         bool all)
323 {
324 #ifdef __SPLIT_KERNEL__
325         Ray ray_object = ss_isect->ray;
326         Ray *ray = &ray_object;
327 #else
328         Ray *ray = &ss_isect->ray;
329 #endif
330
331         /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
332 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
333         kernel_split_params.dummy_sd_flag = sd->flag;
334 #endif
335
336         /* Setup new shading point. */
337         shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
338
339         /* Optionally blur colors and bump mapping. */
340         float3 weight = ss_isect->weight[hit];
341         float3 N = sd->N;
342         subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
343
344         /* Setup diffuse BSDF. */
345         subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
346 }
347
348 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
349 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
350         int state_flag, const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
351 {
352         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
353
354         /* pick random axis in local frame and point on disk */
355         float3 disk_N, disk_T, disk_B;
356         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
357
358         disk_N = sd->Ng;
359         make_orthonormals(disk_N, &disk_T, &disk_B);
360
361         if(disk_u < 0.5f) {
362                 pick_pdf_N = 0.5f;
363                 pick_pdf_T = 0.25f;
364                 pick_pdf_B = 0.25f;
365                 disk_u *= 2.0f;
366         }
367         else if(disk_u < 0.75f) {
368                 float3 tmp = disk_N;
369                 disk_N = disk_T;
370                 disk_T = tmp;
371                 pick_pdf_N = 0.25f;
372                 pick_pdf_T = 0.5f;
373                 pick_pdf_B = 0.25f;
374                 disk_u = (disk_u - 0.5f)*4.0f;
375         }
376         else {
377                 float3 tmp = disk_N;
378                 disk_N = disk_B;
379                 disk_B = tmp;
380                 pick_pdf_N = 0.25f;
381                 pick_pdf_T = 0.25f;
382                 pick_pdf_B = 0.5f;
383                 disk_u = (disk_u - 0.75f)*4.0f;
384         }
385
386         /* sample point on disk */
387         float phi = M_2PI_F * disk_u;
388         float disk_r = disk_v;
389         float disk_height;
390
391         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
392
393         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
394
395         /* create ray */
396         Ray ray;
397         ray.P = sd->P + disk_N*disk_height + disk_P;
398         ray.D = -disk_N;
399         ray.t = 2.0f*disk_height;
400         ray.dP = sd->dP;
401         ray.dD = differential3_zero();
402         ray.time = sd->time;
403
404         /* intersect with the same object. if multiple intersections are
405          * found it will randomly pick one of them */
406         LocalIntersection ss_isect;
407         scene_intersect_local(kg, ray, &ss_isect, sd->object, lcg_state, 1);
408
409         /* evaluate bssrdf */
410         if(ss_isect.num_hits > 0) {
411                 float3 origP = sd->P;
412
413                 /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
414 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
415                 kernel_split_params.dummy_sd_flag = sd->flag;
416 #endif
417                 /* setup new shading point */
418                 shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
419
420                 /* probability densities for local frame axes */
421                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
422                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
423                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
424
425                 /* multiple importance sample between 3 axes, power heuristic
426                  * found to be slightly better than balance heuristic */
427                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
428
429                 /* real distance to sampled point */
430                 float r = len(sd->P - origP);
431
432                 /* evaluate */
433                 float w = (mis_weight * ss_isect.num_hits) / pdf_N;
434                 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
435         }
436
437         /* optionally blur colors and bump mapping */
438         float3 N = sd->N;
439         subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
440
441         /* setup diffuse bsdf */
442         subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
443 }
444
445 CCL_NAMESPACE_END
446