Fix build error on Windows 32 bit.
[blender-staging.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* BSSRDF using disk based importance sampling.
20  *
21  * BSSRDF Importance Sampling, SIGGRAPH 2013
22  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
23  */
24
25 ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
26                                                  const ShaderClosure *sc,
27                                                  float disk_r,
28                                                  float r,
29                                                  bool all)
30 {
31         /* this is the veach one-sample model with balance heuristic, some pdf
32          * factors drop out when using balance heuristic weighting */
33         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
34         float pdf_sum = 0.0f;
35         float sample_weight_inv = 0.0f;
36
37         if(!all) {
38                 float sample_weight_sum = 0.0f;
39
40                 for(int i = 0; i < sd->num_closure; i++) {
41                         sc = &sd->closure[i];
42
43                         if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
44                                 sample_weight_sum += sc->sample_weight;
45                         }
46                 }
47
48                 sample_weight_inv = 1.0f/sample_weight_sum;
49         }
50
51         for(int i = 0; i < sd->num_closure; i++) {
52                 sc = &sd->closure[i];
53                 
54                 if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
55                         /* in case of branched path integrate we sample all bssrdf's once,
56                          * for path trace we pick one, so adjust pdf for that */
57                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
58
59                         /* compute pdf */
60                         float3 eval = bssrdf_eval(sc, r);
61                         float pdf = bssrdf_pdf(sc, disk_r);
62
63                         eval_sum += sc->weight * eval;
64                         pdf_sum += sample_weight * pdf;
65                 }
66         }
67
68         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
69 }
70
71 /* replace closures with a single diffuse bsdf closure after scatter step */
72 ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, float3 N)
73 {
74         sd->flag &= ~SD_CLOSURE_FLAGS;
75         sd->num_closure = 0;
76         sd->num_closure_left = kernel_data.integrator.max_closures;
77
78         Bssrdf *bssrdf = (Bssrdf *)sc;
79 #ifdef __PRINCIPLED__
80         if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID ||
81            bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID)
82         {
83                 PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
84
85                 if(bsdf) {
86                         bsdf->N = N;
87                         bsdf->roughness = bssrdf->roughness;
88                         sd->flag |= bsdf_principled_diffuse_setup(bsdf);
89
90                         /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
91                          * can recognize it as not being a regular Disney principled diffuse closure */
92                         bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
93                 }
94         }
95         else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
96                         CLOSURE_IS_BSSRDF(bssrdf->type))
97 #endif  /* __PRINCIPLED__ */
98         {
99                 DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
100
101                 if(bsdf) {
102                         bsdf->N = N;
103                         sd->flag |= bsdf_diffuse_setup(bsdf);
104
105                         /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
106                          * can recognize it as not being a regular diffuse closure */
107                         bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
108                 }
109         }
110 }
111
112 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
113 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
114 {
115         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
116
117         if(exponent == 1.0f) {
118                 /* nothing to do */
119         }
120         else if(exponent == 0.5f) {
121                 color.x = sqrtf(color.x);
122                 color.y = sqrtf(color.y);
123                 color.z = sqrtf(color.z);
124         }
125         else {
126                 color.x = powf(color.x, exponent);
127                 color.y = powf(color.y, exponent);
128                 color.z = powf(color.z, exponent);
129         }
130
131         return color;
132 }
133
134 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
135                                            ShaderData *sd,
136                                            ccl_addr_space PathState *state,
137                                            float3 *eval,
138                                            float3 *N)
139 {
140         /* average color and texture blur at outgoing point */
141         float texture_blur;
142         float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
143
144         /* do we have bump mapping? */
145         bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
146
147         if(bump || texture_blur > 0.0f) {
148                 /* average color and normal at incoming point */
149                 shader_eval_surface(kg, sd, state, state->flag);
150                 float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
151
152                 /* we simply divide out the average color and multiply with the average
153                  * of the other one. we could try to do this per closure but it's quite
154                  * tricky to match closures between shader evaluations, their number and
155                  * order may change, this is simpler */
156                 if(texture_blur > 0.0f) {
157                         out_color = subsurface_color_pow(out_color, texture_blur);
158                         in_color = subsurface_color_pow(in_color, texture_blur);
159
160                         *eval *= safe_divide_color(in_color, out_color);
161                 }
162         }
163 }
164
165 /* Subsurface scattering step, from a point on the surface to other
166  * nearby points on the same object.
167  */
168 ccl_device_inline int subsurface_scatter_disk(
169         KernelGlobals *kg,
170         LocalIntersection *ss_isect,
171         ShaderData *sd,
172         const ShaderClosure *sc,
173         uint *lcg_state,
174         float disk_u,
175         float disk_v,
176         bool all)
177 {
178         /* pick random axis in local frame and point on disk */
179         float3 disk_N, disk_T, disk_B;
180         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
181
182         disk_N = sd->Ng;
183         make_orthonormals(disk_N, &disk_T, &disk_B);
184
185         if(disk_v < 0.5f) {
186                 pick_pdf_N = 0.5f;
187                 pick_pdf_T = 0.25f;
188                 pick_pdf_B = 0.25f;
189                 disk_v *= 2.0f;
190         }
191         else if(disk_v < 0.75f) {
192                 float3 tmp = disk_N;
193                 disk_N = disk_T;
194                 disk_T = tmp;
195                 pick_pdf_N = 0.25f;
196                 pick_pdf_T = 0.5f;
197                 pick_pdf_B = 0.25f;
198                 disk_v = (disk_v - 0.5f)*4.0f;
199         }
200         else {
201                 float3 tmp = disk_N;
202                 disk_N = disk_B;
203                 disk_B = tmp;
204                 pick_pdf_N = 0.25f;
205                 pick_pdf_T = 0.25f;
206                 pick_pdf_B = 0.5f;
207                 disk_v = (disk_v - 0.75f)*4.0f;
208         }
209
210         /* sample point on disk */
211         float phi = M_2PI_F * disk_v;
212         float disk_height, disk_r;
213
214         bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
215
216         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
217
218         /* create ray */
219 #ifdef __SPLIT_KERNEL__
220         Ray ray_object = ss_isect->ray;
221         Ray *ray = &ray_object;
222 #else
223         Ray *ray = &ss_isect->ray;
224 #endif
225         ray->P = sd->P + disk_N*disk_height + disk_P;
226         ray->D = -disk_N;
227         ray->t = 2.0f*disk_height;
228         ray->dP = sd->dP;
229         ray->dD = differential3_zero();
230         ray->time = sd->time;
231
232         /* intersect with the same object. if multiple intersections are found it
233          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
234         scene_intersect_local(kg,
235                               *ray,
236                               ss_isect,
237                               sd->object,
238                               lcg_state,
239                               BSSRDF_MAX_HITS);
240         int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
241
242         for(int hit = 0; hit < num_eval_hits; hit++) {
243                 /* Quickly retrieve P and Ng without setting up ShaderData. */
244                 float3 hit_P;
245                 if(sd->type & PRIMITIVE_TRIANGLE) {
246                         hit_P = triangle_refine_local(kg,
247                                                       sd,
248                                                       &ss_isect->hits[hit],
249                                                       ray);
250                 }
251 #ifdef __OBJECT_MOTION__
252                 else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
253                         float3 verts[3];
254                         motion_triangle_vertices(
255                                 kg,
256                                 sd->object,
257                                 kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
258                                 sd->time,
259                                 verts);
260                         hit_P = motion_triangle_refine_local(kg,
261                                                              sd,
262                                                              &ss_isect->hits[hit],
263                                                              ray,
264                                                              verts);
265                 }
266 #endif  /* __OBJECT_MOTION__ */
267                 else {
268                         ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
269                         continue;
270                 }
271
272                 float3 hit_Ng = ss_isect->Ng[hit];
273                 if(ss_isect->hits[hit].object != OBJECT_NONE) {
274                         object_normal_transform(kg, sd, &hit_Ng);
275                 }
276
277                 /* Probability densities for local frame axes. */
278                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
279                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
280                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
281
282                 /* Multiple importance sample between 3 axes, power heuristic
283                  * found to be slightly better than balance heuristic. pdf_N
284                  * in the MIS weight and denominator cancelled out. */
285                 float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
286                 if(ss_isect->num_hits > BSSRDF_MAX_HITS) {
287                         w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
288                 }
289
290                 /* Real distance to sampled point. */
291                 float r = len(hit_P - sd->P);
292
293                 /* Evaluate profiles. */
294                 float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
295
296                 ss_isect->weight[hit] = eval;
297         }
298
299 #ifdef __SPLIT_KERNEL__
300         ss_isect->ray = *ray;
301 #endif
302
303         return num_eval_hits;
304 }
305
306 ccl_device_noinline void subsurface_scatter_multi_setup(
307         KernelGlobals *kg,
308         LocalIntersection* ss_isect,
309         int hit,
310         ShaderData *sd,
311         ccl_addr_space PathState *state,
312         const ShaderClosure *sc)
313 {
314 #ifdef __SPLIT_KERNEL__
315         Ray ray_object = ss_isect->ray;
316         Ray *ray = &ray_object;
317 #else
318         Ray *ray = &ss_isect->ray;
319 #endif
320
321         /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
322 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
323         kernel_split_params.dummy_sd_flag = sd->flag;
324 #endif
325
326         /* Setup new shading point. */
327         shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
328
329         /* Optionally blur colors and bump mapping. */
330         float3 weight = ss_isect->weight[hit];
331         float3 N = sd->N;
332         subsurface_color_bump_blur(kg, sd, state, &weight, &N);
333
334         /* Setup diffuse BSDF. */
335         subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, N);
336 }
337
338 /* Random walk subsurface scattering.
339  *
340  * "Practical and Controllable Subsurface Scattering for Production Path
341  *  Tracing". Matt Jen-Yuan Chiang, Peter Kutz, Brent Burley. SIGGRAPH 2016. */
342
343 ccl_device void subsurface_random_walk_remap(
344         const float A,
345         const float d,
346         float *sigma_t,
347         float *sigma_s)
348 {
349         /* Compute attenuation and scattering coefficients from albedo. */
350         const float a = 1.0f - expf(A * (-5.09406f + A * (2.61188f - A * 4.31805f)));
351         const float s = 1.9f - A + 3.5f * sqr(A - 0.8f);
352
353         *sigma_t = 1.0f / fmaxf(d * s, 1e-16f);
354         *sigma_s = *sigma_t * a;
355 }
356
357 ccl_device void subsurface_random_walk_coefficients(
358         const ShaderClosure *sc,
359         float3 *sigma_t,
360         float3 *sigma_s,
361         float3 *weight)
362 {
363         const Bssrdf *bssrdf = (const Bssrdf*)sc;
364         const float3 A = bssrdf->albedo;
365         const float3 d = bssrdf->radius;
366         float sigma_t_x, sigma_t_y, sigma_t_z;
367         float sigma_s_x, sigma_s_y, sigma_s_z;
368
369         subsurface_random_walk_remap(A.x, d.x, &sigma_t_x, &sigma_s_x);
370         subsurface_random_walk_remap(A.y, d.y, &sigma_t_y, &sigma_s_y);
371         subsurface_random_walk_remap(A.z, d.z, &sigma_t_z, &sigma_s_z);
372
373         *sigma_t = make_float3(sigma_t_x, sigma_t_y, sigma_t_z);
374         *sigma_s = make_float3(sigma_s_x, sigma_s_y, sigma_s_z);
375
376         /* Closure mixing and Fresnel weights separate from albedo. */
377         *weight = safe_divide_color(bssrdf->weight, A);
378 }
379
380 ccl_device_noinline bool subsurface_random_walk(
381         KernelGlobals *kg,
382         LocalIntersection *ss_isect,
383         ShaderData *sd,
384         ccl_addr_space PathState *state,
385         const ShaderClosure *sc,
386         const float bssrdf_u,
387         const float bssrdf_v)
388 {
389         /* Sample diffuse surface scatter into the object. */
390         float3 D;
391         float pdf;
392         sample_cos_hemisphere(-sd->N, bssrdf_u, bssrdf_v, &D, &pdf);
393         if(dot(-sd->Ng, D) <= 0.0f) {
394                 return 0;
395         }
396
397         /* Convert subsurface to volume coefficients. */
398         float3 sigma_t, sigma_s;
399         float3 throughput = make_float3(1.0f, 1.0f, 1.0f);
400         subsurface_random_walk_coefficients(sc, &sigma_t, &sigma_s, &throughput);
401
402         /* Setup ray. */
403 #ifdef __SPLIT_KERNEL__
404         Ray ray_object = ss_isect->ray;
405         Ray *ray = &ray_object;
406 #else
407         Ray *ray = &ss_isect->ray;
408 #endif
409         ray->P = ray_offset(sd->P, -sd->Ng);
410         ray->D = D;
411         ray->t = FLT_MAX;
412         ray->time = sd->time;
413
414         /* Modify state for RNGs, decorrelated from other paths. */
415         uint prev_rng_offset = state->rng_offset;
416         uint prev_rng_hash = state->rng_hash;
417         state->rng_hash = cmj_hash(state->rng_hash + state->rng_offset, 0xdeadbeef);
418
419         /* Random walk until we hit the surface again. */
420         bool hit = false;
421
422         for(int bounce = 0; bounce < BSSRDF_MAX_BOUNCES; bounce++) {
423                 /* Advance random number offset. */
424                 state->rng_offset += PRNG_BOUNCE_NUM;
425
426                 if(bounce > 0) {
427                         /* Sample scattering direction. */
428                         const float anisotropy = 0.0f;
429                         float scatter_u, scatter_v;
430                         path_state_rng_2D(kg, state, PRNG_BSDF_U, &scatter_u, &scatter_v);
431                         ray->D = henyey_greenstrein_sample(ray->D, anisotropy, scatter_u, scatter_v, NULL);
432                 }
433
434                 /* Sample color channel, use MIS with balance heuristic. */
435                 float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
436                 float3 albedo = safe_divide_color(sigma_s, sigma_t);
437                 float3 channel_pdf;
438                 int channel = kernel_volume_sample_channel(albedo, throughput, rphase, &channel_pdf);
439
440                 /* Distance sampling. */
441                 float rdist = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
442                 float sample_sigma_t = kernel_volume_channel_get(sigma_t, channel);
443                 float t = -logf(1.0f - rdist)/sample_sigma_t;
444
445                 ray->t = t;
446                 scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
447                 hit = (ss_isect->num_hits > 0);
448
449                 if(hit) {
450                         /* Compute world space distance to surface hit. */
451                         float3 D = ray->D;
452                         object_inverse_dir_transform(kg, sd, &D);
453                         D = normalize(D) * ss_isect->hits[0].t;
454                         object_dir_transform(kg, sd, &D);
455                         t = len(D);
456                 }
457
458                 /* Advance to new scatter location. */
459                 ray->P += t * ray->D;
460
461                 /* Update throughput. */
462                 float3 transmittance = volume_color_transmittance(sigma_t, t);
463                 float pdf = dot(channel_pdf, (hit)? transmittance: sigma_t * transmittance);
464                 throughput *= ((hit)? transmittance: sigma_s * transmittance) / pdf;
465
466                 if(hit) {
467                         /* If we hit the surface, we are done. */
468                         break;
469                 }
470
471                 /* Russian roulette. */
472                 float terminate = path_state_rng_1D(kg, state, PRNG_TERMINATE);
473                 float probability = min(max3(fabs(throughput)), 1.0f);
474                 if(terminate >= probability) {
475                         break;
476                 }
477                 throughput /= probability;
478         }
479
480         kernel_assert(isfinite_safe(throughput.x) &&
481                       isfinite_safe(throughput.y) &&
482                       isfinite_safe(throughput.z));
483
484         state->rng_offset = prev_rng_offset;
485         state->rng_hash = prev_rng_hash;
486
487         /* Return number of hits in ss_isect. */
488         if(!hit) {
489                 return 0;
490         }
491
492         /* TODO: gain back performance lost from merging with disk BSSRDF. We
493          * only need to return on hit so this indirect ray push/pop overhead
494          * is not actually needed, but it does keep the code simpler. */
495         ss_isect->weight[0] = throughput;
496 #ifdef __SPLIT_KERNEL__
497         ss_isect->ray = *ray;
498 #endif
499
500         return 1;
501 }
502
503 ccl_device_inline int subsurface_scatter_multi_intersect(
504         KernelGlobals *kg,
505         LocalIntersection *ss_isect,
506         ShaderData *sd,
507         ccl_addr_space PathState *state,
508         const ShaderClosure *sc,
509         uint *lcg_state,
510         float bssrdf_u,
511         float bssrdf_v,
512         bool all)
513 {
514         if(CLOSURE_IS_DISK_BSSRDF(sc->type)) {
515                 return subsurface_scatter_disk(kg,
516                                                ss_isect,
517                                                sd,
518                                                sc,
519                                                lcg_state,
520                                                bssrdf_u,
521                                                bssrdf_v,
522                                                all);
523         }
524         else {
525                 return subsurface_random_walk(kg,
526                                               ss_isect,
527                                               sd,
528                                               state,
529                                               sc,
530                                               bssrdf_u,
531                                               bssrdf_v);
532         }
533 }
534
535 CCL_NAMESPACE_END
536