Remove redundant check for armature mode
[blender.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* BSSRDF using disk based importance sampling.
20  *
21  * BSSRDF Importance Sampling, SIGGRAPH 2013
22  * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
23  *
24  */
25
26 ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
27                                                  const ShaderClosure *sc,
28                                                  float disk_r,
29                                                  float r,
30                                                  bool all)
31 {
32         /* this is the veach one-sample model with balance heuristic, some pdf
33          * factors drop out when using balance heuristic weighting */
34         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
35         float pdf_sum = 0.0f;
36         float sample_weight_inv = 0.0f;
37
38         if(!all) {
39                 float sample_weight_sum = 0.0f;
40
41                 for(int i = 0; i < sd->num_closure; i++) {
42                         sc = &sd->closure[i];
43
44                         if(CLOSURE_IS_BSSRDF(sc->type)) {
45                                 sample_weight_sum += sc->sample_weight;
46                         }
47                 }
48
49                 sample_weight_inv = 1.0f/sample_weight_sum;
50         }
51
52         for(int i = 0; i < sd->num_closure; i++) {
53                 sc = &sd->closure[i];
54                 
55                 if(CLOSURE_IS_BSSRDF(sc->type)) {
56                         /* in case of branched path integrate we sample all bssrdf's once,
57                          * for path trace we pick one, so adjust pdf for that */
58                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
59
60                         /* compute pdf */
61                         float3 eval = bssrdf_eval(sc, r);
62                         float pdf = bssrdf_pdf(sc, disk_r);
63
64                         eval_sum += sc->weight * eval;
65                         pdf_sum += sample_weight * pdf;
66                 }
67         }
68
69         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
70 }
71
72 /* replace closures with a single diffuse bsdf closure after scatter step */
73 ccl_device void subsurface_scatter_setup_diffuse_bsdf(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float3 weight, bool hit, float3 N)
74 {
75         sd->flag &= ~SD_CLOSURE_FLAGS;
76         sd->num_closure = 0;
77         sd->num_closure_left = kernel_data.integrator.max_closures;
78
79         if(hit) {
80                 Bssrdf *bssrdf = (Bssrdf *)sc;
81 #ifdef __PRINCIPLED__
82                 if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
83                         PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
84
85                         if(bsdf) {
86                                 bsdf->N = N;
87                                 bsdf->roughness = bssrdf->roughness;
88                                 sd->flag |= bsdf_principled_diffuse_setup(bsdf);
89
90                                 /* replace CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID with this special ID so render passes
91                                  * can recognize it as not being a regular Disney principled diffuse closure */
92                                 bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
93                         }
94                 }
95                 else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
96                         CLOSURE_IS_BSSRDF(bssrdf->type))
97 #endif  /* __PRINCIPLED__ */
98                 {
99                         DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
100
101                         if(bsdf) {
102                                 bsdf->N = N;
103                                 sd->flag |= bsdf_diffuse_setup(bsdf);
104
105                                 /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
106                                  * can recognize it as not being a regular diffuse closure */
107                                 bsdf->type = CLOSURE_BSDF_BSSRDF_ID;
108                         }
109                 }
110         }
111 }
112
113 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
114 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
115 {
116         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
117
118         if(exponent == 1.0f) {
119                 /* nothing to do */
120         }
121         else if(exponent == 0.5f) {
122                 color.x = sqrtf(color.x);
123                 color.y = sqrtf(color.y);
124                 color.z = sqrtf(color.z);
125         }
126         else {
127                 color.x = powf(color.x, exponent);
128                 color.y = powf(color.y, exponent);
129                 color.z = powf(color.z, exponent);
130         }
131
132         return color;
133 }
134
135 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
136                                            ShaderData *sd,
137                                            ccl_addr_space PathState *state,
138                                            int state_flag,
139                                            float3 *eval,
140                                            float3 *N)
141 {
142         /* average color and texture blur at outgoing point */
143         float texture_blur;
144         float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
145
146         /* do we have bump mapping? */
147         bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
148
149         if(bump || texture_blur > 0.0f) {
150                 /* average color and normal at incoming point */
151                 shader_eval_surface(kg, sd, state, state_flag, kernel_data.integrator.max_closures);
152                 float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
153
154                 /* we simply divide out the average color and multiply with the average
155                  * of the other one. we could try to do this per closure but it's quite
156                  * tricky to match closures between shader evaluations, their number and
157                  * order may change, this is simpler */
158                 if(texture_blur > 0.0f) {
159                         out_color = subsurface_color_pow(out_color, texture_blur);
160                         in_color = subsurface_color_pow(in_color, texture_blur);
161
162                         *eval *= safe_divide_color(in_color, out_color);
163                 }
164         }
165 }
166
167 /* Subsurface scattering step, from a point on the surface to other
168  * nearby points on the same object.
169  */
170 ccl_device_inline int subsurface_scatter_multi_intersect(
171         KernelGlobals *kg,
172         LocalIntersection *ss_isect,
173         ShaderData *sd,
174         const ShaderClosure *sc,
175         uint *lcg_state,
176         float disk_u,
177         float disk_v,
178         bool all)
179 {
180         /* pick random axis in local frame and point on disk */
181         float3 disk_N, disk_T, disk_B;
182         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
183
184         disk_N = sd->Ng;
185         make_orthonormals(disk_N, &disk_T, &disk_B);
186
187         if(disk_v < 0.5f) {
188                 pick_pdf_N = 0.5f;
189                 pick_pdf_T = 0.25f;
190                 pick_pdf_B = 0.25f;
191                 disk_v *= 2.0f;
192         }
193         else if(disk_v < 0.75f) {
194                 float3 tmp = disk_N;
195                 disk_N = disk_T;
196                 disk_T = tmp;
197                 pick_pdf_N = 0.25f;
198                 pick_pdf_T = 0.5f;
199                 pick_pdf_B = 0.25f;
200                 disk_v = (disk_v - 0.5f)*4.0f;
201         }
202         else {
203                 float3 tmp = disk_N;
204                 disk_N = disk_B;
205                 disk_B = tmp;
206                 pick_pdf_N = 0.25f;
207                 pick_pdf_T = 0.25f;
208                 pick_pdf_B = 0.5f;
209                 disk_v = (disk_v - 0.75f)*4.0f;
210         }
211
212         /* sample point on disk */
213         float phi = M_2PI_F * disk_v;
214         float disk_height, disk_r;
215
216         bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
217
218         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
219
220         /* create ray */
221 #ifdef __SPLIT_KERNEL__
222         Ray ray_object = ss_isect->ray;
223         Ray *ray = &ray_object;
224 #else
225         Ray *ray = &ss_isect->ray;
226 #endif
227         ray->P = sd->P + disk_N*disk_height + disk_P;
228         ray->D = -disk_N;
229         ray->t = 2.0f*disk_height;
230         ray->dP = sd->dP;
231         ray->dD = differential3_zero();
232         ray->time = sd->time;
233
234         /* intersect with the same object. if multiple intersections are found it
235          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
236         scene_intersect_local(kg,
237                               *ray,
238                               ss_isect,
239                               sd->object,
240                               lcg_state,
241                               BSSRDF_MAX_HITS);
242         int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
243
244         for(int hit = 0; hit < num_eval_hits; hit++) {
245                 /* Quickly retrieve P and Ng without setting up ShaderData. */
246                 float3 hit_P;
247                 if(sd->type & PRIMITIVE_TRIANGLE) {
248                         hit_P = triangle_refine_local(kg,
249                                                       sd,
250                                                       &ss_isect->hits[hit],
251                                                       ray);
252                 }
253 #ifdef __OBJECT_MOTION__
254                 else  if(sd->type & PRIMITIVE_MOTION_TRIANGLE) {
255                         float3 verts[3];
256                         motion_triangle_vertices(
257                                 kg,
258                                 sd->object,
259                                 kernel_tex_fetch(__prim_index, ss_isect->hits[hit].prim),
260                                 sd->time,
261                                 verts);
262                         hit_P = motion_triangle_refine_local(kg,
263                                                              sd,
264                                                              &ss_isect->hits[hit],
265                                                              ray,
266                                                              verts);
267                 }
268 #endif  /* __OBJECT_MOTION__ */
269                 else {
270                         ss_isect->weight[hit] = make_float3(0.0f, 0.0f, 0.0f);
271                         continue;
272                 }
273
274                 float3 hit_Ng = ss_isect->Ng[hit];
275                 if(ss_isect->hits[hit].object != OBJECT_NONE) {
276                         object_normal_transform(kg, sd, &hit_Ng);
277                 }
278
279                 /* Probability densities for local frame axes. */
280                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
281                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
282                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
283
284                 /* Multiple importance sample between 3 axes, power heuristic
285                  * found to be slightly better than balance heuristic. pdf_N
286                  * in the MIS weight and denominator cancelled out. */
287                 float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
288                 if(ss_isect->num_hits > BSSRDF_MAX_HITS) {
289                         w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
290                 }
291
292                 /* Real distance to sampled point. */
293                 float r = len(hit_P - sd->P);
294
295                 /* Evaluate profiles. */
296                 float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
297
298                 ss_isect->weight[hit] = eval;
299         }
300
301 #ifdef __SPLIT_KERNEL__
302         ss_isect->ray = *ray;
303 #endif
304
305         return num_eval_hits;
306 }
307
308 ccl_device_noinline void subsurface_scatter_multi_setup(
309         KernelGlobals *kg,
310         LocalIntersection* ss_isect,
311         int hit,
312         ShaderData *sd,
313         ccl_addr_space PathState *state,
314         int state_flag,
315         const ShaderClosure *sc,
316         bool all)
317 {
318 #ifdef __SPLIT_KERNEL__
319         Ray ray_object = ss_isect->ray;
320         Ray *ray = &ray_object;
321 #else
322         Ray *ray = &ss_isect->ray;
323 #endif
324
325         /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
326 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
327         kernel_split_params.dummy_sd_flag = sd->flag;
328 #endif
329
330         /* Setup new shading point. */
331         shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], ray);
332
333         /* Optionally blur colors and bump mapping. */
334         float3 weight = ss_isect->weight[hit];
335         float3 N = sd->N;
336         subsurface_color_bump_blur(kg, sd, state, state_flag, &weight, &N);
337
338         /* Setup diffuse BSDF. */
339         subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, weight, true, N);
340 }
341
342 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
343 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state,
344         int state_flag, const ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
345 {
346         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
347
348         /* pick random axis in local frame and point on disk */
349         float3 disk_N, disk_T, disk_B;
350         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
351
352         disk_N = sd->Ng;
353         make_orthonormals(disk_N, &disk_T, &disk_B);
354
355         if(disk_v < 0.5f) {
356                 pick_pdf_N = 0.5f;
357                 pick_pdf_T = 0.25f;
358                 pick_pdf_B = 0.25f;
359                 disk_v *= 2.0f;
360         }
361         else if(disk_v < 0.75f) {
362                 float3 tmp = disk_N;
363                 disk_N = disk_T;
364                 disk_T = tmp;
365                 pick_pdf_N = 0.25f;
366                 pick_pdf_T = 0.5f;
367                 pick_pdf_B = 0.25f;
368                 disk_v = (disk_v - 0.5f)*4.0f;
369         }
370         else {
371                 float3 tmp = disk_N;
372                 disk_N = disk_B;
373                 disk_B = tmp;
374                 pick_pdf_N = 0.25f;
375                 pick_pdf_T = 0.25f;
376                 pick_pdf_B = 0.5f;
377                 disk_v = (disk_v - 0.75f)*4.0f;
378         }
379
380         /* sample point on disk */
381         float phi = M_2PI_F * disk_v;
382         float disk_height, disk_r;
383
384         bssrdf_sample(sc, disk_u, &disk_r, &disk_height);
385
386         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
387
388         /* create ray */
389         Ray ray;
390         ray.P = sd->P + disk_N*disk_height + disk_P;
391         ray.D = -disk_N;
392         ray.t = 2.0f*disk_height;
393         ray.dP = sd->dP;
394         ray.dD = differential3_zero();
395         ray.time = sd->time;
396
397         /* intersect with the same object. if multiple intersections are
398          * found it will randomly pick one of them */
399         LocalIntersection ss_isect;
400         scene_intersect_local(kg, ray, &ss_isect, sd->object, lcg_state, 1);
401
402         /* evaluate bssrdf */
403         if(ss_isect.num_hits > 0) {
404                 float3 origP = sd->P;
405
406                 /* Workaround for AMD GPU OpenCL compiler. Most probably cache bypass issue. */
407 #if defined(__SPLIT_KERNEL__) && defined(__KERNEL_OPENCL_AMD__) && defined(__KERNEL_GPU__)
408                 kernel_split_params.dummy_sd_flag = sd->flag;
409 #endif
410                 /* setup new shading point */
411                 shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
412
413                 /* Probability densities for local frame axes. */
414                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
415                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
416                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
417
418                 /* Multiple importance sample between 3 axes, power heuristic
419                  * found to be slightly better than balance heuristic. pdf_N
420                  * in the MIS weight and denominator cancelled out. */
421                 float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
422                 w *= ss_isect.num_hits;
423
424                 /* Real distance to sampled point. */
425                 float r = len(sd->P - origP);
426
427                 /* Evaluate profiles. */
428                 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
429         }
430
431         /* optionally blur colors and bump mapping */
432         float3 N = sd->N;
433         subsurface_color_bump_blur(kg, sd, state, state_flag, &eval, &N);
434
435         /* setup diffuse bsdf */
436         subsurface_scatter_setup_diffuse_bsdf(kg, sd, sc, eval, (ss_isect.num_hits > 0), N);
437 }
438
439 CCL_NAMESPACE_END
440