Cysles: Avoid having ShaderData on the stack
[blender.git] / intern / cycles / kernel / kernel_subsurface.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* NEW BSSRDF: See "BSSRDF Importance Sampling", SIGGRAPH 2013 */
20
21 /* TODO:
22  * - test using power heuristic for combing bssrdfs
23  * - try to reduce one sample model variance
24  */
25
26 #define BSSRDF_MULTI_EVAL
27
28 ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, ShaderData *sd, float *probability)
29 {
30         /* sum sample weights of bssrdf and bsdf */
31         float bsdf_sum = 0.0f;
32         float bssrdf_sum = 0.0f;
33
34         for(int i = 0; i < sd->num_closure; i++) {
35                 ShaderClosure *sc = &sd->closure[i];
36                 
37                 if(CLOSURE_IS_BSDF(sc->type))
38                         bsdf_sum += sc->sample_weight;
39                 else if(CLOSURE_IS_BSSRDF(sc->type))
40                         bssrdf_sum += sc->sample_weight;
41         }
42
43         /* use bsdf or bssrdf? */
44         float r = sd->randb_closure*(bsdf_sum + bssrdf_sum);
45
46         if(r < bsdf_sum) {
47                 /* use bsdf, and adjust randb so we can reuse it for picking a bsdf */
48                 sd->randb_closure = r/bsdf_sum;
49                 *probability = (bsdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bsdf_sum: 1.0f;
50                 return NULL;
51         }
52
53         /* use bssrdf */
54         r -= bsdf_sum;
55
56         float sum = 0.0f;
57
58         for(int i = 0; i < sd->num_closure; i++) {
59                 ShaderClosure *sc = &sd->closure[i];
60                 
61                 if(CLOSURE_IS_BSSRDF(sc->type)) {
62                         sum += sc->sample_weight;
63
64                         if(r <= sum) {
65                                 sd->randb_closure = (r - (sum - sc->sample_weight))/sc->sample_weight;
66
67 #ifdef BSSRDF_MULTI_EVAL
68                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/bssrdf_sum: 1.0f;
69 #else
70                                 *probability = (bssrdf_sum > 0.0f)? (bsdf_sum + bssrdf_sum)/sc->sample_weight: 1.0f;
71 #endif
72                                 return sc;
73                         }
74                 }
75         }
76
77         /* should never happen */
78         sd->randb_closure = 0.0f;
79         *probability = 1.0f;
80         return NULL;
81 }
82
83 ccl_device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
84 {
85 #ifdef BSSRDF_MULTI_EVAL
86         /* this is the veach one-sample model with balance heuristic, some pdf
87          * factors drop out when using balance heuristic weighting */
88         float3 eval_sum = make_float3(0.0f, 0.0f, 0.0f);
89         float pdf_sum = 0.0f;
90         float sample_weight_sum = 0.0f;
91         int num_bssrdf = 0;
92
93         for(int i = 0; i < sd->num_closure; i++) {
94                 sc = &sd->closure[i];
95                 
96                 if(CLOSURE_IS_BSSRDF(sc->type)) {
97                         float sample_weight = (all)? 1.0f: sc->sample_weight;
98                         sample_weight_sum += sample_weight;
99                 }
100         }
101
102         float sample_weight_inv = 1.0f/sample_weight_sum;
103
104         //printf("num closures %d\n", sd->num_closure);
105
106         for(int i = 0; i < sd->num_closure; i++) {
107                 sc = &sd->closure[i];
108                 
109                 if(CLOSURE_IS_BSSRDF(sc->type)) {
110                         /* in case of branched path integrate we sample all bssrdf's once,
111                          * for path trace we pick one, so adjust pdf for that */
112                         float sample_weight = (all)? 1.0f: sc->sample_weight * sample_weight_inv;
113
114                         /* compute pdf */
115                         float pdf = bssrdf_pdf(sc, r);
116                         float disk_pdf = bssrdf_pdf(sc, disk_r);
117
118                         /* TODO power heuristic is not working correct here */
119                         eval_sum += sc->weight*pdf; //*sample_weight*disk_pdf;
120                         pdf_sum += sample_weight*disk_pdf; //*sample_weight*disk_pdf;
121
122                         num_bssrdf++;
123                 }
124         }
125
126         return (pdf_sum > 0.0f)? eval_sum / pdf_sum : make_float3(0.0f, 0.0f, 0.0f);
127 #else
128         float pdf = bssrdf_pdf(pick_sc, r);
129         float disk_pdf = bssrdf_pdf(pick_sc, disk_r);
130
131         return pick_sc->weight * pdf / disk_pdf;
132 #endif
133 }
134
135 /* replace closures with a single diffuse bsdf closure after scatter step */
136 ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, float3 weight, bool hit, float3 N)
137 {
138         sd->flag &= ~SD_CLOSURE_FLAGS;
139         sd->randb_closure = 0.0f;
140
141         if(hit) {
142                 ShaderClosure *sc = &sd->closure[0];
143                 sd->num_closure = 1;
144
145                 sc->weight = weight;
146                 sc->sample_weight = 1.0f;
147                 sc->data0 = 0.0f;
148                 sc->data1 = 0.0f;
149                 sc->N = N;
150                 sd->flag |= bsdf_diffuse_setup(sc);
151
152                 /* replace CLOSURE_BSDF_DIFFUSE_ID with this special ID so render passes
153                  * can recognize it as not being a regular diffuse closure */
154                 sc->type = CLOSURE_BSDF_BSSRDF_ID;
155         }
156         else
157                 sd->num_closure = 0;
158 }
159
160 /* optionally do blurring of color and/or bump mapping, at the cost of a shader evaluation */
161 ccl_device float3 subsurface_color_pow(float3 color, float exponent)
162 {
163         color = max(color, make_float3(0.0f, 0.0f, 0.0f));
164
165         if(exponent == 1.0f) {
166                 /* nothing to do */
167         }
168         else if(exponent == 0.5f) {
169                 color.x = sqrtf(color.x);
170                 color.y = sqrtf(color.y);
171                 color.z = sqrtf(color.z);
172         }
173         else {
174                 color.x = powf(color.x, exponent);
175                 color.y = powf(color.y, exponent);
176                 color.z = powf(color.z, exponent);
177         }
178
179         return color;
180 }
181
182 ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
183                                            ShaderData *sd,
184                                            int state_flag,
185                                            float3 *eval,
186                                            float3 *N)
187 {
188         /* average color and texture blur at outgoing point */
189         float texture_blur;
190         float3 out_color = shader_bssrdf_sum(sd, NULL, &texture_blur);
191
192         /* do we have bump mapping? */
193         bool bump = (sd->flag & SD_HAS_BSSRDF_BUMP) != 0;
194
195         if(bump || texture_blur > 0.0f) {
196                 /* average color and normal at incoming point */
197                 shader_eval_surface(kg, sd, 0.0f, state_flag, SHADER_CONTEXT_SSS);
198                 float3 in_color = shader_bssrdf_sum(sd, (bump)? N: NULL, NULL);
199
200                 /* we simply divide out the average color and multiply with the average
201                  * of the other one. we could try to do this per closure but it's quite
202                  * tricky to match closures between shader evaluations, their number and
203                  * order may change, this is simpler */
204                 if(texture_blur > 0.0f) {
205                         out_color = subsurface_color_pow(out_color, texture_blur);
206                         in_color = subsurface_color_pow(in_color, texture_blur);
207
208                         *eval *= safe_divide_color(in_color, out_color);
209                 }
210         }
211 }
212
213 /* Subsurface scattering step, from a point on the surface to other
214  * nearby points on the same object.
215  */
216 ccl_device int subsurface_scatter_multi_intersect(
217         KernelGlobals *kg,
218         SubsurfaceIntersection* ss_isect,
219         ShaderData *sd,
220         ShaderClosure *sc,
221         uint *lcg_state,
222         float disk_u,
223         float disk_v,
224         bool all)
225 {
226         /* pick random axis in local frame and point on disk */
227         float3 disk_N, disk_T, disk_B;
228         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
229
230         disk_N = sd->Ng;
231         make_orthonormals(disk_N, &disk_T, &disk_B);
232
233         /* reusing variable for picking the closure gives a bit nicer stratification
234          * for path tracer, for branched we do all closures so it doesn't help */
235         float axisu = (all)? disk_u: sd->randb_closure;
236
237         if(axisu < 0.5f) {
238                 pick_pdf_N = 0.5f;
239                 pick_pdf_T = 0.25f;
240                 pick_pdf_B = 0.25f;
241                 if(all)
242                         disk_u *= 2.0f;
243         }
244         else if(axisu < 0.75f) {
245                 float3 tmp = disk_N;
246                 disk_N = disk_T;
247                 disk_T = tmp;
248                 pick_pdf_N = 0.25f;
249                 pick_pdf_T = 0.5f;
250                 pick_pdf_B = 0.25f;
251                 if(all)
252                         disk_u = (disk_u - 0.5f)*4.0f;
253         }
254         else {
255                 float3 tmp = disk_N;
256                 disk_N = disk_B;
257                 disk_B = tmp;
258                 pick_pdf_N = 0.25f;
259                 pick_pdf_T = 0.25f;
260                 pick_pdf_B = 0.5f;
261                 if(all)
262                         disk_u = (disk_u - 0.75f)*4.0f;
263         }
264
265         /* sample point on disk */
266         float phi = M_2PI_F * disk_u;
267         float disk_r = disk_v;
268         float disk_height;
269
270         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
271
272         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
273
274         /* create ray */
275         Ray *ray = &ss_isect->ray;
276         ray->P = sd->P + disk_N*disk_height + disk_P;
277         ray->D = -disk_N;
278         ray->t = 2.0f*disk_height;
279         ray->dP = sd->dP;
280         ray->dD = differential3_zero();
281         ray->time = sd->time;
282
283         /* intersect with the same object. if multiple intersections are found it
284          * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
285         scene_intersect_subsurface(kg,
286                                    ray,
287                                    ss_isect,
288                                    sd->object,
289                                    lcg_state,
290                                    BSSRDF_MAX_HITS);
291         /* TODO(sergey): Investigate whether scene_intersect_subsurface() could
292          * indeed return more than BSSRDF_MAX_HITS hits.
293          */
294         int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
295
296         for(int hit = 0; hit < num_eval_hits; hit++) {
297                 /* Quickly retrieve P and Ng without setting up ShaderData. */
298                 float3 hit_P = ray->P + ss_isect->hits[hit].t * ray->D;
299                 float3 hit_Ng = ss_isect->Ng[hit];
300                 if(ss_isect->hits[hit].object != OBJECT_NONE) {
301                         object_normal_transform(kg, sd, &hit_Ng);
302                 }
303
304                 /* probability densities for local frame axes */
305                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
306                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
307                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
308
309                 /* multiple importance sample between 3 axes, power heuristic
310                  * found to be slightly better than balance heuristic */
311                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
312
313                 /* real distance to sampled point */
314                 float r = len(hit_P - sd->P);
315
316                 /* evaluate */
317                 float w = mis_weight / pdf_N;
318                 if(ss_isect->num_hits > BSSRDF_MAX_HITS)
319                         w *= ss_isect->num_hits/(float)BSSRDF_MAX_HITS;
320                 float3 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
321
322                 ss_isect->weight[hit] = eval;
323         }
324
325         return num_eval_hits;
326 }
327
328 ccl_device void subsurface_scatter_multi_setup(KernelGlobals *kg,
329                                                SubsurfaceIntersection* ss_isect,
330                                                int hit,
331                                                ShaderData *sd,
332                                                int state_flag,
333                                                ShaderClosure *sc,
334                                                bool all)
335 {
336         /* Setup new shading point. */
337         shader_setup_from_subsurface(kg, sd, &ss_isect->hits[hit], &ss_isect->ray);
338
339         /* Optionally blur colors and bump mapping. */
340         float3 weight = ss_isect->weight[hit];
341         float3 N = sd->N;
342         subsurface_color_bump_blur(kg, sd, state_flag, &weight, &N);
343
344         /* Setup diffuse BSDF. */
345         subsurface_scatter_setup_diffuse_bsdf(sd, weight, true, N);
346 }
347
348 /* subsurface scattering step, from a point on the surface to another nearby point on the same object */
349 ccl_device void subsurface_scatter_step(KernelGlobals *kg, ShaderData *sd,
350         int state_flag, ShaderClosure *sc, uint *lcg_state, float disk_u, float disk_v, bool all)
351 {
352         float3 eval = make_float3(0.0f, 0.0f, 0.0f);
353
354         /* pick random axis in local frame and point on disk */
355         float3 disk_N, disk_T, disk_B;
356         float pick_pdf_N, pick_pdf_T, pick_pdf_B;
357
358         disk_N = sd->Ng;
359         make_orthonormals(disk_N, &disk_T, &disk_B);
360
361         if(sd->randb_closure < 0.5f) {
362                 pick_pdf_N = 0.5f;
363                 pick_pdf_T = 0.25f;
364                 pick_pdf_B = 0.25f;
365         }
366         else if(sd->randb_closure < 0.75f) {
367                 float3 tmp = disk_N;
368                 disk_N = disk_T;
369                 disk_T = tmp;
370                 pick_pdf_N = 0.25f;
371                 pick_pdf_T = 0.5f;
372                 pick_pdf_B = 0.25f;
373         }
374         else {
375                 float3 tmp = disk_N;
376                 disk_N = disk_B;
377                 disk_B = tmp;
378                 pick_pdf_N = 0.25f;
379                 pick_pdf_T = 0.25f;
380                 pick_pdf_B = 0.5f;
381         }
382
383         /* sample point on disk */
384         float phi = M_2PI_F * disk_u;
385         float disk_r = disk_v;
386         float disk_height;
387
388         bssrdf_sample(sc, disk_r, &disk_r, &disk_height);
389
390         float3 disk_P = (disk_r*cosf(phi)) * disk_T + (disk_r*sinf(phi)) * disk_B;
391
392         /* create ray */
393         Ray ray;
394         ray.P = sd->P + disk_N*disk_height + disk_P;
395         ray.D = -disk_N;
396         ray.t = 2.0f*disk_height;
397         ray.dP = sd->dP;
398         ray.dD = differential3_zero();
399         ray.time = sd->time;
400
401         /* intersect with the same object. if multiple intersections are
402          * found it will randomly pick one of them */
403         SubsurfaceIntersection ss_isect;
404         scene_intersect_subsurface(kg, &ray, &ss_isect, sd->object, lcg_state, 1);
405
406         /* evaluate bssrdf */
407         if(ss_isect.num_hits > 0) {
408                 float3 origP = sd->P;
409
410                 /* setup new shading point */
411                 shader_setup_from_subsurface(kg, sd, &ss_isect.hits[0], &ray);
412
413                 /* probability densities for local frame axes */
414                 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, sd->Ng));
415                 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, sd->Ng));
416                 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, sd->Ng));
417
418                 /* multiple importance sample between 3 axes, power heuristic
419                  * found to be slightly better than balance heuristic */
420                 float mis_weight = power_heuristic_3(pdf_N, pdf_T, pdf_B);
421
422                 /* real distance to sampled point */
423                 float r = len(sd->P - origP);
424
425                 /* evaluate */
426                 float w = (mis_weight * ss_isect.num_hits) / pdf_N;
427                 eval = subsurface_scatter_eval(sd, sc, disk_r, r, all) * w;
428         }
429
430         /* optionally blur colors and bump mapping */
431         float3 N = sd->N;
432         subsurface_color_bump_blur(kg, sd, state_flag, &eval, &N);
433
434         /* setup diffuse bsdf */
435         subsurface_scatter_setup_diffuse_bsdf(sd, eval, (ss_isect.num_hits > 0), N);
436 }
437
438 CCL_NAMESPACE_END
439