Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / kernel / kernel_light.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Light Sample result */
20
21 typedef struct LightSample {
22         float3 P;                       /* position on light, or direction for distant light */
23         float3 Ng;                      /* normal on light */
24         float3 D;                       /* direction from shading point to light */
25         float t;                        /* distance to light (FLT_MAX for distant light) */
26         float u, v;                     /* parametric coordinate on primitive */
27         float pdf;                      /* light sampling probability density function */
28         float eval_fac;         /* intensity multiplier */
29         int object;                     /* object id for triangle/curve lights */
30         int prim;                       /* primitive id for triangle/curve lights */
31         int shader;                     /* shader id */
32         int lamp;                       /* lamp id */
33         LightType type;         /* type of light */
34 } LightSample;
35
36 /* Area light sampling */
37
38 /* Uses the following paper:
39  *
40  * Carlos Urena et al.
41  * An Area-Preserving Parametrization for Spherical Rectangles.
42  *
43  * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
44  *
45  * Note: light_p is modified when sample_coord is true.
46  */
47 ccl_device_inline float rect_light_sample(float3 P,
48                                           float3 *light_p,
49                                           float3 axisu, float3 axisv,
50                                           float randu, float randv,
51                                           bool sample_coord)
52 {
53         /* In our name system we're using P for the center,
54          * which is o in the paper.
55          */
56
57         float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
58         float axisu_len, axisv_len;
59         /* Compute local reference system R. */
60         float3 x = normalize_len(axisu, &axisu_len);
61         float3 y = normalize_len(axisv, &axisv_len);
62         float3 z = cross(x, y);
63         /* Compute rectangle coords in local reference system. */
64         float3 dir = corner - P;
65         float z0 = dot(dir, z);
66         /* Flip 'z' to make it point against Q. */
67         if(z0 > 0.0f) {
68                 z *= -1.0f;
69                 z0 *= -1.0f;
70         }
71         float x0 = dot(dir, x);
72         float y0 = dot(dir, y);
73         float x1 = x0 + axisu_len;
74         float y1 = y0 + axisv_len;
75         /* Compute internal angles (gamma_i). */
76         float4 diff = make_float4(x0, y1, x1, y0) - make_float4(x1, y0, x0, y1);
77         float4 nz = make_float4(y0, x1, y1, x0) * diff;
78         nz = nz / sqrt(sqr(z0 * diff) + sqr(nz));
79         float g0 = safe_acosf(-nz.x * nz.y);
80         float g1 = safe_acosf(-nz.y * nz.z);
81         float g2 = safe_acosf(-nz.z * nz.w);
82         float g3 = safe_acosf(-nz.w * nz.x);
83         /* Compute predefined constants. */
84         float b0 = nz.x;
85         float b1 = nz.z;
86         float b0sq = b0 * b0;
87         float k = M_2PI_F - g2 - g3;
88         /* Compute solid angle from internal angles. */
89         float S = g0 + g1 - k;
90
91         if(sample_coord) {
92                 /* Compute cu. */
93                 float au = randu * S + k;
94                 float fu = (cosf(au) * b0 - b1) / sinf(au);
95                 float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
96                 cu = clamp(cu, -1.0f, 1.0f);
97                 /* Compute xu. */
98                 float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
99                 xu = clamp(xu, x0, x1);
100                 /* Compute yv. */
101                 float z0sq = z0 * z0;
102                 float y0sq = y0 * y0;
103                 float y1sq = y1 * y1;
104                 float d = sqrtf(xu * xu + z0sq);
105                 float h0 = y0 / sqrtf(d * d + y0sq);
106                 float h1 = y1 / sqrtf(d * d + y1sq);
107                 float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
108                 float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
109
110                 /* Transform (xu, yv, z0) to world coords. */
111                 *light_p = P + xu * x + yv * y + z0 * z;
112         }
113
114         /* return pdf */
115         if(S != 0.0f)
116                 return 1.0f / S;
117         else
118                 return 0.0f;
119 }
120
121 ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
122 {
123         to_unit_disk(&randu, &randv);
124         return ru*randu + rv*randv;
125 }
126
127 ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
128 {
129         float3 ru, rv;
130
131         make_orthonormals(v, &ru, &rv);
132
133         return ellipse_sample(ru, rv, randu, randv);
134 }
135
136 ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
137 {
138         return normalize(D + disk_light_sample(D, randu, randv)*radius);
139 }
140
141 ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
142 {
143         return disk_light_sample(normalize(P - center), randu, randv)*radius;
144 }
145
146 ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls)
147 {
148         float3 I = ls->Ng;
149
150         float attenuation = dot(dir, I);
151
152         if(attenuation <= spot_angle) {
153                 attenuation = 0.0f;
154         }
155         else {
156                 float t = attenuation - spot_angle;
157
158                 if(t < spot_smooth && spot_smooth != 0.0f)
159                         attenuation *= smoothstepf(t/spot_smooth);
160         }
161
162         return attenuation;
163 }
164
165 ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
166 {
167         float cos_pi = dot(Ng, I);
168
169         if(cos_pi <= 0.0f)
170                 return 0.0f;
171
172         return t*t/cos_pi;
173 }
174
175 /* Background Light */
176
177 #ifdef __BACKGROUND_MIS__
178
179 /* TODO(sergey): In theory it should be all fine to use noinline for all
180  * devices, but we're so close to the release so better not screw things
181  * up for CPU at least.
182  */
183 #ifdef __KERNEL_GPU__
184 ccl_device_noinline
185 #else
186 ccl_device
187 #endif
188 float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
189 {
190         /* for the following, the CDF values are actually a pair of floats, with the
191          * function value as X and the actual CDF as Y.  The last entry's function
192          * value is the CDF total. */
193         int res_x = kernel_data.integrator.pdf_background_res_x;
194         int res_y = kernel_data.integrator.pdf_background_res_y;
195         int cdf_width = res_x + 1;
196
197         /* this is basically std::lower_bound as used by pbrt */
198         int first = 0;
199         int count = res_y;
200
201         while(count > 0) {
202                 int step = count >> 1;
203                 int middle = first + step;
204
205                 if(kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
206                         first = middle + 1;
207                         count -= step + 1;
208                 }
209                 else
210                         count = step;
211         }
212
213         int index_v = max(0, first - 1);
214         kernel_assert(index_v >= 0 && index_v < res_y);
215
216         float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
217         float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
218         float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
219
220         /* importance-sampled V direction */
221         float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
222         float v = (index_v + dv) / res_y;
223
224         /* this is basically std::lower_bound as used by pbrt */
225         first = 0;
226         count = res_x;
227         while(count > 0) {
228                 int step = count >> 1;
229                 int middle = first + step;
230
231                 if(kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + middle).y < randu) {
232                         first = middle + 1;
233                         count -= step + 1;
234                 }
235                 else
236                         count = step;
237         }
238
239         int index_u = max(0, first - 1);
240         kernel_assert(index_u >= 0 && index_u < res_x);
241
242         float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
243         float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u + 1);
244         float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
245
246         /* importance-sampled U direction */
247         float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
248         float u = (index_u + du) / res_x;
249
250         /* compute pdf */
251         float denom = cdf_last_u.x * cdf_last_v.x;
252         float sin_theta = sinf(M_PI_F * v);
253
254         if(sin_theta == 0.0f || denom == 0.0f)
255                 *pdf = 0.0f;
256         else
257                 *pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
258
259         /* compute direction */
260         return equirectangular_to_direction(u, v);
261 }
262
263 /* TODO(sergey): Same as above, after the release we should consider using
264  * 'noinline' for all devices.
265  */
266 #ifdef __KERNEL_GPU__
267 ccl_device_noinline
268 #else
269 ccl_device
270 #endif
271 float background_map_pdf(KernelGlobals *kg, float3 direction)
272 {
273         float2 uv = direction_to_equirectangular(direction);
274         int res_x = kernel_data.integrator.pdf_background_res_x;
275         int res_y = kernel_data.integrator.pdf_background_res_y;
276         int cdf_width = res_x + 1;
277
278         float sin_theta = sinf(uv.y * M_PI_F);
279
280         if(sin_theta == 0.0f)
281                 return 0.0f;
282
283         int index_u = clamp(float_to_int(uv.x * res_x), 0, res_x - 1);
284         int index_v = clamp(float_to_int(uv.y * res_y), 0, res_y - 1);
285
286         /* pdfs in V direction */
287         float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + res_x);
288         float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res_y);
289
290         float denom = cdf_last_u.x * cdf_last_v.x;
291
292         if(denom == 0.0f)
293                 return 0.0f;
294
295         /* pdfs in U direction */
296         float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_width + index_u);
297         float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
298
299         return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
300 }
301
302 ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg,
303                                                                    float3 P,
304                                                                    int index,
305                                                                    float3 *lightpos,
306                                                                    float3 *dir)
307 {
308         int portal = kernel_data.integrator.portal_offset + index;
309         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
310
311         *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
312         *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
313
314         /* Check whether portal is on the right side. */
315         if(dot(*dir, P - *lightpos) > 1e-4f)
316                 return true;
317
318         return false;
319 }
320
321 ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
322                                               float3 P,
323                                               float3 direction,
324                                               int ignore_portal,
325                                               bool *is_possible)
326 {
327         float portal_pdf = 0.0f;
328
329         int num_possible = 0;
330         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
331                 if(p == ignore_portal)
332                         continue;
333
334                 float3 lightpos, dir;
335                 if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
336                         continue;
337
338                 /* There's a portal that could be sampled from this position. */
339                 if(is_possible) {
340                         *is_possible = true;
341                 }
342                 num_possible++;
343
344                 int portal = kernel_data.integrator.portal_offset + p;
345                 const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
346                 float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
347                 float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
348                 bool is_round = (klight->area.invarea < 0.0f);
349
350                 if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL, is_round))
351                         continue;
352
353                 if(is_round) {
354                         float t;
355                         float3 D = normalize_len(lightpos - P, &t);
356                         portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
357                 }
358                 else {
359                         portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
360                 }
361         }
362
363         if(ignore_portal >= 0) {
364                 /* We have skipped a portal that could be sampled as well. */
365                 num_possible++;
366         }
367
368         return (num_possible > 0)? portal_pdf / num_possible: 0.0f;
369 }
370
371 ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
372 {
373         int num_possible_portals = 0;
374         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
375                 float3 lightpos, dir;
376                 if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
377                         num_possible_portals++;
378         }
379         return num_possible_portals;
380 }
381
382 ccl_device float3 background_portal_sample(KernelGlobals *kg,
383                                            float3 P,
384                                            float randu,
385                                            float randv,
386                                            int num_possible,
387                                            int *sampled_portal,
388                                            float *pdf)
389 {
390         /* Pick a portal, then re-normalize randv. */
391         randv *= num_possible;
392         int portal = (int)randv;
393         randv -= portal;
394
395         /* TODO(sergey): Some smarter way of finding portal to sample
396          * is welcome.
397          */
398         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
399                 /* Search for the sampled portal. */
400                 float3 lightpos, dir;
401                 if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
402                         continue;
403
404                 if(portal == 0) {
405                         /* p is the portal to be sampled. */
406                         int portal = kernel_data.integrator.portal_offset + p;
407                         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
408                         float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
409                         float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
410                         bool is_round = (klight->area.invarea < 0.0f);
411
412                         float3 D;
413                         if(is_round) {
414                                 lightpos += ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
415                                 float t;
416                                 D = normalize_len(lightpos - P, &t);
417                                 *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
418                         }
419                         else {
420                                 *pdf = rect_light_sample(P, &lightpos,
421                                                          axisu, axisv,
422                                                          randu, randv,
423                                                          true);
424                                 D = normalize(lightpos - P);
425                         }
426
427                         *pdf /= num_possible;
428                         *sampled_portal = p;
429                         return D;
430                 }
431
432                 portal--;
433         }
434
435         return make_float3(0.0f, 0.0f, 0.0f);
436 }
437
438 ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
439                                                  float3 P,
440                                                  float randu, float randv,
441                                                  float *pdf)
442 {
443         /* Probability of sampling portals instead of the map. */
444         float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
445
446         /* Check if there are portals in the scene which we can sample. */
447         if(portal_sampling_pdf > 0.0f) {
448                 int num_portals = background_num_possible_portals(kg, P);
449                 if(num_portals > 0) {
450                         if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
451                                 if(portal_sampling_pdf < 1.0f) {
452                                         randu /= portal_sampling_pdf;
453                                 }
454                                 int portal;
455                                 float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
456                                 if(num_portals > 1) {
457                                         /* Ignore the chosen portal, its pdf is already included. */
458                                         *pdf += background_portal_pdf(kg, P, D, portal, NULL);
459                                 }
460                                 /* We could also have sampled the map, so combine with MIS. */
461                                 if(portal_sampling_pdf < 1.0f) {
462                                         float cdf_pdf = background_map_pdf(kg, D);
463                                         *pdf = (portal_sampling_pdf * (*pdf)
464                                              + (1.0f - portal_sampling_pdf) * cdf_pdf);
465                                 }
466                                 return D;
467                         }
468                         else {
469                                 /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
470                                 randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
471                         }
472                 }
473                 else {
474                         /* We can't sample a portal.
475                          * Check if we can sample the map instead.
476                          */
477                         if(portal_sampling_pdf == 1.0f) {
478                                 /* Use uniform as a fallback if we can't sample the map. */
479                                 *pdf = 1.0f / M_4PI_F;
480                                 return sample_uniform_sphere(randu, randv);
481                         }
482                         else {
483                                 portal_sampling_pdf = 0.0f;
484                         }
485                 }
486         }
487
488         float3 D = background_map_sample(kg, randu, randv, pdf);
489         /* Use MIS if portals could be sampled as well. */
490         if(portal_sampling_pdf > 0.0f) {
491                 float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
492                 *pdf = (portal_sampling_pdf * portal_pdf
493                      + (1.0f - portal_sampling_pdf) * (*pdf));
494         }
495         return D;
496 }
497
498 ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
499 {
500         /* Probability of sampling portals instead of the map. */
501         float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
502
503         float portal_pdf = 0.0f, map_pdf = 0.0f;
504         if(portal_sampling_pdf > 0.0f) {
505                 /* Evaluate PDF of sampling this direction by portal sampling. */
506                 bool is_possible = false;
507                 portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
508                 if(!is_possible) {
509                         /* Portal sampling is not possible here because all portals point to the wrong side.
510                          * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
511                         if(portal_sampling_pdf == 1.0f) {
512                                 return kernel_data.integrator.pdf_lights / M_4PI_F;
513                         }
514                         else {
515                                 /* Force map sampling. */
516                                 portal_sampling_pdf = 0.0f;
517                         }
518                 }
519         }
520         if(portal_sampling_pdf < 1.0f) {
521                 /* Evaluate PDF of sampling this direction by map sampling. */
522                 map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
523         }
524         return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
525 }
526 #endif
527
528 /* Regular Light */
529
530 ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
531                                          int lamp,
532                                          float randu, float randv,
533                                          float3 P,
534                                          LightSample *ls)
535 {
536         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
537         LightType type = (LightType)klight->type;
538         ls->type = type;
539         ls->shader = klight->shader_id;
540         ls->object = PRIM_NONE;
541         ls->prim = PRIM_NONE;
542         ls->lamp = lamp;
543         ls->u = randu;
544         ls->v = randv;
545
546         if(type == LIGHT_DISTANT) {
547                 /* distant light */
548                 float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
549                 float3 D = lightD;
550                 float radius = klight->distant.radius;
551                 float invarea = klight->distant.invarea;
552
553                 if(radius > 0.0f)
554                         D = distant_light_sample(D, radius, randu, randv);
555
556                 ls->P = D;
557                 ls->Ng = D;
558                 ls->D = -D;
559                 ls->t = FLT_MAX;
560
561                 float costheta = dot(lightD, D);
562                 ls->pdf = invarea/(costheta*costheta*costheta);
563                 ls->eval_fac = ls->pdf;
564         }
565 #ifdef __BACKGROUND_MIS__
566         else if(type == LIGHT_BACKGROUND) {
567                 /* infinite area light (e.g. light dome or env light) */
568                 float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
569
570                 ls->P = D;
571                 ls->Ng = D;
572                 ls->D = -D;
573                 ls->t = FLT_MAX;
574                 ls->eval_fac = 1.0f;
575         }
576 #endif
577         else {
578                 ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
579
580                 if(type == LIGHT_POINT || type == LIGHT_SPOT) {
581                         float radius = klight->spot.radius;
582
583                         if(radius > 0.0f)
584                                 /* sphere light */
585                                 ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
586
587                         ls->D = normalize_len(ls->P - P, &ls->t);
588                         ls->Ng = -ls->D;
589
590                         float invarea = klight->spot.invarea;
591                         ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
592                         ls->pdf = invarea;
593
594                         if(type == LIGHT_SPOT) {
595                                 /* spot light attenuation */
596                                 float3 dir = make_float3(klight->spot.dir[0],
597                                          klight->spot.dir[1],
598                                                          klight->spot.dir[2]);
599                                 ls->eval_fac *= spot_light_attenuation(dir,
600                                                                        klight->spot.spot_angle,
601                                                                        klight->spot.spot_smooth,
602                                                                        ls);
603                                 if(ls->eval_fac == 0.0f) {
604                                         return false;
605                                 }
606                         }
607                         float2 uv = map_to_sphere(ls->Ng);
608                         ls->u = uv.x;
609                         ls->v = uv.y;
610
611                         ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
612                 }
613                 else {
614                         /* area light */
615                         float3 axisu = make_float3(klight->area.axisu[0],
616                                                    klight->area.axisu[1],
617                                                    klight->area.axisu[2]);
618                         float3 axisv = make_float3(klight->area.axisv[0],
619                                                    klight->area.axisv[1],
620                                                    klight->area.axisv[2]);
621                         float3 D = make_float3(klight->area.dir[0],
622                                                klight->area.dir[1],
623                                                klight->area.dir[2]);
624                         float invarea = fabsf(klight->area.invarea);
625                         bool is_round = (klight->area.invarea < 0.0f);
626
627                         if(dot(ls->P - P, D) > 0.0f) {
628                                 return false;
629                         }
630
631                         float3 inplane;
632
633                         if(is_round) {
634                                 inplane = ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
635                                 ls->P += inplane;
636                                 ls->pdf = invarea;
637                         }
638                         else {
639                                 inplane = ls->P;
640                                 ls->pdf = rect_light_sample(P, &ls->P,
641                                                             axisu, axisv,
642                                                             randu, randv,
643                                                             true);
644                                 inplane = ls->P - inplane;
645                         }
646
647                         ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
648                         ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
649
650                         ls->Ng = D;
651                         ls->D = normalize_len(ls->P - P, &ls->t);
652
653                         ls->eval_fac = 0.25f*invarea;
654                         if(is_round) {
655                                 ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
656                         }
657                 }
658         }
659
660         ls->pdf *= kernel_data.integrator.pdf_lights;
661
662         return (ls->pdf > 0.0f);
663 }
664
665 ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
666 {
667         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
668         LightType type = (LightType)klight->type;
669         ls->type = type;
670         ls->shader = klight->shader_id;
671         ls->object = PRIM_NONE;
672         ls->prim = PRIM_NONE;
673         ls->lamp = lamp;
674         /* todo: missing texture coordinates */
675         ls->u = 0.0f;
676         ls->v = 0.0f;
677
678         if(!(ls->shader & SHADER_USE_MIS))
679                 return false;
680
681         if(type == LIGHT_DISTANT) {
682                 /* distant light */
683                 float radius = klight->distant.radius;
684
685                 if(radius == 0.0f)
686                         return false;
687                 if(t != FLT_MAX)
688                         return false;
689
690                 /* a distant light is infinitely far away, but equivalent to a disk
691                  * shaped light exactly 1 unit away from the current shading point.
692                  *
693                  *     radius              t^2/cos(theta)
694                  *  <---------->           t = sqrt(1^2 + tan(theta)^2)
695                  *       tan(th)           area = radius*radius*pi
696                  *       <----->
697                  *        \    |           (1 + tan(theta)^2)/cos(theta)
698                  *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
699                  *       t  \th| 1         simplifies to
700                  *           \-|           1/(cos(theta)^3)
701                  *            \|           magic!
702                  *             P
703                  */
704
705                 float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
706                 float costheta = dot(-lightD, D);
707                 float cosangle = klight->distant.cosangle;
708
709                 if(costheta < cosangle)
710                         return false;
711
712                 ls->P = -D;
713                 ls->Ng = -D;
714                 ls->D = D;
715                 ls->t = FLT_MAX;
716
717                 /* compute pdf */
718                 float invarea = klight->distant.invarea;
719                 ls->pdf = invarea/(costheta*costheta*costheta);
720                 ls->eval_fac = ls->pdf;
721         }
722         else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
723                 float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
724
725                 float radius = klight->spot.radius;
726
727                 /* sphere light */
728                 if(radius == 0.0f)
729                         return false;
730
731                 if(!ray_aligned_disk_intersect(P, D, t,
732                                                lightP, radius, &ls->P, &ls->t))
733                 {
734                         return false;
735                 }
736
737                 ls->Ng = -D;
738                 ls->D = D;
739
740                 float invarea = klight->spot.invarea;
741                 ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
742                 ls->pdf = invarea;
743
744                 if(type == LIGHT_SPOT) {
745                         /* spot light attenuation */
746                         float3 dir = make_float3(klight->spot.dir[0],
747                                                  klight->spot.dir[1],
748                                                  klight->spot.dir[2]);
749                         ls->eval_fac *= spot_light_attenuation(dir,
750                                                                klight->spot.spot_angle,
751                                                                klight->spot.spot_smooth,
752                                                                ls);
753
754                         if(ls->eval_fac == 0.0f)
755                                 return false;
756                 }
757                 float2 uv = map_to_sphere(ls->Ng);
758                 ls->u = uv.x;
759                 ls->v = uv.y;
760
761                 /* compute pdf */
762                 if(ls->t != FLT_MAX)
763                         ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
764         }
765         else if(type == LIGHT_AREA) {
766                 /* area light */
767                 float invarea = fabsf(klight->area.invarea);
768                 bool is_round = (klight->area.invarea < 0.0f);
769                 if(invarea == 0.0f)
770                         return false;
771
772                 float3 axisu = make_float3(klight->area.axisu[0],
773                                            klight->area.axisu[1],
774                                            klight->area.axisu[2]);
775                 float3 axisv = make_float3(klight->area.axisv[0],
776                                            klight->area.axisv[1],
777                                            klight->area.axisv[2]);
778                 float3 Ng = make_float3(klight->area.dir[0],
779                                         klight->area.dir[1],
780                                         klight->area.dir[2]);
781
782                 /* one sided */
783                 if(dot(D, Ng) >= 0.0f)
784                         return false;
785
786                 float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
787
788                 if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
789                                        axisu, axisv, Ng,
790                                        &ls->P, &ls->t,
791                                        &ls->u, &ls->v,
792                                        is_round))
793                 {
794                         return false;
795                 }
796
797                 ls->D = D;
798                 ls->Ng = Ng;
799                 if(is_round) {
800                         ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
801                 }
802                 else {
803                         ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
804                 }
805                 ls->eval_fac = 0.25f*invarea;
806         }
807         else {
808                 return false;
809         }
810
811         ls->pdf *= kernel_data.integrator.pdf_lights;
812
813         return true;
814 }
815
816 /* Triangle Light */
817
818 /* returns true if the triangle is has motion blur or an instancing transform applied */
819 ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int object, int prim, float time, float3 V[3])
820 {
821         bool has_motion = false;
822         const int object_flag = kernel_tex_fetch(__object_flag, object);
823
824         if(object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
825                 motion_triangle_vertices(kg, object, prim, time, V);
826                 has_motion = true;
827         }
828         else {
829                 triangle_vertices(kg, prim, V);
830         }
831
832 #ifdef __INSTANCING__
833         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
834 #  ifdef __OBJECT_MOTION__
835                 float object_time = (time >= 0.0f) ? time : 0.5f;
836                 Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
837 #  else
838                 Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
839 #  endif
840                 V[0] = transform_point(&tfm, V[0]);
841                 V[1] = transform_point(&tfm, V[1]);
842                 V[2] = transform_point(&tfm, V[2]);
843                 has_motion = true;
844         }
845 #endif
846         return has_motion;
847 }
848
849 ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
850 {
851         float pdf = kernel_data.integrator.pdf_triangles;
852         float cos_pi = fabsf(dot(Ng, I));
853
854         if(cos_pi == 0.0f)
855                 return 0.0f;
856
857         return t*t*pdf/cos_pi;
858 }
859
860 ccl_device_forceinline float triangle_light_pdf(KernelGlobals *kg, ShaderData *sd, float t)
861 {
862         /* A naive heuristic to decide between costly solid angle sampling
863          * and simple area sampling, comparing the distance to the triangle plane
864          * to the length of the edges of the triangle. */
865
866         float3 V[3];
867         bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
868
869         const float3 e0 = V[1] - V[0];
870         const float3 e1 = V[2] - V[0];
871         const float3 e2 = V[2] - V[1];
872         const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
873         const float3 N = cross(e0, e1);
874         const float distance_to_plane = fabsf(dot(N, sd->I * t))/dot(N, N);
875
876         if(longest_edge_squared > distance_to_plane*distance_to_plane) {
877                 /* sd contains the point on the light source
878                  * calculate Px, the point that we're shading */
879                 const float3 Px = sd->P + sd->I * t;
880                 const float3 v0_p = V[0] - Px;
881                 const float3 v1_p = V[1] - Px;
882                 const float3 v2_p = V[2] - Px;
883
884                 const float3 u01 = safe_normalize(cross(v0_p, v1_p));
885                 const float3 u02 = safe_normalize(cross(v0_p, v2_p));
886                 const float3 u12 = safe_normalize(cross(v1_p, v2_p));
887
888                 const float alpha = fast_acosf(dot(u02, u01));
889                 const float beta = fast_acosf(-dot(u01, u12));
890                 const float gamma = fast_acosf(dot(u02, u12));
891                 const float solid_angle =  alpha + beta + gamma - M_PI_F;
892
893                 /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
894                 if(UNLIKELY(solid_angle == 0.0f)) {
895                         return 0.0f;
896                 }
897                 else {
898                         float area = 1.0f;
899                         if(has_motion) {
900                                 /* get the center frame vertices, this is what the PDF was calculated from */
901                                 triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
902                                 area = triangle_area(V[0], V[1], V[2]);
903                         }
904                         else {
905                                 area = 0.5f * len(N);
906                         }
907                         const float pdf = area * kernel_data.integrator.pdf_triangles;
908                         return pdf / solid_angle;
909                 }
910         }
911         else {
912                 float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
913                 if(has_motion) {
914                         const float     area = 0.5f * len(N);
915                         if(UNLIKELY(area == 0.0f)) {
916                                 return 0.0f;
917                         }
918                         /* scale the PDF.
919                          * area = the area the sample was taken from
920                          * area_pre = the are from which pdf_triangles was calculated from */
921                         triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
922                         const float area_pre = triangle_area(V[0], V[1], V[2]);
923                         pdf = pdf * area_pre / area;
924                 }
925                 return pdf;
926         }
927 }
928
929 ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, int prim, int object,
930         float randu, float randv, float time, LightSample *ls, const float3 P)
931 {
932         /* A naive heuristic to decide between costly solid angle sampling
933          * and simple area sampling, comparing the distance to the triangle plane
934          * to the length of the edges of the triangle. */
935
936         float3 V[3];
937         bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
938
939         const float3 e0 = V[1] - V[0];
940         const float3 e1 = V[2] - V[0];
941         const float3 e2 = V[2] - V[1];
942         const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
943         const float3 N0 = cross(e0, e1);
944         float Nl = 0.0f;
945         ls->Ng = safe_normalize_len(N0, &Nl);
946         float area = 0.5f * Nl;
947
948         /* flip normal if necessary */
949         const int object_flag = kernel_tex_fetch(__object_flag, object);
950         if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
951                 ls->Ng = -ls->Ng;
952         }
953         ls->eval_fac = 1.0f;
954         ls->shader = kernel_tex_fetch(__tri_shader, prim);
955         ls->object = object;
956         ls->prim = prim;
957         ls->lamp = LAMP_NONE;
958         ls->shader |= SHADER_USE_MIS;
959         ls->type = LIGHT_TRIANGLE;
960
961         float distance_to_plane = fabsf(dot(N0, V[0] - P)/dot(N0, N0));
962
963         if(longest_edge_squared > distance_to_plane*distance_to_plane) {
964                 /* see James Arvo, "Stratified Sampling of Spherical Triangles"
965                  * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
966
967                 /* project the triangle to the unit sphere
968                  * and calculate its edges and angles */
969                 const float3 v0_p = V[0] - P;
970                 const float3 v1_p = V[1] - P;
971                 const float3 v2_p = V[2] - P;
972
973                 const float3 u01 = safe_normalize(cross(v0_p, v1_p));
974                 const float3 u02 = safe_normalize(cross(v0_p, v2_p));
975                 const float3 u12 = safe_normalize(cross(v1_p, v2_p));
976
977                 const float3 A = safe_normalize(v0_p);
978                 const float3 B = safe_normalize(v1_p);
979                 const float3 C = safe_normalize(v2_p);
980
981                 const float cos_alpha = dot(u02, u01);
982                 const float cos_beta = -dot(u01, u12);
983                 const float cos_gamma = dot(u02, u12);
984
985                 /* calculate dihedral angles */
986                 const float alpha = fast_acosf(cos_alpha);
987                 const float beta = fast_acosf(cos_beta);
988                 const float gamma = fast_acosf(cos_gamma);
989                 /* the area of the unit spherical triangle = solid angle */
990                 const float solid_angle =  alpha + beta + gamma - M_PI_F;
991
992                 /* precompute a few things
993                  * these could be re-used to take several samples
994                  * as they are independent of randu/randv */
995                 const float cos_c = dot(A, B);
996                 const float sin_alpha = fast_sinf(alpha);
997                 const float product = sin_alpha * cos_c;
998
999                 /* Select a random sub-area of the spherical triangle
1000                  * and calculate the third vertex C_ of that new triangle */
1001                 const float phi = randu * solid_angle - alpha;
1002                 float s, t;
1003                 fast_sincosf(phi, &s, &t);
1004                 const float u = t - cos_alpha;
1005                 const float v = s + product;
1006
1007                 const float3 U = safe_normalize(C - dot(C, A) * A);
1008
1009                 float q = 1.0f;
1010                 const float det = ((v * s + u * t) * sin_alpha);
1011                 if(det != 0.0f) {
1012                         q = ((v * t - u * s) * cos_alpha - v) / det;
1013                 }
1014                 const float temp = max(1.0f - q*q, 0.0f);
1015
1016                 const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
1017
1018                 /* Finally, select a random point along the edge of the new triangle
1019                  * That point on the spherical triangle is the sampled ray direction */
1020                 const float z = 1.0f - randv * (1.0f - dot(C_, B));
1021                 ls->D = z * B + safe_sqrtf(1.0f - z*z) * safe_normalize(C_ - dot(C_, B) * B);
1022
1023                 /* calculate intersection with the planar triangle */
1024                 if(!ray_triangle_intersect(P, ls->D, FLT_MAX,
1025 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
1026                                            (ssef*)V,
1027 #else
1028                                            V[0], V[1], V[2],
1029 #endif
1030                                            &ls->u, &ls->v, &ls->t)) {
1031                         ls->pdf = 0.0f;
1032                         return;
1033                 }
1034
1035                 ls->P = P + ls->D * ls->t;
1036
1037                 /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
1038                 if(UNLIKELY(solid_angle == 0.0f)) {
1039                         ls->pdf = 0.0f;
1040                         return;
1041                 }
1042                 else {
1043                         if(has_motion) {
1044                                 /* get the center frame vertices, this is what the PDF was calculated from */
1045                                 triangle_world_space_vertices(kg, object, prim, -1.0f, V);
1046                                 area = triangle_area(V[0], V[1], V[2]);
1047                         }
1048                         const float pdf = area * kernel_data.integrator.pdf_triangles;
1049                         ls->pdf = pdf / solid_angle;
1050                 }
1051         }
1052         else {
1053                 /* compute random point in triangle */
1054                 randu = sqrtf(randu);
1055
1056                 const float u = 1.0f - randu;
1057                 const float v = randv*randu;
1058                 const float t = 1.0f - u - v;
1059                 ls->P = u * V[0] + v * V[1] + t * V[2];
1060                 /* compute incoming direction, distance and pdf */
1061                 ls->D = normalize_len(ls->P - P, &ls->t);
1062                 ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
1063                 if(has_motion && area != 0.0f) {
1064                         /* scale the PDF.
1065                          * area = the area the sample was taken from
1066                          * area_pre = the are from which pdf_triangles was calculated from */
1067                         triangle_world_space_vertices(kg, object, prim, -1.0f, V);
1068                         const float area_pre = triangle_area(V[0], V[1], V[2]);
1069                         ls->pdf = ls->pdf * area_pre / area;
1070                 }
1071                 ls->u = u;
1072                 ls->v = v;
1073         }
1074 }
1075
1076 /* Light Distribution */
1077
1078 ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
1079 {
1080         /* This is basically std::upper_bound as used by pbrt, to find a point light or
1081          * triangle to emit from, proportional to area. a good improvement would be to
1082          * also sample proportional to power, though it's not so well defined with
1083          * arbitrary shaders. */
1084         int first = 0;
1085         int len = kernel_data.integrator.num_distribution + 1;
1086         float r = *randu;
1087
1088         while(len > 0) {
1089                 int half_len = len >> 1;
1090                 int middle = first + half_len;
1091
1092                 if(r < kernel_tex_fetch(__light_distribution, middle).totarea) {
1093                         len = half_len;
1094                 }
1095                 else {
1096                         first = middle + 1;
1097                         len = len - half_len - 1;
1098                 }
1099         }
1100
1101         /* Clamping should not be needed but float rounding errors seem to
1102          * make this fail on rare occasions. */
1103         int index = clamp(first-1, 0, kernel_data.integrator.num_distribution-1);
1104
1105         /* Rescale to reuse random number. this helps the 2D samples within
1106          * each area light be stratified as well. */
1107         float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
1108         float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea;
1109         *randu = (r - distr_min)/(distr_max - distr_min);
1110
1111         return index;
1112 }
1113
1114 /* Generic Light */
1115
1116 ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
1117 {
1118         return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
1119 }
1120
1121 ccl_device_noinline bool light_sample(KernelGlobals *kg,
1122                                       float randu,
1123                                       float randv,
1124                                       float time,
1125                                       float3 P,
1126                                       int bounce,
1127                                       LightSample *ls)
1128 {
1129         /* sample index */
1130         int index = light_distribution_sample(kg, &randu);
1131
1132         /* fetch light data */
1133         const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index);
1134         int prim = kdistribution->prim;
1135
1136         if(prim >= 0) {
1137                 int object = kdistribution->mesh_light.object_id;
1138                 int shader_flag = kdistribution->mesh_light.shader_flag;
1139
1140                 triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
1141                 ls->shader |= shader_flag;
1142                 return (ls->pdf > 0.0f);
1143         }
1144         else {
1145                 int lamp = -prim-1;
1146
1147                 if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
1148                         return false;
1149                 }
1150
1151                 return lamp_light_sample(kg, lamp, randu, randv, P, ls);
1152         }
1153 }
1154
1155 ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
1156 {
1157         return kernel_tex_fetch(__lights, index).samples;
1158 }
1159
1160 CCL_NAMESPACE_END