Merge branch 'master' into blender2.8
[blender.git] / intern / cycles / kernel / kernel_light.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 CCL_NAMESPACE_BEGIN
18
19 /* Light Sample result */
20
21 typedef struct LightSample {
22         float3 P;                       /* position on light, or direction for distant light */
23         float3 Ng;                      /* normal on light */
24         float3 D;                       /* direction from shading point to light */
25         float t;                        /* distance to light (FLT_MAX for distant light) */
26         float u, v;                     /* parametric coordinate on primitive */
27         float pdf;                      /* light sampling probability density function */
28         float eval_fac;         /* intensity multiplier */
29         int object;                     /* object id for triangle/curve lights */
30         int prim;                       /* primitive id for triangle/curve lights */
31         int shader;                     /* shader id */
32         int lamp;                       /* lamp id */
33         LightType type;         /* type of light */
34 } LightSample;
35
36 /* Area light sampling */
37
38 /* Uses the following paper:
39  *
40  * Carlos Urena et al.
41  * An Area-Preserving Parametrization for Spherical Rectangles.
42  *
43  * https://www.solidangle.com/research/egsr2013_spherical_rectangle.pdf
44  *
45  * Note: light_p is modified when sample_coord is true.
46  */
47 ccl_device_inline float rect_light_sample(float3 P,
48                                           float3 *light_p,
49                                           float3 axisu, float3 axisv,
50                                           float randu, float randv,
51                                           bool sample_coord)
52 {
53         /* In our name system we're using P for the center,
54          * which is o in the paper.
55          */
56
57         float3 corner = *light_p - axisu * 0.5f - axisv * 0.5f;
58         float axisu_len, axisv_len;
59         /* Compute local reference system R. */
60         float3 x = normalize_len(axisu, &axisu_len);
61         float3 y = normalize_len(axisv, &axisv_len);
62         float3 z = cross(x, y);
63         /* Compute rectangle coords in local reference system. */
64         float3 dir = corner - P;
65         float z0 = dot(dir, z);
66         /* Flip 'z' to make it point against Q. */
67         if(z0 > 0.0f) {
68                 z *= -1.0f;
69                 z0 *= -1.0f;
70         }
71         float x0 = dot(dir, x);
72         float y0 = dot(dir, y);
73         float x1 = x0 + axisu_len;
74         float y1 = y0 + axisv_len;
75         /* Create vectors to four vertices. */
76         float3 v00 = make_float3(x0, y0, z0);
77         float3 v01 = make_float3(x0, y1, z0);
78         float3 v10 = make_float3(x1, y0, z0);
79         float3 v11 = make_float3(x1, y1, z0);
80         /* Compute normals to edges. */
81         float3 n0 = normalize(cross(v00, v10));
82         float3 n1 = normalize(cross(v10, v11));
83         float3 n2 = normalize(cross(v11, v01));
84         float3 n3 = normalize(cross(v01, v00));
85         /* Compute internal angles (gamma_i). */
86         float g0 = safe_acosf(-dot(n0, n1));
87         float g1 = safe_acosf(-dot(n1, n2));
88         float g2 = safe_acosf(-dot(n2, n3));
89         float g3 = safe_acosf(-dot(n3, n0));
90         /* Compute predefined constants. */
91         float b0 = n0.z;
92         float b1 = n2.z;
93         float b0sq = b0 * b0;
94         float k = M_2PI_F - g2 - g3;
95         /* Compute solid angle from internal angles. */
96         float S = g0 + g1 - k;
97
98         if(sample_coord) {
99                 /* Compute cu. */
100                 float au = randu * S + k;
101                 float fu = (cosf(au) * b0 - b1) / sinf(au);
102                 float cu = 1.0f / sqrtf(fu * fu + b0sq) * (fu > 0.0f ? 1.0f : -1.0f);
103                 cu = clamp(cu, -1.0f, 1.0f);
104                 /* Compute xu. */
105                 float xu = -(cu * z0) / max(sqrtf(1.0f - cu * cu), 1e-7f);
106                 xu = clamp(xu, x0, x1);
107                 /* Compute yv. */
108                 float z0sq = z0 * z0;
109                 float y0sq = y0 * y0;
110                 float y1sq = y1 * y1;
111                 float d = sqrtf(xu * xu + z0sq);
112                 float h0 = y0 / sqrtf(d * d + y0sq);
113                 float h1 = y1 / sqrtf(d * d + y1sq);
114                 float hv = h0 + randv * (h1 - h0), hv2 = hv * hv;
115                 float yv = (hv2 < 1.0f - 1e-6f) ? (hv * d) / sqrtf(1.0f - hv2) : y1;
116
117                 /* Transform (xu, yv, z0) to world coords. */
118                 *light_p = P + xu * x + yv * y + z0 * z;
119         }
120
121         /* return pdf */
122         if(S != 0.0f)
123                 return 1.0f / S;
124         else
125                 return 0.0f;
126 }
127
128 ccl_device_inline float3 ellipse_sample(float3 ru, float3 rv, float randu, float randv)
129 {
130         to_unit_disk(&randu, &randv);
131         return ru*randu + rv*randv;
132 }
133
134 ccl_device float3 disk_light_sample(float3 v, float randu, float randv)
135 {
136         float3 ru, rv;
137
138         make_orthonormals(v, &ru, &rv);
139
140         return ellipse_sample(ru, rv, randu, randv);
141 }
142
143 ccl_device float3 distant_light_sample(float3 D, float radius, float randu, float randv)
144 {
145         return normalize(D + disk_light_sample(D, randu, randv)*radius);
146 }
147
148 ccl_device float3 sphere_light_sample(float3 P, float3 center, float radius, float randu, float randv)
149 {
150         return disk_light_sample(normalize(P - center), randu, randv)*radius;
151 }
152
153 ccl_device float spot_light_attenuation(float3 dir, float spot_angle, float spot_smooth, LightSample *ls)
154 {
155         float3 I = ls->Ng;
156
157         float attenuation = dot(dir, I);
158
159         if(attenuation <= spot_angle) {
160                 attenuation = 0.0f;
161         }
162         else {
163                 float t = attenuation - spot_angle;
164
165                 if(t < spot_smooth && spot_smooth != 0.0f)
166                         attenuation *= smoothstepf(t/spot_smooth);
167         }
168
169         return attenuation;
170 }
171
172 ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
173 {
174         float cos_pi = dot(Ng, I);
175
176         if(cos_pi <= 0.0f)
177                 return 0.0f;
178
179         return t*t/cos_pi;
180 }
181
182 /* Background Light */
183
184 #ifdef __BACKGROUND_MIS__
185
186 /* TODO(sergey): In theory it should be all fine to use noinline for all
187  * devices, but we're so close to the release so better not screw things
188  * up for CPU at least.
189  */
190 #ifdef __KERNEL_GPU__
191 ccl_device_noinline
192 #else
193 ccl_device
194 #endif
195 float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
196 {
197         /* for the following, the CDF values are actually a pair of floats, with the
198          * function value as X and the actual CDF as Y.  The last entry's function
199          * value is the CDF total. */
200         int res = kernel_data.integrator.pdf_background_res;
201         int cdf_count = res + 1;
202
203         /* this is basically std::lower_bound as used by pbrt */
204         int first = 0;
205         int count = res;
206
207         while(count > 0) {
208                 int step = count >> 1;
209                 int middle = first + step;
210
211                 if(kernel_tex_fetch(__light_background_marginal_cdf, middle).y < randv) {
212                         first = middle + 1;
213                         count -= step + 1;
214                 }
215                 else
216                         count = step;
217         }
218
219         int index_v = max(0, first - 1);
220         kernel_assert(index_v >= 0 && index_v < res);
221
222         float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
223         float2 cdf_next_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v + 1);
224         float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res);
225
226         /* importance-sampled V direction */
227         float dv = inverse_lerp(cdf_v.y, cdf_next_v.y, randv);
228         float v = (index_v + dv) / res;
229
230         /* this is basically std::lower_bound as used by pbrt */
231         first = 0;
232         count = res;
233         while(count > 0) {
234                 int step = count >> 1;
235                 int middle = first + step;
236
237                 if(kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_count + middle).y < randu) {
238                         first = middle + 1;
239                         count -= step + 1;
240                 }
241                 else
242                         count = step;
243         }
244
245         int index_u = max(0, first - 1);
246         kernel_assert(index_u >= 0 && index_u < res);
247
248         float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_count + index_u);
249         float2 cdf_next_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_count + index_u + 1);
250         float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * cdf_count + res);
251
252         /* importance-sampled U direction */
253         float du = inverse_lerp(cdf_u.y, cdf_next_u.y, randu);
254         float u = (index_u + du) / res;
255
256         /* compute pdf */
257         float denom = cdf_last_u.x * cdf_last_v.x;
258         float sin_theta = sinf(M_PI_F * v);
259
260         if(sin_theta == 0.0f || denom == 0.0f)
261                 *pdf = 0.0f;
262         else
263                 *pdf = (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
264
265         /* compute direction */
266         return equirectangular_to_direction(u, v);
267 }
268
269 /* TODO(sergey): Same as above, after the release we should consider using
270  * 'noinline' for all devices.
271  */
272 #ifdef __KERNEL_GPU__
273 ccl_device_noinline
274 #else
275 ccl_device
276 #endif
277 float background_map_pdf(KernelGlobals *kg, float3 direction)
278 {
279         float2 uv = direction_to_equirectangular(direction);
280         int res = kernel_data.integrator.pdf_background_res;
281
282         float sin_theta = sinf(uv.y * M_PI_F);
283
284         if(sin_theta == 0.0f)
285                 return 0.0f;
286
287         int index_u = clamp(float_to_int(uv.x * res), 0, res - 1);
288         int index_v = clamp(float_to_int(uv.y * res), 0, res - 1);
289
290         /* pdfs in V direction */
291         float2 cdf_last_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + res);
292         float2 cdf_last_v = kernel_tex_fetch(__light_background_marginal_cdf, res);
293
294         float denom = cdf_last_u.x * cdf_last_v.x;
295
296         if(denom == 0.0f)
297                 return 0.0f;
298
299         /* pdfs in U direction */
300         float2 cdf_u = kernel_tex_fetch(__light_background_conditional_cdf, index_v * (res + 1) + index_u);
301         float2 cdf_v = kernel_tex_fetch(__light_background_marginal_cdf, index_v);
302
303         return (cdf_u.x * cdf_v.x)/(M_2PI_F * M_PI_F * sin_theta * denom);
304 }
305
306 ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals *kg,
307                                                                    float3 P,
308                                                                    int index,
309                                                                    float3 *lightpos,
310                                                                    float3 *dir)
311 {
312         int portal = kernel_data.integrator.portal_offset + index;
313         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
314
315         *lightpos = make_float3(klight->co[0], klight->co[1], klight->co[2]);
316         *dir = make_float3(klight->area.dir[0], klight->area.dir[1], klight->area.dir[2]);
317
318         /* Check whether portal is on the right side. */
319         if(dot(*dir, P - *lightpos) > 1e-4f)
320                 return true;
321
322         return false;
323 }
324
325 ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
326                                               float3 P,
327                                               float3 direction,
328                                               int ignore_portal,
329                                               bool *is_possible)
330 {
331         float portal_pdf = 0.0f;
332
333         int num_possible = 0;
334         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
335                 if(p == ignore_portal)
336                         continue;
337
338                 float3 lightpos, dir;
339                 if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
340                         continue;
341
342                 /* There's a portal that could be sampled from this position. */
343                 if(is_possible) {
344                         *is_possible = true;
345                 }
346                 num_possible++;
347
348                 int portal = kernel_data.integrator.portal_offset + p;
349                 const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
350                 float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
351                 float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
352                 bool is_round = (klight->area.invarea < 0.0f);
353
354                 if(!ray_quad_intersect(P, direction, 1e-4f, FLT_MAX, lightpos, axisu, axisv, dir, NULL, NULL, NULL, NULL, is_round))
355                         continue;
356
357                 if(is_round) {
358                         float t;
359                         float3 D = normalize_len(lightpos - P, &t);
360                         portal_pdf += fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
361                 }
362                 else {
363                         portal_pdf += rect_light_sample(P, &lightpos, axisu, axisv, 0.0f, 0.0f, false);
364                 }
365         }
366
367         if(ignore_portal >= 0) {
368                 /* We have skipped a portal that could be sampled as well. */
369                 num_possible++;
370         }
371
372         return (num_possible > 0)? portal_pdf / num_possible: 0.0f;
373 }
374
375 ccl_device int background_num_possible_portals(KernelGlobals *kg, float3 P)
376 {
377         int num_possible_portals = 0;
378         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
379                 float3 lightpos, dir;
380                 if(background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
381                         num_possible_portals++;
382         }
383         return num_possible_portals;
384 }
385
386 ccl_device float3 background_portal_sample(KernelGlobals *kg,
387                                            float3 P,
388                                            float randu,
389                                            float randv,
390                                            int num_possible,
391                                            int *sampled_portal,
392                                            float *pdf)
393 {
394         /* Pick a portal, then re-normalize randv. */
395         randv *= num_possible;
396         int portal = (int)randv;
397         randv -= portal;
398
399         /* TODO(sergey): Some smarter way of finding portal to sample
400          * is welcome.
401          */
402         for(int p = 0; p < kernel_data.integrator.num_portals; p++) {
403                 /* Search for the sampled portal. */
404                 float3 lightpos, dir;
405                 if(!background_portal_data_fetch_and_check_side(kg, P, p, &lightpos, &dir))
406                         continue;
407
408                 if(portal == 0) {
409                         /* p is the portal to be sampled. */
410                         int portal = kernel_data.integrator.portal_offset + p;
411                         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, portal);
412                         float3 axisu = make_float3(klight->area.axisu[0], klight->area.axisu[1], klight->area.axisu[2]);
413                         float3 axisv = make_float3(klight->area.axisv[0], klight->area.axisv[1], klight->area.axisv[2]);
414                         bool is_round = (klight->area.invarea < 0.0f);
415
416                         float3 D;
417                         if(is_round) {
418                                 lightpos += ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
419                                 float t;
420                                 D = normalize_len(lightpos - P, &t);
421                                 *pdf = fabsf(klight->area.invarea) * lamp_light_pdf(kg, dir, -D, t);
422                         }
423                         else {
424                                 *pdf = rect_light_sample(P, &lightpos,
425                                                          axisu, axisv,
426                                                          randu, randv,
427                                                          true);
428                                 D = normalize(lightpos - P);
429                         }
430
431                         *pdf /= num_possible;
432                         *sampled_portal = p;
433                         return D;
434                 }
435
436                 portal--;
437         }
438
439         return make_float3(0.0f, 0.0f, 0.0f);
440 }
441
442 ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
443                                                  float3 P,
444                                                  float randu, float randv,
445                                                  float *pdf)
446 {
447         /* Probability of sampling portals instead of the map. */
448         float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
449
450         /* Check if there are portals in the scene which we can sample. */
451         if(portal_sampling_pdf > 0.0f) {
452                 int num_portals = background_num_possible_portals(kg, P);
453                 if(num_portals > 0) {
454                         if(portal_sampling_pdf == 1.0f || randu < portal_sampling_pdf) {
455                                 if(portal_sampling_pdf < 1.0f) {
456                                         randu /= portal_sampling_pdf;
457                                 }
458                                 int portal;
459                                 float3 D = background_portal_sample(kg, P, randu, randv, num_portals, &portal, pdf);
460                                 if(num_portals > 1) {
461                                         /* Ignore the chosen portal, its pdf is already included. */
462                                         *pdf += background_portal_pdf(kg, P, D, portal, NULL);
463                                 }
464                                 /* We could also have sampled the map, so combine with MIS. */
465                                 if(portal_sampling_pdf < 1.0f) {
466                                         float cdf_pdf = background_map_pdf(kg, D);
467                                         *pdf = (portal_sampling_pdf * (*pdf)
468                                              + (1.0f - portal_sampling_pdf) * cdf_pdf);
469                                 }
470                                 return D;
471                         }
472                         else {
473                                 /* Sample map, but with nonzero portal_sampling_pdf for MIS. */
474                                 randu = (randu - portal_sampling_pdf) / (1.0f - portal_sampling_pdf);
475                         }
476                 }
477                 else {
478                         /* We can't sample a portal.
479                          * Check if we can sample the map instead.
480                          */
481                         if(portal_sampling_pdf == 1.0f) {
482                                 /* Use uniform as a fallback if we can't sample the map. */
483                                 *pdf = 1.0f / M_4PI_F;
484                                 return sample_uniform_sphere(randu, randv);
485                         }
486                         else {
487                                 portal_sampling_pdf = 0.0f;
488                         }
489                 }
490         }
491
492         float3 D = background_map_sample(kg, randu, randv, pdf);
493         /* Use MIS if portals could be sampled as well. */
494         if(portal_sampling_pdf > 0.0f) {
495                 float portal_pdf = background_portal_pdf(kg, P, D, -1, NULL);
496                 *pdf = (portal_sampling_pdf * portal_pdf
497                      + (1.0f - portal_sampling_pdf) * (*pdf));
498         }
499         return D;
500 }
501
502 ccl_device float background_light_pdf(KernelGlobals *kg, float3 P, float3 direction)
503 {
504         /* Probability of sampling portals instead of the map. */
505         float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
506
507         float portal_pdf = 0.0f, map_pdf = 0.0f;
508         if(portal_sampling_pdf > 0.0f) {
509                 /* Evaluate PDF of sampling this direction by portal sampling. */
510                 bool is_possible = false;
511                 portal_pdf = background_portal_pdf(kg, P, direction, -1, &is_possible) * portal_sampling_pdf;
512                 if(!is_possible) {
513                         /* Portal sampling is not possible here because all portals point to the wrong side.
514                          * If map sampling is possible, it would be used instead, otherwise fallback sampling is used. */
515                         if(portal_sampling_pdf == 1.0f) {
516                                 return kernel_data.integrator.pdf_lights / M_4PI_F;
517                         }
518                         else {
519                                 /* Force map sampling. */
520                                 portal_sampling_pdf = 0.0f;
521                         }
522                 }
523         }
524         if(portal_sampling_pdf < 1.0f) {
525                 /* Evaluate PDF of sampling this direction by map sampling. */
526                 map_pdf = background_map_pdf(kg, direction) * (1.0f - portal_sampling_pdf);
527         }
528         return (portal_pdf + map_pdf) * kernel_data.integrator.pdf_lights;
529 }
530 #endif
531
532 /* Regular Light */
533
534 ccl_device_inline bool lamp_light_sample(KernelGlobals *kg,
535                                          int lamp,
536                                          float randu, float randv,
537                                          float3 P,
538                                          LightSample *ls)
539 {
540         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
541         LightType type = (LightType)klight->type;
542         ls->type = type;
543         ls->shader = klight->shader_id;
544         ls->object = PRIM_NONE;
545         ls->prim = PRIM_NONE;
546         ls->lamp = lamp;
547         ls->u = randu;
548         ls->v = randv;
549
550         if(type == LIGHT_DISTANT) {
551                 /* distant light */
552                 float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
553                 float3 D = lightD;
554                 float radius = klight->distant.radius;
555                 float invarea = klight->distant.invarea;
556
557                 if(radius > 0.0f)
558                         D = distant_light_sample(D, radius, randu, randv);
559
560                 ls->P = D;
561                 ls->Ng = D;
562                 ls->D = -D;
563                 ls->t = FLT_MAX;
564
565                 float costheta = dot(lightD, D);
566                 ls->pdf = invarea/(costheta*costheta*costheta);
567                 ls->eval_fac = ls->pdf;
568         }
569 #ifdef __BACKGROUND_MIS__
570         else if(type == LIGHT_BACKGROUND) {
571                 /* infinite area light (e.g. light dome or env light) */
572                 float3 D = -background_light_sample(kg, P, randu, randv, &ls->pdf);
573
574                 ls->P = D;
575                 ls->Ng = D;
576                 ls->D = -D;
577                 ls->t = FLT_MAX;
578                 ls->eval_fac = 1.0f;
579         }
580 #endif
581         else {
582                 ls->P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
583
584                 if(type == LIGHT_POINT || type == LIGHT_SPOT) {
585                         float radius = klight->spot.radius;
586
587                         if(radius > 0.0f)
588                                 /* sphere light */
589                                 ls->P += sphere_light_sample(P, ls->P, radius, randu, randv);
590
591                         ls->D = normalize_len(ls->P - P, &ls->t);
592                         ls->Ng = -ls->D;
593
594                         float invarea = klight->spot.invarea;
595                         ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
596                         ls->pdf = invarea;
597
598                         if(type == LIGHT_SPOT) {
599                                 /* spot light attenuation */
600                                 float3 dir = make_float3(klight->spot.dir[0],
601                                          klight->spot.dir[1],
602                                                          klight->spot.dir[2]);
603                                 ls->eval_fac *= spot_light_attenuation(dir,
604                                                                        klight->spot.spot_angle,
605                                                                        klight->spot.spot_smooth,
606                                                                        ls);
607                                 if(ls->eval_fac == 0.0f) {
608                                         return false;
609                                 }
610                         }
611                         float2 uv = map_to_sphere(ls->Ng);
612                         ls->u = uv.x;
613                         ls->v = uv.y;
614
615                         ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
616                 }
617                 else {
618                         /* area light */
619                         float3 axisu = make_float3(klight->area.axisu[0],
620                                                    klight->area.axisu[1],
621                                                    klight->area.axisu[2]);
622                         float3 axisv = make_float3(klight->area.axisv[0],
623                                                    klight->area.axisv[1],
624                                                    klight->area.axisv[2]);
625                         float3 D = make_float3(klight->area.dir[0],
626                                                klight->area.dir[1],
627                                                klight->area.dir[2]);
628                         float invarea = fabsf(klight->area.invarea);
629                         bool is_round = (klight->area.invarea < 0.0f);
630
631                         if(dot(ls->P - P, D) > 0.0f) {
632                                 return false;
633                         }
634
635                         float3 inplane;
636
637                         if(is_round) {
638                                 inplane = ellipse_sample(axisu*0.5f, axisv*0.5f, randu, randv);
639                                 ls->P += inplane;
640                                 ls->pdf = invarea;
641                         }
642                         else {
643                                 inplane = ls->P;
644                                 ls->pdf = rect_light_sample(P, &ls->P,
645                                                             axisu, axisv,
646                                                             randu, randv,
647                                                             true);
648                                 inplane = ls->P - inplane;
649                         }
650
651                         ls->u = dot(inplane, axisu) * (1.0f / dot(axisu, axisu)) + 0.5f;
652                         ls->v = dot(inplane, axisv) * (1.0f / dot(axisv, axisv)) + 0.5f;
653
654                         ls->Ng = D;
655                         ls->D = normalize_len(ls->P - P, &ls->t);
656
657                         ls->eval_fac = 0.25f*invarea;
658                         if(is_round) {
659                                 ls->pdf *= lamp_light_pdf(kg, D, -ls->D, ls->t);
660                         }
661                 }
662         }
663
664         ls->pdf *= kernel_data.integrator.pdf_lights;
665
666         return (ls->pdf > 0.0f);
667 }
668
669 ccl_device bool lamp_light_eval(KernelGlobals *kg, int lamp, float3 P, float3 D, float t, LightSample *ls)
670 {
671         const ccl_global KernelLight *klight = &kernel_tex_fetch(__lights, lamp);
672         LightType type = (LightType)klight->type;
673         ls->type = type;
674         ls->shader = klight->shader_id;
675         ls->object = PRIM_NONE;
676         ls->prim = PRIM_NONE;
677         ls->lamp = lamp;
678         /* todo: missing texture coordinates */
679         ls->u = 0.0f;
680         ls->v = 0.0f;
681
682         if(!(ls->shader & SHADER_USE_MIS))
683                 return false;
684
685         if(type == LIGHT_DISTANT) {
686                 /* distant light */
687                 float radius = klight->distant.radius;
688
689                 if(radius == 0.0f)
690                         return false;
691                 if(t != FLT_MAX)
692                         return false;
693
694                 /* a distant light is infinitely far away, but equivalent to a disk
695                  * shaped light exactly 1 unit away from the current shading point.
696                  *
697                  *     radius              t^2/cos(theta)
698                  *  <---------->           t = sqrt(1^2 + tan(theta)^2)
699                  *       tan(th)           area = radius*radius*pi
700                  *       <----->
701                  *        \    |           (1 + tan(theta)^2)/cos(theta)
702                  *         \   |           (1 + tan(acos(cos(theta)))^2)/cos(theta)
703                  *       t  \th| 1         simplifies to
704                  *           \-|           1/(cos(theta)^3)
705                  *            \|           magic!
706                  *             P
707                  */
708
709                 float3 lightD = make_float3(klight->co[0], klight->co[1], klight->co[2]);
710                 float costheta = dot(-lightD, D);
711                 float cosangle = klight->distant.cosangle;
712
713                 if(costheta < cosangle)
714                         return false;
715
716                 ls->P = -D;
717                 ls->Ng = -D;
718                 ls->D = D;
719                 ls->t = FLT_MAX;
720
721                 /* compute pdf */
722                 float invarea = klight->distant.invarea;
723                 ls->pdf = invarea/(costheta*costheta*costheta);
724                 ls->eval_fac = ls->pdf;
725         }
726         else if(type == LIGHT_POINT || type == LIGHT_SPOT) {
727                 float3 lightP = make_float3(klight->co[0], klight->co[1], klight->co[2]);
728
729                 float radius = klight->spot.radius;
730
731                 /* sphere light */
732                 if(radius == 0.0f)
733                         return false;
734
735                 if(!ray_aligned_disk_intersect(P, D, t,
736                                                lightP, radius, &ls->P, &ls->t))
737                 {
738                         return false;
739                 }
740
741                 ls->Ng = -D;
742                 ls->D = D;
743
744                 float invarea = klight->spot.invarea;
745                 ls->eval_fac = (0.25f*M_1_PI_F)*invarea;
746                 ls->pdf = invarea;
747
748                 if(type == LIGHT_SPOT) {
749                         /* spot light attenuation */
750                         float3 dir = make_float3(klight->spot.dir[0],
751                                                  klight->spot.dir[1],
752                                                  klight->spot.dir[2]);
753                         ls->eval_fac *= spot_light_attenuation(dir,
754                                                                klight->spot.spot_angle,
755                                                                klight->spot.spot_smooth,
756                                                                ls);
757
758                         if(ls->eval_fac == 0.0f)
759                                 return false;
760                 }
761                 float2 uv = map_to_sphere(ls->Ng);
762                 ls->u = uv.x;
763                 ls->v = uv.y;
764
765                 /* compute pdf */
766                 if(ls->t != FLT_MAX)
767                         ls->pdf *= lamp_light_pdf(kg, ls->Ng, -ls->D, ls->t);
768         }
769         else if(type == LIGHT_AREA) {
770                 /* area light */
771                 float invarea = fabsf(klight->area.invarea);
772                 bool is_round = (klight->area.invarea < 0.0f);
773                 if(invarea == 0.0f)
774                         return false;
775
776                 float3 axisu = make_float3(klight->area.axisu[0],
777                                            klight->area.axisu[1],
778                                            klight->area.axisu[2]);
779                 float3 axisv = make_float3(klight->area.axisv[0],
780                                            klight->area.axisv[1],
781                                            klight->area.axisv[2]);
782                 float3 Ng = make_float3(klight->area.dir[0],
783                                         klight->area.dir[1],
784                                         klight->area.dir[2]);
785
786                 /* one sided */
787                 if(dot(D, Ng) >= 0.0f)
788                         return false;
789
790                 float3 light_P = make_float3(klight->co[0], klight->co[1], klight->co[2]);
791
792                 if(!ray_quad_intersect(P, D, 0.0f, t, light_P,
793                                        axisu, axisv, Ng,
794                                        &ls->P, &ls->t,
795                                        &ls->u, &ls->v,
796                                        is_round))
797                 {
798                         return false;
799                 }
800
801                 ls->D = D;
802                 ls->Ng = Ng;
803                 if(is_round) {
804                         ls->pdf = invarea * lamp_light_pdf(kg, Ng, -D, ls->t);
805                 }
806                 else {
807                         ls->pdf = rect_light_sample(P, &light_P, axisu, axisv, 0, 0, false);
808                 }
809                 ls->eval_fac = 0.25f*invarea;
810         }
811         else {
812                 return false;
813         }
814
815         ls->pdf *= kernel_data.integrator.pdf_lights;
816
817         return true;
818 }
819
820 /* Triangle Light */
821
822 /* returns true if the triangle is has motion blur or an instancing transform applied */
823 ccl_device_inline bool triangle_world_space_vertices(KernelGlobals *kg, int object, int prim, float time, float3 V[3])
824 {
825         bool has_motion = false;
826         const int object_flag = kernel_tex_fetch(__object_flag, object);
827
828         if(object_flag & SD_OBJECT_HAS_VERTEX_MOTION && time >= 0.0f) {
829                 motion_triangle_vertices(kg, object, prim, time, V);
830                 has_motion = true;
831         }
832         else {
833                 triangle_vertices(kg, prim, V);
834         }
835
836 #ifdef __INSTANCING__
837         if(!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
838 #  ifdef __OBJECT_MOTION__
839                 float object_time = (time >= 0.0f) ? time : 0.5f;
840                 Transform tfm = object_fetch_transform_motion_test(kg, object, object_time, NULL);
841 #  else
842                 Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
843 #  endif
844                 V[0] = transform_point(&tfm, V[0]);
845                 V[1] = transform_point(&tfm, V[1]);
846                 V[2] = transform_point(&tfm, V[2]);
847                 has_motion = true;
848         }
849 #endif
850         return has_motion;
851 }
852
853 ccl_device_inline float triangle_light_pdf_area(KernelGlobals *kg, const float3 Ng, const float3 I, float t)
854 {
855         float pdf = kernel_data.integrator.pdf_triangles;
856         float cos_pi = fabsf(dot(Ng, I));
857
858         if(cos_pi == 0.0f)
859                 return 0.0f;
860
861         return t*t*pdf/cos_pi;
862 }
863
864 ccl_device_forceinline float triangle_light_pdf(KernelGlobals *kg, ShaderData *sd, float t)
865 {
866         /* A naive heuristic to decide between costly solid angle sampling
867          * and simple area sampling, comparing the distance to the triangle plane
868          * to the length of the edges of the triangle. */
869
870         float3 V[3];
871         bool has_motion = triangle_world_space_vertices(kg, sd->object, sd->prim, sd->time, V);
872
873         const float3 e0 = V[1] - V[0];
874         const float3 e1 = V[2] - V[0];
875         const float3 e2 = V[2] - V[1];
876         const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
877         const float3 N = cross(e0, e1);
878         const float distance_to_plane = fabsf(dot(N, sd->I * t))/dot(N, N);
879
880         if(longest_edge_squared > distance_to_plane*distance_to_plane) {
881                 /* sd contains the point on the light source
882                  * calculate Px, the point that we're shading */
883                 const float3 Px = sd->P + sd->I * t;
884                 const float3 v0_p = V[0] - Px;
885                 const float3 v1_p = V[1] - Px;
886                 const float3 v2_p = V[2] - Px;
887
888                 const float3 u01 = safe_normalize(cross(v0_p, v1_p));
889                 const float3 u02 = safe_normalize(cross(v0_p, v2_p));
890                 const float3 u12 = safe_normalize(cross(v1_p, v2_p));
891
892                 const float alpha = fast_acosf(dot(u02, u01));
893                 const float beta = fast_acosf(-dot(u01, u12));
894                 const float gamma = fast_acosf(dot(u02, u12));
895                 const float solid_angle =  alpha + beta + gamma - M_PI_F;
896
897                 /* pdf_triangles is calculated over triangle area, but we're not sampling over its area */
898                 if(UNLIKELY(solid_angle == 0.0f)) {
899                         return 0.0f;
900                 }
901                 else {
902                         float area = 1.0f;
903                         if(has_motion) {
904                                 /* get the center frame vertices, this is what the PDF was calculated from */
905                                 triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
906                                 area = triangle_area(V[0], V[1], V[2]);
907                         }
908                         else {
909                                 area = 0.5f * len(N);
910                         }
911                         const float pdf = area * kernel_data.integrator.pdf_triangles;
912                         return pdf / solid_angle;
913                 }
914         }
915         else {
916                 float pdf = triangle_light_pdf_area(kg, sd->Ng, sd->I, t);
917                 if(has_motion) {
918                         const float     area = 0.5f * len(N);
919                         if(UNLIKELY(area == 0.0f)) {
920                                 return 0.0f;
921                         }
922                         /* scale the PDF.
923                          * area = the area the sample was taken from
924                          * area_pre = the are from which pdf_triangles was calculated from */
925                         triangle_world_space_vertices(kg, sd->object, sd->prim, -1.0f, V);
926                         const float area_pre = triangle_area(V[0], V[1], V[2]);
927                         pdf = pdf * area_pre / area;
928                 }
929                 return pdf;
930         }
931 }
932
933 ccl_device_forceinline void triangle_light_sample(KernelGlobals *kg, int prim, int object,
934         float randu, float randv, float time, LightSample *ls, const float3 P)
935 {
936         /* A naive heuristic to decide between costly solid angle sampling
937          * and simple area sampling, comparing the distance to the triangle plane
938          * to the length of the edges of the triangle. */
939
940         float3 V[3];
941         bool has_motion = triangle_world_space_vertices(kg, object, prim, time, V);
942
943         const float3 e0 = V[1] - V[0];
944         const float3 e1 = V[2] - V[0];
945         const float3 e2 = V[2] - V[1];
946         const float longest_edge_squared = max(len_squared(e0), max(len_squared(e1), len_squared(e2)));
947         const float3 N0 = cross(e0, e1);
948         float Nl = 0.0f;
949         ls->Ng = safe_normalize_len(N0, &Nl);
950         float area = 0.5f * Nl;
951
952         /* flip normal if necessary */
953         const int object_flag = kernel_tex_fetch(__object_flag, object);
954         if(object_flag & SD_OBJECT_NEGATIVE_SCALE_APPLIED) {
955                 ls->Ng = -ls->Ng;
956         }
957         ls->eval_fac = 1.0f;
958         ls->shader = kernel_tex_fetch(__tri_shader, prim);
959         ls->object = object;
960         ls->prim = prim;
961         ls->lamp = LAMP_NONE;
962         ls->shader |= SHADER_USE_MIS;
963         ls->type = LIGHT_TRIANGLE;
964
965         float distance_to_plane = fabsf(dot(N0, V[0] - P)/dot(N0, N0));
966
967         if(longest_edge_squared > distance_to_plane*distance_to_plane) {
968                 /* see James Arvo, "Stratified Sampling of Spherical Triangles"
969                  * http://www.graphics.cornell.edu/pubs/1995/Arv95c.pdf */
970
971                 /* project the triangle to the unit sphere
972                  * and calculate its edges and angles */
973                 const float3 v0_p = V[0] - P;
974                 const float3 v1_p = V[1] - P;
975                 const float3 v2_p = V[2] - P;
976
977                 const float3 u01 = safe_normalize(cross(v0_p, v1_p));
978                 const float3 u02 = safe_normalize(cross(v0_p, v2_p));
979                 const float3 u12 = safe_normalize(cross(v1_p, v2_p));
980
981                 const float3 A = safe_normalize(v0_p);
982                 const float3 B = safe_normalize(v1_p);
983                 const float3 C = safe_normalize(v2_p);
984
985                 const float cos_alpha = dot(u02, u01);
986                 const float cos_beta = -dot(u01, u12);
987                 const float cos_gamma = dot(u02, u12);
988
989                 /* calculate dihedral angles */
990                 const float alpha = fast_acosf(cos_alpha);
991                 const float beta = fast_acosf(cos_beta);
992                 const float gamma = fast_acosf(cos_gamma);
993                 /* the area of the unit spherical triangle = solid angle */
994                 const float solid_angle =  alpha + beta + gamma - M_PI_F;
995
996                 /* precompute a few things
997                  * these could be re-used to take several samples
998                  * as they are independent of randu/randv */
999                 const float cos_c = dot(A, B);
1000                 const float sin_alpha = fast_sinf(alpha);
1001                 const float product = sin_alpha * cos_c;
1002
1003                 /* Select a random sub-area of the spherical triangle
1004                  * and calculate the third vertex C_ of that new triangle */
1005                 const float phi = randu * solid_angle - alpha;
1006                 float s, t;
1007                 fast_sincosf(phi, &s, &t);
1008                 const float u = t - cos_alpha;
1009                 const float v = s + product;
1010
1011                 const float3 U = safe_normalize(C - dot(C, A) * A);
1012
1013                 float q = 1.0f;
1014                 const float det = ((v * s + u * t) * sin_alpha);
1015                 if(det != 0.0f) {
1016                         q = ((v * t - u * s) * cos_alpha - v) / det;
1017                 }
1018                 const float temp = max(1.0f - q*q, 0.0f);
1019
1020                 const float3 C_ = safe_normalize(q * A + sqrtf(temp) * U);
1021
1022                 /* Finally, select a random point along the edge of the new triangle
1023                  * That point on the spherical triangle is the sampled ray direction */
1024                 const float z = 1.0f - randv * (1.0f - dot(C_, B));
1025                 ls->D = z * B + safe_sqrtf(1.0f - z*z) * safe_normalize(C_ - dot(C_, B) * B);
1026
1027                 /* calculate intersection with the planar triangle */
1028                 if(!ray_triangle_intersect(P, ls->D, FLT_MAX,
1029 #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__)
1030                                            (ssef*)V,
1031 #else
1032                                            V[0], V[1], V[2],
1033 #endif
1034                                            &ls->u, &ls->v, &ls->t)) {
1035                         ls->pdf = 0.0f;
1036                         return;
1037                 }
1038
1039                 ls->P = P + ls->D * ls->t;
1040
1041                 /* pdf_triangles is calculated over triangle area, but we're sampling over solid angle */
1042                 if(UNLIKELY(solid_angle == 0.0f)) {
1043                         ls->pdf = 0.0f;
1044                         return;
1045                 }
1046                 else {
1047                         if(has_motion) {
1048                                 /* get the center frame vertices, this is what the PDF was calculated from */
1049                                 triangle_world_space_vertices(kg, object, prim, -1.0f, V);
1050                                 area = triangle_area(V[0], V[1], V[2]);
1051                         }
1052                         const float pdf = area * kernel_data.integrator.pdf_triangles;
1053                         ls->pdf = pdf / solid_angle;
1054                 }
1055         }
1056         else {
1057                 /* compute random point in triangle */
1058                 randu = sqrtf(randu);
1059
1060                 const float u = 1.0f - randu;
1061                 const float v = randv*randu;
1062                 const float t = 1.0f - u - v;
1063                 ls->P = u * V[0] + v * V[1] + t * V[2];
1064                 /* compute incoming direction, distance and pdf */
1065                 ls->D = normalize_len(ls->P - P, &ls->t);
1066                 ls->pdf = triangle_light_pdf_area(kg, ls->Ng, -ls->D, ls->t);
1067                 if(has_motion && area != 0.0f) {
1068                         /* scale the PDF.
1069                          * area = the area the sample was taken from
1070                          * area_pre = the are from which pdf_triangles was calculated from */
1071                         triangle_world_space_vertices(kg, object, prim, -1.0f, V);
1072                         const float area_pre = triangle_area(V[0], V[1], V[2]);
1073                         ls->pdf = ls->pdf * area_pre / area;
1074                 }
1075                 ls->u = u;
1076                 ls->v = v;
1077         }
1078 }
1079
1080 /* Light Distribution */
1081
1082 ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
1083 {
1084         /* This is basically std::upper_bound as used by pbrt, to find a point light or
1085          * triangle to emit from, proportional to area. a good improvement would be to
1086          * also sample proportional to power, though it's not so well defined with
1087          * arbitrary shaders. */
1088         int first = 0;
1089         int len = kernel_data.integrator.num_distribution + 1;
1090         float r = *randu;
1091
1092         while(len > 0) {
1093                 int half_len = len >> 1;
1094                 int middle = first + half_len;
1095
1096                 if(r < kernel_tex_fetch(__light_distribution, middle).totarea) {
1097                         len = half_len;
1098                 }
1099                 else {
1100                         first = middle + 1;
1101                         len = len - half_len - 1;
1102                 }
1103         }
1104
1105         /* Clamping should not be needed but float rounding errors seem to
1106          * make this fail on rare occasions. */
1107         int index = clamp(first-1, 0, kernel_data.integrator.num_distribution-1);
1108
1109         /* Rescale to reuse random number. this helps the 2D samples within
1110          * each area light be stratified as well. */
1111         float distr_min = kernel_tex_fetch(__light_distribution, index).totarea;
1112         float distr_max = kernel_tex_fetch(__light_distribution, index+1).totarea;
1113         *randu = (r - distr_min)/(distr_max - distr_min);
1114
1115         return index;
1116 }
1117
1118 /* Generic Light */
1119
1120 ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
1121 {
1122         return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
1123 }
1124
1125 ccl_device_noinline bool light_sample(KernelGlobals *kg,
1126                                       float randu,
1127                                       float randv,
1128                                       float time,
1129                                       float3 P,
1130                                       int bounce,
1131                                       LightSample *ls)
1132 {
1133         /* sample index */
1134         int index = light_distribution_sample(kg, &randu);
1135
1136         /* fetch light data */
1137         const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution, index);
1138         int prim = kdistribution->prim;
1139
1140         if(prim >= 0) {
1141                 int object = kdistribution->mesh_light.object_id;
1142                 int shader_flag = kdistribution->mesh_light.shader_flag;
1143
1144                 triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
1145                 ls->shader |= shader_flag;
1146                 return (ls->pdf > 0.0f);
1147         }
1148         else {
1149                 int lamp = -prim-1;
1150
1151                 if(UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
1152                         return false;
1153                 }
1154
1155                 return lamp_light_sample(kg, lamp, randu, randv, P, ls);
1156         }
1157 }
1158
1159 ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
1160 {
1161         return kernel_tex_fetch(__lights, index).samples;
1162 }
1163
1164 CCL_NAMESPACE_END