4 * ***** BEGIN GPL LICENSE BLOCK *****
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software Foundation,
18 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * The Original Code is Copyright (C) 1990-1998 NeoGeo BV.
21 * All rights reserved.
23 * Contributors: 2004/2005 Blender Foundation, full recode
25 * ***** END GPL LICENSE BLOCK *****
33 #include "MEM_guardedalloc.h"
35 #include "DNA_material_types.h"
36 #include "DNA_lamp_types.h"
38 #include "BKE_global.h"
40 #include "BKE_utildefines.h"
42 #include "BLI_arithb.h"
43 #include "BLI_blenlib.h"
44 #include "BLI_jitter.h"
49 #include "render_types.h"
50 #include "renderpipeline.h"
51 #include "rendercore.h"
52 #include "renderdatabase.h"
53 #include "pixelblending.h"
54 #include "pixelshading.h"
58 #include "RE_raytrace.h"
63 #define DEPTH_SHADOW_TRA 10
65 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
66 /* defined in pipeline.c, is hardcopy of active dynamic allocated Render */
67 /* only to be used here in this file, it's for speed */
68 extern struct Render R;
69 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
71 static void vlr_face_coords(RayFace *face, float **v1, float **v2, float **v3, float **v4)
73 VlakRen *vlr= (VlakRen*)face;
75 *v1 = (vlr->v1)? vlr->v1->co: NULL;
76 *v2 = (vlr->v2)? vlr->v2->co: NULL;
77 *v3 = (vlr->v3)? vlr->v3->co: NULL;
78 *v4 = (vlr->v4)? vlr->v4->co: NULL;
81 static int vlr_check_intersect(Isect *is, int ob, RayFace *face)
83 ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)is->userdata, ob);
84 VlakRen *vlr = (VlakRen*)face;
86 /* for baking selected to active non-traceable materials might still
87 * be in the raytree */
88 if(!(vlr->mat->mode & MA_TRACEBLE))
91 /* I know... cpu cycle waste, might do smarter once */
92 if(is->mode==RE_RAY_MIRROR)
93 return !(vlr->mat->mode & MA_ONLYCAST);
95 return (is->lay & obi->lay);
98 static float *vlr_get_transform(void *userdata, int i)
100 ObjectInstanceRen *obi= RAY_OBJECT_GET((Render*)userdata, i);
102 return (obi->flag & R_TRANSFORMED)? (float*)obi->mat: NULL;
105 void freeraytree(Render *re)
108 RE_ray_tree_free(re->raytree);
113 void makeraytree(Render *re)
115 ObjectInstanceRen *obi;
118 float min[3], max[3], co1[3], co2[3], co3[3], co4[3];
119 double lasttime= PIL_check_seconds_timer();
120 int v, totv = 0, totface = 0;
122 INIT_MINMAX(min, max);
124 /* first min max raytree space */
125 for(obi=re->instancetable.first; obi; obi=obi->next) {
128 if(re->excludeob && obr->ob == re->excludeob)
131 for(v=0;v<obr->totvlak;v++) {
132 if((v & 255)==0) vlr= obr->vlaknodes[v>>8].vlak;
134 /* baking selected to active needs non-traceable too */
135 if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE)) {
136 if((vlr->mat->mode & MA_WIRE)==0) {
137 VECCOPY(co1, vlr->v1->co);
138 VECCOPY(co2, vlr->v2->co);
139 VECCOPY(co3, vlr->v3->co);
141 if(obi->flag & R_TRANSFORMED) {
142 Mat4MulVecfl(obi->mat, co1);
143 Mat4MulVecfl(obi->mat, co2);
144 Mat4MulVecfl(obi->mat, co3);
147 DO_MINMAX(co1, min, max);
148 DO_MINMAX(co2, min, max);
149 DO_MINMAX(co3, min, max);
152 VECCOPY(co4, vlr->v4->co);
153 if(obi->flag & R_TRANSFORMED)
154 Mat4MulVecfl(obi->mat, co4);
155 DO_MINMAX(co4, min, max);
164 re->raytree= RE_ray_tree_create(re->r.ocres, totface, min, max,
165 vlr_face_coords, vlr_check_intersect, vlr_get_transform, re);
167 if(min[0] > max[0]) { /* empty raytree */
168 RE_ray_tree_done(re->raytree);
172 for(obi=re->instancetable.first; obi; obi=obi->next) {
175 if(re->excludeob && obr->ob == re->excludeob)
178 for(v=0; v<obr->totvlak; v++, totv++) {
180 double time= PIL_check_seconds_timer();
182 vlr= obr->vlaknodes[v>>8].vlak;
185 if(time-lasttime>1.0f) {
187 sprintf(str, "Filling Octree: %d", totv);
189 re->stats_draw(&re->i);
196 if((re->flag & R_BAKE_TRACE) || (vlr->mat->mode & MA_TRACEBLE))
197 if((vlr->mat->mode & MA_WIRE)==0)
198 RE_ray_tree_add_face(re->raytree, RAY_OBJECT_SET(re, obi), vlr);
202 RE_ray_tree_done(re->raytree);
205 re->stats_draw(&re->i);
208 void shade_ray(Isect *is, ShadeInput *shi, ShadeResult *shr)
210 VlakRen *vlr= (VlakRen*)is->face;
211 ObjectInstanceRen *obi= RAY_OBJECT_GET(&R, is->ob);
214 /* set up view vector */
215 VECCOPY(shi->view, is->vec);
218 shi->co[0]= is->start[0]+is->labda*(shi->view[0]);
219 shi->co[1]= is->start[1]+is->labda*(shi->view[1]);
220 shi->co[2]= is->start[2]+is->labda*(shi->view[2]);
222 Normalize(shi->view);
228 memcpy(&shi->r, &shi->mat->r, 23*sizeof(float)); // note, keep this synced with render_types.h
229 shi->har= shi->mat->har;
231 // Osa structs we leave unchanged now
232 SWAP(int, osatex, shi->osatex);
234 shi->dxco[0]= shi->dxco[1]= shi->dxco[2]= 0.0f;
235 shi->dyco[0]= shi->dyco[1]= shi->dyco[2]= 0.0f;
237 // but, set Osa stuff to zero where it can confuse texture code
238 if(shi->mat->texco & (TEXCO_NORM|TEXCO_REFL) ) {
239 shi->dxno[0]= shi->dxno[1]= shi->dxno[2]= 0.0f;
240 shi->dyno[0]= shi->dyno[1]= shi->dyno[2]= 0.0f;
245 shade_input_set_triangle_i(shi, obi, vlr, 2, 1, 3);
247 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 3);
250 shade_input_set_triangle_i(shi, obi, vlr, 0, 1, 2);
255 shi->dx_u= shi->dx_v= shi->dy_u= shi->dy_v= 0.0f;
257 shade_input_set_normals(shi);
259 /* point normals to viewing direction */
260 if(INPR(shi->facenor, shi->view) < 0.0f)
261 shade_input_flip_normals(shi);
263 shade_input_set_shade_texco(shi);
265 if(is->mode==RE_RAY_SHADOW_TRA)
266 if(shi->mat->nodetree && shi->mat->use_nodes) {
267 ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
268 shi->mat= vlr->mat; /* shi->mat is being set in nodetree */
271 shade_color(shi, shr);
273 if(shi->mat->nodetree && shi->mat->use_nodes) {
274 ntreeShaderExecTree(shi->mat->nodetree, shi, shr);
275 shi->mat= vlr->mat; /* shi->mat is being set in nodetree */
278 if (shi->mat->material_type == MA_SOLID) shade_material_loop(shi, shr);
279 else if (shi->mat->material_type == MA_VOLUME) shade_volume_loop(shi, shr);
281 /* raytrace likes to separate the spec color */
282 VECSUB(shr->diff, shr->combined, shr->spec);
285 SWAP(int, osatex, shi->osatex); // XXXXX!!!!
289 static int refraction(float *refract, float *n, float *view, float index)
293 VECCOPY(refract, view);
295 dot= view[0]*n[0] + view[1]*n[1] + view[2]*n[2];
299 fac= 1.0f - (1.0f - dot*dot)*index*index;
300 if(fac<= 0.0f) return 0;
301 fac= -dot*index + sqrt(fac);
304 fac= 1.0f - (1.0f - dot*dot)*index*index;
305 if(fac<= 0.0f) return 0;
306 fac= -dot*index - sqrt(fac);
309 refract[0]= index*view[0] + fac*n[0];
310 refract[1]= index*view[1] + fac*n[1];
311 refract[2]= index*view[2] + fac*n[2];
316 /* orn = original face normal */
317 static void reflection(float *ref, float *n, float *view, float *orn)
321 f1= -2.0f*(n[0]*view[0]+ n[1]*view[1]+ n[2]*view[2]);
323 ref[0]= (view[0]+f1*n[0]);
324 ref[1]= (view[1]+f1*n[1]);
325 ref[2]= (view[2]+f1*n[2]);
328 /* test phong normals, then we should prevent vector going to the back */
329 f1= ref[0]*orn[0]+ ref[1]*orn[1]+ ref[2]*orn[2];
340 static void color_combine(float *result, float fac1, float fac2, float *col1, float *col2)
342 float col1t[3], col2t[3];
344 col1t[0]= sqrt(col1[0]);
345 col1t[1]= sqrt(col1[1]);
346 col1t[2]= sqrt(col1[2]);
347 col2t[0]= sqrt(col2[0]);
348 col2t[1]= sqrt(col2[1]);
349 col2t[2]= sqrt(col2[2]);
351 result[0]= (fac1*col1t[0] + fac2*col2t[0]);
352 result[0]*= result[0];
353 result[1]= (fac1*col1t[1] + fac2*col2t[1]);
354 result[1]*= result[1];
355 result[2]= (fac1*col1t[2] + fac2*col2t[2]);
356 result[2]*= result[2];
360 static float shade_by_transmission(Isect *is, ShadeInput *shi, ShadeResult *shr)
362 float dx, dy, dz, d, p;
364 if (0 == (shi->mat->mode & (MA_RAYTRANSP|MA_ZTRA)))
367 if (shi->mat->tx_limit <= 0.0f) {
371 /* shi.co[] calculated by shade_ray() */
372 dx= shi->co[0] - is->start[0];
373 dy= shi->co[1] - is->start[1];
374 dz= shi->co[2] - is->start[2];
375 d= sqrt(dx*dx+dy*dy+dz*dz);
376 if (d > shi->mat->tx_limit)
377 d= shi->mat->tx_limit;
379 p = shi->mat->tx_falloff;
380 if(p < 0.0f) p= 0.0f;
381 else if (p > 10.0f) p= 10.0f;
383 shr->alpha *= pow(d, p);
384 if (shr->alpha > 1.0f)
391 static void ray_fadeout_endcolor(float *col, ShadeInput *origshi, ShadeInput *shi, ShadeResult *shr, Isect *isec, float *vec)
393 /* un-intersected rays get either rendered material color or sky color */
394 if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOMAT) {
395 VECCOPY(col, shr->combined);
396 } else if (origshi->mat->fadeto_mir == MA_RAYMIR_FADETOSKY) {
397 VECCOPY(shi->view, vec);
398 Normalize(shi->view);
400 shadeSkyView(col, isec->start, shi->view, NULL);
401 shadeSunView(col, shi->view);
405 static void ray_fadeout(Isect *is, ShadeInput *shi, float *col, float *blendcol, float dist_mir)
407 /* if fading out, linear blend against fade color */
410 blendfac = 1.0 - VecLenf(shi->co, is->start)/dist_mir;
412 col[0] = col[0]*blendfac + (1.0 - blendfac)*blendcol[0];
413 col[1] = col[1]*blendfac + (1.0 - blendfac)*blendcol[1];
414 col[2] = col[2]*blendfac + (1.0 - blendfac)*blendcol[2];
417 /* the main recursive tracer itself */
418 static void traceray(ShadeInput *origshi, ShadeResult *origshr, short depth, float *start, float *vec, float *col, ObjectInstanceRen *obi, VlakRen *vlr, int traflag)
423 float f, f1, fr, fg, fb;
424 float ref[3], maxsize=RE_ray_tree_max_size(R.raytree);
425 float dist_mir = origshi->mat->dist_mir;
427 /* Warning, This is not that nice, and possibly a bit slow for every ray,
428 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
429 memset(&shi, 0, sizeof(ShadeInput));
430 /* end warning! - Campbell */
432 VECCOPY(isec.start, start);
433 if (dist_mir > 0.0) {
434 isec.end[0]= start[0]+dist_mir*vec[0];
435 isec.end[1]= start[1]+dist_mir*vec[1];
436 isec.end[2]= start[2]+dist_mir*vec[2];
438 isec.end[0]= start[0]+maxsize*vec[0];
439 isec.end[1]= start[1]+maxsize*vec[1];
440 isec.end[2]= start[2]+maxsize*vec[2];
442 isec.mode= RE_RAY_MIRROR;
443 isec.faceorig= (RayFace*)vlr;
444 isec.oborig= RAY_OBJECT_SET(&R, obi);
446 if(RE_ray_tree_intersect(R.raytree, &isec)) {
449 shi.mask= origshi->mask;
450 shi.osatex= origshi->osatex;
451 shi.depth= 1; /* only used to indicate tracing */
452 shi.thread= origshi->thread;
453 //shi.sample= 0; // memset above, so dont need this
456 shi.lay= origshi->lay;
457 shi.passflag= SCE_PASS_COMBINED; /* result of tracing needs no pass info */
458 shi.combinedflag= 0xFFFFFF; /* ray trace does all options */
459 //shi.do_preview= 0; // memset above, so dont need this
460 shi.light_override= origshi->light_override;
461 shi.mat_override= origshi->mat_override;
463 memset(&shr, 0, sizeof(ShadeResult));
465 shade_ray(&isec, &shi, &shr);
466 if (traflag & RAY_TRA)
467 d= shade_by_transmission(&isec, &shi, &shr);
471 if(shi.mat->mode_l & (MA_RAYTRANSP|MA_ZTRA) && shr.alpha < 1.0f) {
472 float nf, f, f1, refract[3], tracol[4];
477 tracol[3]= col[3]; // we pass on and accumulate alpha
479 if(shi.mat->mode & MA_RAYTRANSP) {
480 /* odd depths: use normal facing viewer, otherwise flip */
481 if(traflag & RAY_TRAFLIP) {
483 norm[0]= - shi.vn[0];
484 norm[1]= - shi.vn[1];
485 norm[2]= - shi.vn[2];
486 if (!refraction(refract, norm, shi.view, shi.ang))
487 reflection(refract, norm, shi.view, shi.vn);
490 if (!refraction(refract, shi.vn, shi.view, shi.ang))
491 reflection(refract, shi.vn, shi.view, shi.vn);
494 traceray(origshi, origshr, depth-1, shi.co, refract, tracol, shi.obi, shi.vlr, traflag ^ RAY_TRAFLIP);
497 traceray(origshi, origshr, depth-1, shi.co, shi.view, tracol, shi.obi, shi.vlr, 0);
499 f= shr.alpha; f1= 1.0f-f;
500 nf= d * shi.mat->filter;
501 fr= 1.0f+ nf*(shi.r-1.0f);
502 fg= 1.0f+ nf*(shi.g-1.0f);
503 fb= 1.0f+ nf*(shi.b-1.0f);
504 shr.diff[0]= f*shr.diff[0] + f1*fr*tracol[0];
505 shr.diff[1]= f*shr.diff[1] + f1*fg*tracol[1];
506 shr.diff[2]= f*shr.diff[2] + f1*fb*tracol[2];
512 col[3]= f1*tracol[3] + f;
517 if(shi.mat->mode_l & MA_RAYMIRROR) {
519 if(f!=0.0f) f*= fresnel_fac(shi.view, shi.vn, shi.mat->fresnel_mir_i, shi.mat->fresnel_mir);
526 reflection(ref, shi.vn, shi.view, NULL);
527 traceray(origshi, origshr, depth-1, shi.co, ref, mircol, shi.obi, shi.vlr, 0);
532 //color_combine(col, f*fr*(1.0f-shr.spec[0]), f1, col, shr.diff);
533 //col[0]+= shr.spec[0];
534 //col[1]+= shr.spec[1];
535 //col[2]+= shr.spec[2];
541 col[0]= f*fr*(1.0f-shr.spec[0])*mircol[0] + f1*shr.diff[0] + shr.spec[0];
542 col[1]= f*fg*(1.0f-shr.spec[1])*mircol[1] + f1*shr.diff[1] + shr.spec[1];
543 col[2]= f*fb*(1.0f-shr.spec[2])*mircol[2] + f1*shr.diff[2] + shr.spec[2];
546 col[0]= shr.diff[0] + shr.spec[0];
547 col[1]= shr.diff[1] + shr.spec[1];
548 col[2]= shr.diff[2] + shr.spec[2];
551 if (dist_mir > 0.0) {
554 /* max ray distance set, but found an intersection, so fade this color
555 * out towards the sky/material color for a smooth transition */
556 ray_fadeout_endcolor(blendcol, origshi, &shi, origshr, &isec, vec);
557 ray_fadeout(&isec, &shi, col, blendcol, dist_mir);
561 col[0]= shr.diff[0] + shr.spec[0];
562 col[1]= shr.diff[1] + shr.spec[1];
563 col[2]= shr.diff[2] + shr.spec[2];
568 ray_fadeout_endcolor(col, origshi, &shi, origshr, &isec, vec);
572 /* **************** jitter blocks ********** */
574 /* calc distributed planar energy */
576 static void DP_energy(float *table, float *vec, int tot, float xsize, float ysize)
579 float *fp, force[3], result[3];
580 float dx, dy, dist, min;
582 min= MIN2(xsize, ysize);
584 result[0]= result[1]= 0.0f;
586 for(y= -1; y<2; y++) {
588 for(x= -1; x<2; x++) {
591 for(a=0; a<tot; a++, fp+= 2) {
592 force[0]= vec[0] - fp[0]-dx;
593 force[1]= vec[1] - fp[1]-dy;
594 dist= force[0]*force[0] + force[1]*force[1];
595 if(dist < min && dist>0.0f) {
596 result[0]+= force[0]/dist;
597 result[1]+= force[1]/dist;
602 vec[0] += 0.1*min*result[0]/(float)tot;
603 vec[1] += 0.1*min*result[1]/(float)tot;
605 vec[0]= vec[0] - xsize*floor(vec[0]/xsize + 0.5);
606 vec[1]= vec[1] - ysize*floor(vec[1]/ysize + 0.5);
609 // random offset of 1 in 2
610 static void jitter_plane_offset(float *jitter1, float *jitter2, int tot, float sizex, float sizey, float ofsx, float ofsy)
612 float dsizex= sizex*ofsx;
613 float dsizey= sizey*ofsy;
614 float hsizex= 0.5*sizex, hsizey= 0.5*sizey;
617 for(x=tot; x>0; x--, jitter1+=2, jitter2+=2) {
618 jitter2[0]= jitter1[0] + dsizex;
619 jitter2[1]= jitter1[1] + dsizey;
620 if(jitter2[0] > hsizex) jitter2[0]-= sizex;
621 if(jitter2[1] > hsizey) jitter2[1]-= sizey;
625 /* called from convertBlenderScene.c */
626 /* we do this in advance to get consistant random, not alter the render seed, and be threadsafe */
627 void init_jitter_plane(LampRen *lar)
630 int x, iter=12, tot= lar->ray_totsamp;
632 /* test if already initialized */
633 if(lar->jitter) return;
635 /* at least 4, or max threads+1 tables */
636 if(BLENDER_MAX_THREADS < 4) x= 4;
637 else x= BLENDER_MAX_THREADS+1;
638 fp= lar->jitter= MEM_callocN(x*tot*2*sizeof(float), "lamp jitter tab");
640 /* if 1 sample, we leave table to be zero's */
643 /* set per-lamp fixed seed */
646 /* fill table with random locations, area_size large */
647 for(x=0; x<tot; x++, fp+=2) {
648 fp[0]= (BLI_frand()-0.5)*lar->area_size;
649 fp[1]= (BLI_frand()-0.5)*lar->area_sizey;
654 for(x=tot; x>0; x--, fp+=2) {
655 DP_energy(lar->jitter, fp, tot, lar->area_size, lar->area_sizey);
659 /* create the dithered tables (could just check lamp type!) */
660 jitter_plane_offset(lar->jitter, lar->jitter+2*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.0f);
661 jitter_plane_offset(lar->jitter, lar->jitter+4*tot, tot, lar->area_size, lar->area_sizey, 0.5f, 0.5f);
662 jitter_plane_offset(lar->jitter, lar->jitter+6*tot, tot, lar->area_size, lar->area_sizey, 0.0f, 0.5f);
665 /* table around origin, -0.5*size to 0.5*size */
666 static float *give_jitter_plane(LampRen *lar, int thread, int xs, int ys)
670 tot= lar->ray_totsamp;
672 if(lar->ray_samp_type & LA_SAMP_JITTER) {
673 /* made it threadsafe */
675 if(lar->xold[thread]!=xs || lar->yold[thread]!=ys) {
676 jitter_plane_offset(lar->jitter, lar->jitter+2*(thread+1)*tot, tot, lar->area_size, lar->area_sizey, BLI_thread_frand(thread), BLI_thread_frand(thread));
677 lar->xold[thread]= xs;
678 lar->yold[thread]= ys;
680 return lar->jitter+2*(thread+1)*tot;
682 if(lar->ray_samp_type & LA_SAMP_DITHER) {
683 return lar->jitter + 2*tot*((xs & 1)+2*(ys & 1));
690 /* **************** QMC sampling *************** */
692 static void halton_sample(double *ht_invprimes, double *ht_nums, double *v)
694 // incremental halton sequence generator, from:
695 // "Instant Radiosity", Keller A.
698 for (i = 0; i < 2; i++)
700 double r = fabs((1.0 - ht_nums[i]) - 1e-10);
702 if (ht_invprimes[i] >= r)
705 double h = ht_invprimes[i];
709 h *= ht_invprimes[i];
712 ht_nums[i] += ((lasth + h) - 1.0);
715 ht_nums[i] += ht_invprimes[i];
717 v[i] = (float)ht_nums[i];
721 /* Generate Hammersley points in [0,1)^2
722 * From Lucille renderer */
723 static void hammersley_create(double *out, int n)
728 for (k = 0; k < n; k++) {
730 for (p = 0.5, kk = k; kk; p *= 0.5, kk >>= 1) {
731 if (kk & 1) { /* kk mod 2 = 1 */
736 out[2 * k + 0] = (double)k / (double)n;
741 static struct QMCSampler *QMC_initSampler(int type, int tot)
743 QMCSampler *qsa = MEM_callocN(sizeof(QMCSampler), "qmc sampler");
744 qsa->samp2d = MEM_callocN(2*sizeof(double)*tot, "qmc sample table");
749 if (qsa->type==SAMP_TYPE_HAMMERSLEY)
750 hammersley_create(qsa->samp2d, qsa->tot);
755 static void QMC_initPixel(QMCSampler *qsa, int thread)
757 if (qsa->type==SAMP_TYPE_HAMMERSLEY)
759 /* hammersley sequence is fixed, already created in QMCSampler init.
760 * per pixel, gets a random offset. We create separate offsets per thread, for write-safety */
761 qsa->offs[thread][0] = 0.5 * BLI_thread_frand(thread);
762 qsa->offs[thread][1] = 0.5 * BLI_thread_frand(thread);
764 else { /* SAMP_TYPE_HALTON */
766 /* generate a new randomised halton sequence per pixel
767 * to alleviate qmc artifacts and make it reproducable
768 * between threads/frames */
769 double ht_invprimes[2], ht_nums[2];
773 ht_nums[0] = BLI_thread_frand(thread);
774 ht_nums[1] = BLI_thread_frand(thread);
775 ht_invprimes[0] = 0.5;
776 ht_invprimes[1] = 1.0/3.0;
778 for (i=0; i< qsa->tot; i++) {
779 halton_sample(ht_invprimes, ht_nums, r);
780 qsa->samp2d[2*i+0] = r[0];
781 qsa->samp2d[2*i+1] = r[1];
786 static void QMC_freeSampler(QMCSampler *qsa)
788 MEM_freeN(qsa->samp2d);
792 static void QMC_getSample(double *s, QMCSampler *qsa, int thread, int num)
794 if (qsa->type == SAMP_TYPE_HAMMERSLEY) {
795 s[0] = fmod(qsa->samp2d[2*num+0] + qsa->offs[thread][0], 1.0f);
796 s[1] = fmod(qsa->samp2d[2*num+1] + qsa->offs[thread][1], 1.0f);
798 else { /* SAMP_TYPE_HALTON */
799 s[0] = qsa->samp2d[2*num+0];
800 s[1] = qsa->samp2d[2*num+1];
804 /* phong weighted disc using 'blur' for exponent, centred on 0,0 */
805 static void QMC_samplePhong(float *vec, QMCSampler *qsa, int thread, int num, float blur)
810 QMC_getSample(s, qsa, thread, num);
813 pz = pow(s[1], blur);
814 sqr = sqrt(1.0f-pz*pz);
816 vec[0] = cos(phi)*sqr;
817 vec[1] = sin(phi)*sqr;
821 /* rect of edge lengths sizex, sizey, centred on 0.0,0.0 i.e. ranging from -sizex/2 to +sizey/2 */
822 static void QMC_sampleRect(float *vec, QMCSampler *qsa, int thread, int num, float sizex, float sizey)
826 QMC_getSample(s, qsa, thread, num);
828 vec[0] = (s[0] - 0.5) * sizex;
829 vec[1] = (s[1] - 0.5) * sizey;
833 /* disc of radius 'radius', centred on 0,0 */
834 static void QMC_sampleDisc(float *vec, QMCSampler *qsa, int thread, int num, float radius)
839 QMC_getSample(s, qsa, thread, num);
844 vec[0] = cos(phi)*sqr* radius/2.0;
845 vec[1] = sin(phi)*sqr* radius/2.0;
849 /* uniform hemisphere sampling */
850 static void QMC_sampleHemi(float *vec, QMCSampler *qsa, int thread, int num)
855 QMC_getSample(s, qsa, thread, num);
860 vec[0] = cos(phi)*sqr;
861 vec[1] = sin(phi)*sqr;
862 vec[2] = 1.f - s[1]*s[1];
865 #if 0 /* currently not used */
866 /* cosine weighted hemisphere sampling */
867 static void QMC_sampleHemiCosine(float *vec, QMCSampler *qsa, int thread, int num)
872 QMC_getSample(s, qsa, thread, num);
875 sqr = s[1]*sqrt(2-s[1]*s[1]);
877 vec[0] = cos(phi)*sqr;
878 vec[1] = sin(phi)*sqr;
879 vec[2] = 1.f - s[1]*s[1];
884 /* called from convertBlenderScene.c */
885 void init_render_qmcsampler(Render *re)
887 re->qmcsamplers= MEM_callocN(sizeof(ListBase)*BLENDER_MAX_THREADS, "QMCListBase");
890 static QMCSampler *get_thread_qmcsampler(Render *re, int thread, int type, int tot)
894 /* create qmc samplers as needed, since recursion makes it hard to
895 * predict how many are needed */
897 for(qsa=re->qmcsamplers[thread].first; qsa; qsa=qsa->next) {
898 if(qsa->type == type && qsa->tot == tot && !qsa->used) {
904 qsa= QMC_initSampler(type, tot);
906 BLI_addtail(&re->qmcsamplers[thread], qsa);
911 static void release_thread_qmcsampler(Render *re, int thread, QMCSampler *qsa)
916 void free_render_qmcsampler(Render *re)
918 QMCSampler *qsa, *next;
921 if(re->qmcsamplers) {
922 for(a=0; a<BLENDER_MAX_THREADS; a++) {
923 for(qsa=re->qmcsamplers[a].first; qsa; qsa=next) {
925 QMC_freeSampler(qsa);
928 re->qmcsamplers[a].first= re->qmcsamplers[a].last= NULL;
931 MEM_freeN(re->qmcsamplers);
932 re->qmcsamplers= NULL;
936 static int adaptive_sample_variance(int samples, float *col, float *colsq, float thresh)
938 float var[3], mean[3];
940 /* scale threshold just to give a bit more precision in input rather than dealing with
941 * tiny tiny numbers in the UI */
944 mean[0] = col[0] / (float)samples;
945 mean[1] = col[1] / (float)samples;
946 mean[2] = col[2] / (float)samples;
948 var[0] = (colsq[0] / (float)samples) - (mean[0]*mean[0]);
949 var[1] = (colsq[1] / (float)samples) - (mean[1]*mean[1]);
950 var[2] = (colsq[2] / (float)samples) - (mean[2]*mean[2]);
952 if ((var[0] * 0.4 < thresh) && (var[1] * 0.3 < thresh) && (var[2] * 0.6 < thresh))
958 static int adaptive_sample_contrast_val(int samples, float prev, float val, float thresh)
960 /* if the last sample's contribution to the total value was below a small threshold
961 * (i.e. the samples taken are very similar), then taking more samples that are probably
962 * going to be the same is wasting effort */
963 if (fabs( prev/(float)(samples-1) - val/(float)samples ) < thresh) {
969 static float get_avg_speed(ShadeInput *shi)
971 float pre_x, pre_y, post_x, post_y, speedavg;
973 pre_x = (shi->winspeed[0] == PASS_VECTOR_MAX)?0.0:shi->winspeed[0];
974 pre_y = (shi->winspeed[1] == PASS_VECTOR_MAX)?0.0:shi->winspeed[1];
975 post_x = (shi->winspeed[2] == PASS_VECTOR_MAX)?0.0:shi->winspeed[2];
976 post_y = (shi->winspeed[3] == PASS_VECTOR_MAX)?0.0:shi->winspeed[3];
978 speedavg = (sqrt(pre_x*pre_x + pre_y*pre_y) + sqrt(post_x*post_x + post_y*post_y)) / 2.0;
983 /* ***************** main calls ************** */
986 static void trace_refract(float *col, ShadeInput *shi, ShadeResult *shr)
988 QMCSampler *qsa=NULL;
991 float samp3d[3], orthx[3], orthy[3];
992 float v_refract[3], v_refract_new[3];
993 float sampcol[4], colsq[4];
995 float blur = pow(1.0 - shi->mat->gloss_tra, 3);
996 short max_samples = shi->mat->samp_gloss_tra;
997 float adapt_thresh = shi->mat->adapt_thresh_tra;
1001 colsq[0] = colsq[1] = colsq[2] = 0.0;
1002 col[0] = col[1] = col[2] = 0.0;
1006 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1007 else samp_type = SAMP_TYPE_HAMMERSLEY;
1009 /* all samples are generated per pixel */
1010 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1011 QMC_initPixel(qsa, shi->thread);
1016 while (samples < max_samples) {
1017 refraction(v_refract, shi->vn, shi->view, shi->ang);
1019 if (max_samples > 1) {
1020 /* get a quasi-random vector from a phong-weighted disc */
1021 QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1023 VecOrthoBasisf(v_refract, orthx, orthy);
1024 VecMulf(orthx, samp3d[0]);
1025 VecMulf(orthy, samp3d[1]);
1027 /* and perturb the refraction vector in it */
1028 VecAddf(v_refract_new, v_refract, orthx);
1029 VecAddf(v_refract_new, v_refract_new, orthy);
1031 Normalize(v_refract_new);
1033 /* no blurriness, use the original normal */
1034 VECCOPY(v_refract_new, v_refract);
1037 traceray(shi, shr, shi->mat->ray_depth_tra, shi->co, v_refract_new, sampcol, shi->obi, shi->vlr, RAY_TRA|RAY_TRAFLIP);
1039 col[0] += sampcol[0];
1040 col[1] += sampcol[1];
1041 col[2] += sampcol[2];
1042 col[3] += sampcol[3];
1044 /* for variance calc */
1045 colsq[0] += sampcol[0]*sampcol[0];
1046 colsq[1] += sampcol[1]*sampcol[1];
1047 colsq[2] += sampcol[2]*sampcol[2];
1051 /* adaptive sampling */
1052 if (adapt_thresh < 1.0 && samples > max_samples/2)
1054 if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1057 /* if the pixel so far is very dark, we can get away with less samples */
1058 if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1063 col[0] /= (float)samples;
1064 col[1] /= (float)samples;
1065 col[2] /= (float)samples;
1066 col[3] /= (float)samples;
1069 release_thread_qmcsampler(&R, shi->thread, qsa);
1072 static void trace_reflect(float *col, ShadeInput *shi, ShadeResult *shr, float fresnelfac)
1074 QMCSampler *qsa=NULL;
1077 float samp3d[3], orthx[3], orthy[3];
1078 float v_nor_new[3], v_reflect[3];
1079 float sampcol[4], colsq[4];
1081 float blur = pow(1.0 - shi->mat->gloss_mir, 3);
1082 short max_samples = shi->mat->samp_gloss_mir;
1083 float adapt_thresh = shi->mat->adapt_thresh_mir;
1084 float aniso = 1.0 - shi->mat->aniso_gloss_mir;
1088 col[0] = col[1] = col[2] = 0.0;
1089 colsq[0] = colsq[1] = colsq[2] = 0.0;
1092 if (adapt_thresh != 0.0) samp_type = SAMP_TYPE_HALTON;
1093 else samp_type = SAMP_TYPE_HAMMERSLEY;
1095 /* all samples are generated per pixel */
1096 qsa = get_thread_qmcsampler(&R, shi->thread, samp_type, max_samples);
1097 QMC_initPixel(qsa, shi->thread);
1101 while (samples < max_samples) {
1103 if (max_samples > 1) {
1104 /* get a quasi-random vector from a phong-weighted disc */
1105 QMC_samplePhong(samp3d, qsa, shi->thread, samples, blur);
1107 /* find the normal's perpendicular plane, blurring along tangents
1108 * if tangent shading enabled */
1109 if (shi->mat->mode & (MA_TANGENT_V)) {
1110 Crossf(orthx, shi->vn, shi->tang); // bitangent
1111 VECCOPY(orthy, shi->tang);
1112 VecMulf(orthx, samp3d[0]);
1113 VecMulf(orthy, samp3d[1]*aniso);
1115 VecOrthoBasisf(shi->vn, orthx, orthy);
1116 VecMulf(orthx, samp3d[0]);
1117 VecMulf(orthy, samp3d[1]);
1120 /* and perturb the normal in it */
1121 VecAddf(v_nor_new, shi->vn, orthx);
1122 VecAddf(v_nor_new, v_nor_new, orthy);
1123 Normalize(v_nor_new);
1125 /* no blurriness, use the original normal */
1126 VECCOPY(v_nor_new, shi->vn);
1129 if((shi->vlr->flag & R_SMOOTH))
1130 reflection(v_reflect, v_nor_new, shi->view, shi->facenor);
1132 reflection(v_reflect, v_nor_new, shi->view, NULL);
1134 traceray(shi, shr, shi->mat->ray_depth, shi->co, v_reflect, sampcol, shi->obi, shi->vlr, 0);
1137 col[0] += sampcol[0];
1138 col[1] += sampcol[1];
1139 col[2] += sampcol[2];
1141 /* for variance calc */
1142 colsq[0] += sampcol[0]*sampcol[0];
1143 colsq[1] += sampcol[1]*sampcol[1];
1144 colsq[2] += sampcol[2]*sampcol[2];
1148 /* adaptive sampling */
1149 if (adapt_thresh > 0.0 && samples > max_samples/3)
1151 if (adaptive_sample_variance(samples, col, colsq, adapt_thresh))
1154 /* if the pixel so far is very dark, we can get away with less samples */
1155 if ( (col[0] + col[1] + col[2])/3.0/(float)samples < 0.01 )
1158 /* reduce samples when reflection is dim due to low ray mirror blend value or fresnel factor
1159 * and when reflection is blurry */
1160 if (fresnelfac < 0.1 * (blur+1)) {
1163 /* even more for very dim */
1164 if (fresnelfac < 0.05 * (blur+1))
1170 col[0] /= (float)samples;
1171 col[1] /= (float)samples;
1172 col[2] /= (float)samples;
1175 release_thread_qmcsampler(&R, shi->thread, qsa);
1178 /* extern call from render loop */
1179 void ray_trace(ShadeInput *shi, ShadeResult *shr)
1181 float i, f, f1, fr, fg, fb;
1182 float mircol[4], tracol[4];
1186 do_tra= ((shi->mat->mode & (MA_RAYTRANSP)) && shr->alpha!=1.0f);
1187 do_mir= ((shi->mat->mode & MA_RAYMIRROR) && shi->ray_mirror!=0.0f);
1190 /* raytrace mirror amd refract like to separate the spec color */
1191 if(shi->combinedflag & SCE_PASS_SPEC)
1192 VECSUB(diff, shr->combined, shr->spec) /* no ; */
1194 VECCOPY(diff, shr->combined);
1199 trace_refract(tracol, shi, shr);
1201 f= shr->alpha; f1= 1.0f-f;
1202 fr= 1.0f+ shi->mat->filter*(shi->r-1.0f);
1203 fg= 1.0f+ shi->mat->filter*(shi->g-1.0f);
1204 fb= 1.0f+ shi->mat->filter*(shi->b-1.0f);
1206 /* for refract pass */
1207 VECCOPY(olddiff, diff);
1209 diff[0]= f*diff[0] + f1*fr*tracol[0];
1210 diff[1]= f*diff[1] + f1*fg*tracol[1];
1211 diff[2]= f*diff[2] + f1*fb*tracol[2];
1213 if(shi->passflag & SCE_PASS_REFRACT)
1214 VECSUB(shr->refr, diff, olddiff);
1216 if(!(shi->combinedflag & SCE_PASS_REFRACT))
1217 VECSUB(diff, diff, shr->refr);
1219 shr->alpha= tracol[3];
1224 i= shi->ray_mirror*fresnel_fac(shi->view, shi->vn, shi->mat->fresnel_mir_i, shi->mat->fresnel_mir);
1227 trace_reflect(mircol, shi, shr, i);
1233 if(shi->passflag & SCE_PASS_REFLECT) {
1234 /* mirror pass is not blocked out with spec */
1235 shr->refl[0]= fr*mircol[0] - fr*diff[0];
1236 shr->refl[1]= fg*mircol[1] - fg*diff[1];
1237 shr->refl[2]= fb*mircol[2] - fb*diff[2];
1240 if(shi->combinedflag & SCE_PASS_REFLECT) {
1242 f= fr*(1.0f-shr->spec[0]); f1= 1.0f-i;
1243 diff[0]= f*mircol[0] + f1*diff[0];
1245 f= fg*(1.0f-shr->spec[1]); f1= 1.0f-i;
1246 diff[1]= f*mircol[1] + f1*diff[1];
1248 f= fb*(1.0f-shr->spec[2]); f1= 1.0f-i;
1249 diff[2]= f*mircol[2] + f1*diff[2];
1253 /* put back together */
1254 if(shi->combinedflag & SCE_PASS_SPEC)
1255 VECADD(shr->combined, diff, shr->spec) /* no ; */
1257 VECCOPY(shr->combined, diff);
1260 /* color 'shadfac' passes through 'col' with alpha and filter */
1261 /* filter is only applied on alpha defined transparent part */
1262 static void addAlphaLight(float *shadfac, float *col, float alpha, float filter)
1266 fr= 1.0f+ filter*(col[0]-1.0f);
1267 fg= 1.0f+ filter*(col[1]-1.0f);
1268 fb= 1.0f+ filter*(col[2]-1.0f);
1270 shadfac[0]= alpha*col[0] + fr*(1.0f-alpha)*shadfac[0];
1271 shadfac[1]= alpha*col[1] + fg*(1.0f-alpha)*shadfac[1];
1272 shadfac[2]= alpha*col[2] + fb*(1.0f-alpha)*shadfac[2];
1274 shadfac[3]= (1.0f-alpha)*shadfac[3];
1277 static void ray_trace_shadow_tra(Isect *is, int depth, int traflag)
1279 /* ray to lamp, find first face that intersects, check alpha properties,
1280 if it has col[3]>0.0f continue. so exit when alpha is full */
1284 if(RE_ray_tree_intersect(R.raytree, is)) {
1288 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1289 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1290 memset(&shi, 0, sizeof(ShadeInput));
1291 /* end warning! - Campbell */
1293 shi.depth= 1; /* only used to indicate tracing */
1297 shi.thread= shi.sample= 0;
1300 shi.combinedflag= 0;
1302 shi.light_override= NULL;
1303 shi.mat_override= NULL;*/
1305 shade_ray(is, &shi, &shr);
1306 if (traflag & RAY_TRA)
1307 d= shade_by_transmission(is, &shi, &shr);
1309 /* mix colors based on shadfac (rgb + amount of light factor) */
1310 addAlphaLight(is->col, shr.diff, shr.alpha, d*shi.mat->filter);
1312 if(depth>0 && is->col[3]>0.0f) {
1314 /* adapt isect struct */
1315 VECCOPY(is->start, shi.co);
1316 is->oborig= RAY_OBJECT_SET(&R, shi.obi);
1317 is->faceorig= (RayFace*)shi.vlr;
1319 ray_trace_shadow_tra(is, depth-1, traflag | RAY_TRA);
1324 /* not used, test function for ambient occlusion (yaf: pathlight) */
1325 /* main problem; has to be called within shading loop, giving unwanted recursion */
1326 int ray_trace_shadow_rad(ShadeInput *ship, ShadeResult *shr)
1328 static int counter=0, only_one= 0;
1329 extern float hashvectf[];
1333 float vec[3], accum[3], div= 0.0f, maxsize= RE_ray_tree_max_size(R.raytree);
1341 accum[0]= accum[1]= accum[2]= 0.0f;
1342 isec.mode= RE_RAY_MIRROR;
1343 isec.faceorig= (RayFace*)ship->vlr;
1344 isec.oborig= RAY_OBJECT_SET(&R, ship->obi);
1346 for(a=0; a<8*8; a++) {
1350 VECCOPY(vec, hashvectf+counter);
1351 if(ship->vn[0]*vec[0]+ship->vn[1]*vec[1]+ship->vn[2]*vec[2]>0.0f) {
1356 VECCOPY(isec.start, ship->co);
1357 isec.end[0]= isec.start[0] + maxsize*vec[0];
1358 isec.end[1]= isec.start[1] + maxsize*vec[1];
1359 isec.end[2]= isec.start[2] + maxsize*vec[2];
1361 if(RE_ray_tree_intersect(R.raytree, &isec)) {
1364 /* Warning, This is not that nice, and possibly a bit slow for every ray,
1365 however some variables were not initialized properly in, unless using shade_input_initialize(...), we need to do a memset */
1366 memset(&shi, 0, sizeof(ShadeInput));
1367 /* end warning! - Campbell */
1369 shade_ray(&isec, &shi, &shr_t);
1370 fac= isec.labda*isec.labda;
1372 accum[0]+= fac*(shr_t.diff[0]+shr_t.spec[0]);
1373 accum[1]+= fac*(shr_t.diff[1]+shr_t.spec[1]);
1374 accum[2]+= fac*(shr_t.diff[2]+shr_t.spec[2]);
1381 shr->diff[0]+= accum[0]/div;
1382 shr->diff[1]+= accum[1]/div;
1383 shr->diff[2]+= accum[2]/div;
1391 /* aolight: function to create random unit sphere vectors for total random sampling */
1392 static void RandomSpherical(float *v)
1395 v[2] = 2.f*BLI_frand()-1.f;
1396 if ((r = 1.f - v[2]*v[2])>0.f) {
1397 float a = 6.283185307f*BLI_frand();
1405 /* calc distributed spherical energy */
1406 static void DS_energy(float *sphere, int tot, float *vec)
1408 float *fp, fac, force[3], res[3];
1411 res[0]= res[1]= res[2]= 0.0f;
1413 for(a=0, fp=sphere; a<tot; a++, fp+=3) {
1414 VecSubf(force, vec, fp);
1415 fac= force[0]*force[0] + force[1]*force[1] + force[2]*force[2];
1418 res[0]+= fac*force[0];
1419 res[1]+= fac*force[1];
1420 res[2]+= fac*force[2];
1425 VecAddf(vec, vec, res);
1430 /* called from convertBlenderScene.c */
1431 /* creates an equally distributed spherical sample pattern */
1432 /* and allocates threadsafe memory */
1433 void init_ao_sphere(World *wrld)
1436 int a, tot, iter= 16;
1438 /* we make twice the amount of samples, because only a hemisphere is used */
1439 tot= 2*wrld->aosamp*wrld->aosamp;
1441 wrld->aosphere= MEM_mallocN(3*tot*sizeof(float), "AO sphere");
1448 for(a=0; a<tot; a++, fp+= 3) {
1449 RandomSpherical(fp);
1453 for(a=0, fp= wrld->aosphere; a<tot; a++, fp+= 3) {
1454 DS_energy(wrld->aosphere, tot, fp);
1459 wrld->aotables= MEM_mallocN(BLENDER_MAX_THREADS*3*tot*sizeof(float), "AO tables");
1462 /* give per thread a table, we have to compare xs ys because of way OSA works... */
1463 static float *threadsafe_table_sphere(int test, int thread, int xs, int ys, int tot)
1465 static int xso[BLENDER_MAX_THREADS], yso[BLENDER_MAX_THREADS];
1466 static int firsttime= 1;
1469 memset(xso, 255, sizeof(xso));
1470 memset(yso, 255, sizeof(yso));
1474 if(xs==xso[thread] && ys==yso[thread]) return R.wrld.aotables+ thread*tot*3;
1475 if(test) return NULL;
1476 xso[thread]= xs; yso[thread]= ys;
1477 return R.wrld.aotables+ thread*tot*3;
1480 static float *sphere_sampler(int type, int resol, int thread, int xs, int ys)
1487 if (type & WO_AORNDSMP) {
1491 // always returns table
1492 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1494 /* total random sampling. NOT THREADSAFE! (should be removed, is not useful) */
1496 for (a=0; a<tot; a++, vec+=3) {
1497 RandomSpherical(vec);
1504 float cosfi, sinfi, cost, sint;
1508 // returns table if xs and ys were equal to last call
1509 sphere= threadsafe_table_sphere(1, thread, xs, ys, tot);
1511 sphere= threadsafe_table_sphere(0, thread, xs, ys, tot);
1514 ang= BLI_thread_frand(thread);
1515 sinfi= sin(ang); cosfi= cos(ang);
1516 ang= BLI_thread_frand(thread);
1517 sint= sin(ang); cost= cos(ang);
1519 vec= R.wrld.aosphere;
1521 for (a=0; a<tot; a++, vec+=3, vec1+=3) {
1522 vec1[0]= cost*cosfi*vec[0] - sinfi*vec[1] + sint*cosfi*vec[2];
1523 vec1[1]= cost*sinfi*vec[0] + cosfi*vec[1] + sint*sinfi*vec[2];
1524 vec1[2]= -sint*vec[0] + cost*vec[2];
1531 static void ray_ao_qmc(ShadeInput *shi, float *shadfac)
1534 QMCSampler *qsa=NULL;
1536 float up[3], side[3], dir[3], nrm[3];
1538 float maxdist = R.wrld.aodist;
1539 float fac=0.0f, prev=0.0f;
1540 float adapt_thresh = G.scene->world->ao_adapt_thresh;
1541 float adapt_speed_fac = G.scene->world->ao_adapt_speed_fac;
1544 int max_samples = R.wrld.aosamp*R.wrld.aosamp;
1546 float dxyview[3], skyadded=0, div;
1549 isec.faceorig= (RayFace*)shi->vlr;
1550 isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
1551 isec.face_last= NULL;
1553 isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1556 shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1558 /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1559 aocolor= R.wrld.aocolor;
1560 if(shi->mat->mode & MA_ONLYSHADOW)
1561 aocolor= WO_AOPLAIN;
1563 if(aocolor == WO_AOSKYTEX) {
1564 dxyview[0]= 1.0f/(float)R.wrld.aosamp;
1565 dxyview[1]= 1.0f/(float)R.wrld.aosamp;
1569 if(shi->vlr->flag & R_SMOOTH) {
1570 VECCOPY(nrm, shi->vn);
1573 VECCOPY(nrm, shi->facenor);
1576 VecOrthoBasisf(nrm, up, side);
1579 if (R.wrld.ao_samp_method==WO_AOSAMP_HALTON) {
1582 speedfac = get_avg_speed(shi) * adapt_speed_fac;
1583 CLAMP(speedfac, 1.0, 1000.0);
1584 max_samples /= speedfac;
1585 if (max_samples < 5) max_samples = 5;
1587 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1588 } else if (R.wrld.ao_samp_method==WO_AOSAMP_HAMMERSLEY)
1589 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1591 QMC_initPixel(qsa, shi->thread);
1593 while (samples < max_samples) {
1595 /* sampling, returns quasi-random vector in unit hemisphere */
1596 QMC_sampleHemi(samp3d, qsa, shi->thread, samples);
1598 dir[0] = (samp3d[0]*up[0] + samp3d[1]*side[0] + samp3d[2]*nrm[0]);
1599 dir[1] = (samp3d[0]*up[1] + samp3d[1]*side[1] + samp3d[2]*nrm[1]);
1600 dir[2] = (samp3d[0]*up[2] + samp3d[1]*side[2] + samp3d[2]*nrm[2]);
1604 VECCOPY(isec.start, shi->co);
1605 isec.end[0] = shi->co[0] - maxdist*dir[0];
1606 isec.end[1] = shi->co[1] - maxdist*dir[1];
1607 isec.end[2] = shi->co[2] - maxdist*dir[2];
1611 if(RE_ray_tree_intersect(R.raytree, &isec)) {
1612 if (R.wrld.aomode & WO_AODIST) fac+= exp(-isec.labda*R.wrld.aodistfac);
1615 else if(aocolor!=WO_AOPLAIN) {
1617 float skyfac, view[3];
1624 if(aocolor==WO_AOSKYCOL) {
1625 skyfac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1626 shadfac[0]+= (1.0f-skyfac)*R.wrld.horr + skyfac*R.wrld.zenr;
1627 shadfac[1]+= (1.0f-skyfac)*R.wrld.horg + skyfac*R.wrld.zeng;
1628 shadfac[2]+= (1.0f-skyfac)*R.wrld.horb + skyfac*R.wrld.zenb;
1630 else { /* WO_AOSKYTEX */
1631 shadeSkyView(skycol, isec.start, view, dxyview);
1632 shadeSunView(skycol, shi->view);
1633 shadfac[0]+= skycol[0];
1634 shadfac[1]+= skycol[1];
1635 shadfac[2]+= skycol[2];
1642 if (qsa->type == SAMP_TYPE_HALTON) {
1643 /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
1644 if (adapt_thresh > 0.0 && (samples > max_samples/2) ) {
1646 if (adaptive_sample_contrast_val(samples, prev, fac, adapt_thresh)) {
1653 if(aocolor!=WO_AOPLAIN && skyadded) {
1654 div= (1.0f - fac/(float)samples)/((float)skyadded);
1656 shadfac[0]*= div; // average color times distances/hits formula
1657 shadfac[1]*= div; // average color times distances/hits formula
1658 shadfac[2]*= div; // average color times distances/hits formula
1660 shadfac[0]= shadfac[1]= shadfac[2]= 1.0f - fac/(float)samples;
1664 release_thread_qmcsampler(&R, shi->thread, qsa);
1667 /* extern call from shade_lamp_loop, ambient occlusion calculus */
1668 static void ray_ao_spheresamp(ShadeInput *shi, float *shadfac)
1671 float *vec, *nrm, div, bias, sh=0.0f;
1672 float maxdist = R.wrld.aodist;
1674 int j= -1, tot, actual=0, skyadded=0, aocolor, resol= R.wrld.aosamp;
1676 isec.faceorig= (RayFace*)shi->vlr;
1677 isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
1678 isec.face_last= NULL;
1680 isec.mode= (R.wrld.aomode & WO_AODIST)?RE_RAY_SHADOW_TRA:RE_RAY_SHADOW;
1684 shadfac[0]= shadfac[1]= shadfac[2]= 0.0f;
1686 /* bias prevents smoothed faces to appear flat */
1687 if(shi->vlr->flag & R_SMOOTH) {
1688 bias= G.scene->world->aobias;
1696 /* prevent sky colors to be added for only shadow (shadow becomes alpha) */
1697 aocolor= R.wrld.aocolor;
1698 if(shi->mat->mode & MA_ONLYSHADOW)
1699 aocolor= WO_AOPLAIN;
1701 if(resol>32) resol= 32;
1703 vec= sphere_sampler(R.wrld.aomode, resol, shi->thread, shi->xs, shi->ys);
1705 // warning: since we use full sphere now, and dotproduct is below, we do twice as much
1708 if(aocolor == WO_AOSKYTEX) {
1709 dxyview[0]= 1.0f/(float)resol;
1710 dxyview[1]= 1.0f/(float)resol;
1716 if ((vec[0]*nrm[0] + vec[1]*nrm[1] + vec[2]*nrm[2]) > bias) {
1717 /* only ao samples for mask */
1718 if(R.r.mode & R_OSA) {
1721 if(!(shi->mask & (1<<j))) {
1729 /* always set start/end, RE_ray_tree_intersect clips it */
1730 VECCOPY(isec.start, shi->co);
1731 isec.end[0] = shi->co[0] - maxdist*vec[0];
1732 isec.end[1] = shi->co[1] - maxdist*vec[1];
1733 isec.end[2] = shi->co[2] - maxdist*vec[2];
1736 if(RE_ray_tree_intersect(R.raytree, &isec)) {
1737 if (R.wrld.aomode & WO_AODIST) sh+= exp(-isec.labda*R.wrld.aodistfac);
1740 else if(aocolor!=WO_AOPLAIN) {
1749 if(aocolor==WO_AOSKYCOL) {
1750 fac= 0.5*(1.0f+view[0]*R.grvec[0]+ view[1]*R.grvec[1]+ view[2]*R.grvec[2]);
1751 shadfac[0]+= (1.0f-fac)*R.wrld.horr + fac*R.wrld.zenr;
1752 shadfac[1]+= (1.0f-fac)*R.wrld.horg + fac*R.wrld.zeng;
1753 shadfac[2]+= (1.0f-fac)*R.wrld.horb + fac*R.wrld.zenb;
1755 else { /* WO_AOSKYTEX */
1756 shadeSkyView(skycol, isec.start, view, dxyview);
1757 shadeSunView(skycol, shi->view);
1758 shadfac[0]+= skycol[0];
1759 shadfac[1]+= skycol[1];
1760 shadfac[2]+= skycol[2];
1769 if(actual==0) sh= 1.0f;
1770 else sh = 1.0f - sh/((float)actual);
1772 if(aocolor!=WO_AOPLAIN && skyadded) {
1773 div= sh/((float)skyadded);
1775 shadfac[0]*= div; // average color times distances/hits formula
1776 shadfac[1]*= div; // average color times distances/hits formula
1777 shadfac[2]*= div; // average color times distances/hits formula
1780 shadfac[0]= shadfac[1]= shadfac[2]= sh;
1784 void ray_ao(ShadeInput *shi, float *shadfac)
1786 /* Unfortunately, the unusual way that the sphere sampler calculates roughly twice as many
1787 * samples as are actually traced, and skips them based on bias and OSA settings makes it very difficult
1788 * to reuse code between these two functions. This is the easiest way I can think of to do it
1790 if (ELEM(R.wrld.ao_samp_method, WO_AOSAMP_HAMMERSLEY, WO_AOSAMP_HALTON))
1791 ray_ao_qmc(shi, shadfac);
1792 else if (R.wrld.ao_samp_method == WO_AOSAMP_CONSTANT)
1793 ray_ao_spheresamp(shi, shadfac);
1796 static void ray_shadow_jittered_coords(ShadeInput *shi, int max, float jitco[RE_MAX_OSA][3], int *totjitco)
1798 /* magic numbers for reordering sample positions to give better
1799 * results with adaptive sample, when it usually only takes 4 samples */
1800 int order8[8] = {0, 1, 5, 6, 2, 3, 4, 7};
1801 int order11[11] = {1, 3, 8, 10, 0, 2, 4, 5, 6, 7, 9};
1802 int order16[16] = {1, 3, 9, 12, 0, 6, 7, 8, 13, 2, 4, 5, 10, 11, 14, 15};
1803 int count = count_mask(shi->mask);
1805 /* for better antialising shadow samples are distributed over the subpixel
1806 * sample coordinates, this only works for raytracing depth 0 though */
1807 if(!shi->strand && shi->depth == 0 && count > 1 && count <= max) {
1808 float xs, ys, zs, view[3];
1809 int samp, ordsamp, tot= 0;
1811 for(samp=0; samp<R.osa; samp++) {
1812 if(R.osa == 8) ordsamp = order8[samp];
1813 else if(R.osa == 11) ordsamp = order11[samp];
1814 else if(R.osa == 16) ordsamp = order16[samp];
1815 else ordsamp = samp;
1817 if(shi->mask & (1<<ordsamp)) {
1818 /* zbuffer has this inverse corrected, ensures xs,ys are inside pixel */
1819 xs= (float)shi->scanco[0] + R.jit[ordsamp][0] + 0.5f;
1820 ys= (float)shi->scanco[1] + R.jit[ordsamp][1] + 0.5f;
1823 shade_input_calc_viewco(shi, xs, ys, zs, view, NULL, jitco[tot], NULL, NULL);
1831 VECCOPY(jitco[0], shi->co);
1836 static void ray_shadow_qmc(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
1838 QMCSampler *qsa=NULL;
1842 float fac=0.0f, vec[3];
1844 float adapt_thresh = lar->adapt_thresh;
1845 int min_adapt_samples=4, max_samples = lar->ray_totsamp;
1847 int do_soft=1, full_osa=0;
1849 float jitco[RE_MAX_OSA][3];
1852 colsq[0] = colsq[1] = colsq[2] = 0.0;
1853 if(isec->mode==RE_RAY_SHADOW_TRA) {
1854 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
1858 if (lar->ray_totsamp < 2) do_soft = 0;
1859 if ((R.r.mode & R_OSA) && (R.osa > 0) && (shi->vlr->flag & R_FULL_OSA)) full_osa = 1;
1862 if (do_soft) max_samples = max_samples/R.osa + 1;
1863 else max_samples = 1;
1865 if (do_soft) max_samples = lar->ray_totsamp;
1866 else max_samples = (R.osa > 4)?R.osa:5;
1869 ray_shadow_jittered_coords(shi, max_samples, jitco, &totjitco);
1872 if (lar->ray_samp_method==LA_SAMP_HALTON)
1873 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HALTON, max_samples);
1874 else if (lar->ray_samp_method==LA_SAMP_HAMMERSLEY)
1875 qsa = get_thread_qmcsampler(&R, shi->thread, SAMP_TYPE_HAMMERSLEY, max_samples);
1877 QMC_initPixel(qsa, shi->thread);
1879 VECCOPY(vec, lampco);
1882 while (samples < max_samples) {
1883 isec->faceorig= (RayFace*)shi->vlr;
1884 isec->oborig= RAY_OBJECT_SET(&R, shi->obi);
1886 /* manually jitter the start shading co-ord per sample
1887 * based on the pre-generated OSA texture sampling offsets,
1888 * for anti-aliasing sharp shadow edges. */
1889 co = jitco[samples % totjitco];
1892 /* sphere shadow source */
1893 if (lar->type == LA_LOCAL) {
1894 float ru[3], rv[3], v[3], s[3];
1896 /* calc tangent plane vectors */
1897 v[0] = co[0] - lampco[0];
1898 v[1] = co[1] - lampco[1];
1899 v[2] = co[2] - lampco[2];
1901 VecOrthoBasisf(v, ru, rv);
1903 /* sampling, returns quasi-random vector in area_size disc */
1904 QMC_sampleDisc(samp3d, qsa, shi->thread, samples,lar->area_size);
1906 /* distribute disc samples across the tangent plane */
1907 s[0] = samp3d[0]*ru[0] + samp3d[1]*rv[0];
1908 s[1] = samp3d[0]*ru[1] + samp3d[1]*rv[1];
1909 s[2] = samp3d[0]*ru[2] + samp3d[1]*rv[2];
1914 /* sampling, returns quasi-random vector in [sizex,sizey]^2 plane */
1915 QMC_sampleRect(samp3d, qsa, shi->thread, samples, lar->area_size, lar->area_sizey);
1917 /* align samples to lamp vector */
1918 Mat3MulVecfl(lar->mat, samp3d);
1920 isec->end[0]= vec[0]+samp3d[0];
1921 isec->end[1]= vec[1]+samp3d[1];
1922 isec->end[2]= vec[2]+samp3d[2];
1924 VECCOPY(isec->end, vec);
1928 /* bias away somewhat to avoid self intersection */
1929 float jitbias= 0.5f*(VecLength(shi->dxco) + VecLength(shi->dyco));
1932 VECSUB(v, co, isec->end);
1935 co[0] -= jitbias*v[0];
1936 co[1] -= jitbias*v[1];
1937 co[2] -= jitbias*v[2];
1940 VECCOPY(isec->start, co);
1943 if(isec->mode==RE_RAY_SHADOW_TRA) {
1944 isec->col[0]= isec->col[1]= isec->col[2]= 1.0f;
1947 ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
1948 shadfac[0] += isec->col[0];
1949 shadfac[1] += isec->col[1];
1950 shadfac[2] += isec->col[2];
1951 shadfac[3] += isec->col[3];
1953 /* for variance calc */
1954 colsq[0] += isec->col[0]*isec->col[0];
1955 colsq[1] += isec->col[1]*isec->col[1];
1956 colsq[2] += isec->col[2]*isec->col[2];
1959 if( RE_ray_tree_intersect(R.raytree, isec) ) fac+= 1.0f;
1964 if ((lar->ray_samp_method == LA_SAMP_HALTON)) {
1966 /* adaptive sampling - consider samples below threshold as in shadow (or vice versa) and exit early */
1967 if ((max_samples > min_adapt_samples) && (adapt_thresh > 0.0) && (samples > max_samples / 3)) {
1968 if (isec->mode==RE_RAY_SHADOW_TRA) {
1969 if ((shadfac[3] / samples > (1.0-adapt_thresh)) || (shadfac[3] / samples < adapt_thresh))
1971 else if (adaptive_sample_variance(samples, shadfac, colsq, adapt_thresh))
1974 if ((fac / samples > (1.0-adapt_thresh)) || (fac / samples < adapt_thresh))
1981 if(isec->mode==RE_RAY_SHADOW_TRA) {
1982 shadfac[0] /= samples;
1983 shadfac[1] /= samples;
1984 shadfac[2] /= samples;
1985 shadfac[3] /= samples;
1987 shadfac[3]= 1.0f-fac/samples;
1990 release_thread_qmcsampler(&R, shi->thread, qsa);
1993 static void ray_shadow_jitter(ShadeInput *shi, LampRen *lar, float *lampco, float *shadfac, Isect *isec)
1995 /* area soft shadow */
1997 float fac=0.0f, div=0.0f, vec[3];
2000 if(isec->mode==RE_RAY_SHADOW_TRA) {
2001 shadfac[0]= shadfac[1]= shadfac[2]= shadfac[3]= 0.0f;
2003 else shadfac[3]= 1.0f;
2006 jitlamp= give_jitter_plane(lar, shi->thread, shi->xs, shi->ys);
2008 a= lar->ray_totsamp;
2010 /* this correction to make sure we always take at least 1 sample */
2012 if(a==4) mask |= (mask>>4)|(mask>>8);
2013 else if(a==9) mask |= (mask>>9);
2017 if(R.r.mode & R_OSA) {
2020 if(!(mask & (1<<j))) {
2026 isec->faceorig= (RayFace*)shi->vlr;
2027 isec->oborig= RAY_OBJECT_SET(&R, shi->obi);
2032 Mat3MulVecfl(lar->mat, vec);
2034 /* set start and end, RE_ray_tree_intersect clips it */
2035 VECCOPY(isec->start, shi->co);
2036 isec->end[0]= lampco[0]+vec[0];
2037 isec->end[1]= lampco[1]+vec[1];
2038 isec->end[2]= lampco[2]+vec[2];
2040 if(isec->mode==RE_RAY_SHADOW_TRA) {
2041 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2042 isec->col[0]= isec->col[1]= isec->col[2]= 1.0f;
2045 ray_trace_shadow_tra(isec, DEPTH_SHADOW_TRA, 0);
2046 shadfac[0] += isec->col[0];
2047 shadfac[1] += isec->col[1];
2048 shadfac[2] += isec->col[2];
2049 shadfac[3] += isec->col[3];
2051 else if( RE_ray_tree_intersect(R.raytree, isec) ) fac+= 1.0f;
2057 if(isec->mode==RE_RAY_SHADOW_TRA) {
2064 // sqrt makes nice umbra effect
2065 if(lar->ray_samp_type & LA_SAMP_UMBRA)
2066 shadfac[3]= sqrt(1.0f-fac/div);
2068 shadfac[3]= 1.0f-fac/div;
2071 /* extern call from shade_lamp_loop */
2072 void ray_shadow(ShadeInput *shi, LampRen *lar, float *shadfac)
2075 float lampco[3], maxsize;
2078 if(shi->mat->mode & MA_SHADOW_TRA) isec.mode= RE_RAY_SHADOW_TRA;
2079 else isec.mode= RE_RAY_SHADOW;
2081 if(lar->mode & (LA_LAYER|LA_LAYER_SHADOW))
2086 /* only when not mir tracing, first hit optimm */
2088 isec.face_last= (RayFace*)lar->vlr_last[shi->thread];
2089 isec.ob_last= RAY_OBJECT_SET(&R, lar->obi_last[shi->thread]);
2092 isec.face_last= NULL;
2096 if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2097 maxsize= RE_ray_tree_max_size(R.raytree);
2098 lampco[0]= shi->co[0] - maxsize*lar->vec[0];
2099 lampco[1]= shi->co[1] - maxsize*lar->vec[1];
2100 lampco[2]= shi->co[2] - maxsize*lar->vec[2];
2103 VECCOPY(lampco, lar->co);
2106 if (ELEM(lar->ray_samp_method, LA_SAMP_HALTON, LA_SAMP_HAMMERSLEY)) {
2108 ray_shadow_qmc(shi, lar, lampco, shadfac, &isec);
2111 if(lar->ray_totsamp<2) {
2113 isec.faceorig= (RayFace*)shi->vlr;
2114 isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
2115 shadfac[3]= 1.0f; // 1.0=full light
2117 /* set up isec vec */
2118 VECCOPY(isec.start, shi->co);
2119 VECCOPY(isec.end, lampco);
2121 if(isec.mode==RE_RAY_SHADOW_TRA) {
2122 /* isec.col is like shadfac, so defines amount of light (0.0 is full shadow) */
2123 isec.col[0]= isec.col[1]= isec.col[2]= 1.0f;
2126 ray_trace_shadow_tra(&isec, DEPTH_SHADOW_TRA, 0);
2127 QUATCOPY(shadfac, isec.col);
2129 else if(RE_ray_tree_intersect(R.raytree, &isec)) shadfac[3]= 0.0f;
2132 ray_shadow_jitter(shi, lar, lampco, shadfac, &isec);
2136 /* for first hit optim, set last interesected shadow face */
2138 lar->vlr_last[shi->thread]= (VlakRen*)isec.face_last;
2139 lar->obi_last[shi->thread]= RAY_OBJECT_GET(&R, isec.ob_last);
2144 /* only when face points away from lamp, in direction of lamp, trace ray and find first exit point */
2145 static void ray_translucent(ShadeInput *shi, LampRen *lar, float *distfac, float *co)
2148 float lampco[3], maxsize;
2151 isec.mode= RE_RAY_SHADOW_TRA;
2153 if(lar->mode & LA_LAYER) isec.lay= lar->lay; else isec.lay= -1;
2155 if(lar->type==LA_SUN || lar->type==LA_HEMI) {
2156 maxsize= RE_ray_tree_max_size(R.raytree);
2157 lampco[0]= shi->co[0] - maxsize*lar->vec[0];
2158 lampco[1]= shi->co[1] - maxsize*lar->vec[1];
2159 lampco[2]= shi->co[2] - maxsize*lar->vec[2];
2162 VECCOPY(lampco, lar->co);
2165 isec.faceorig= (RayFace*)shi->vlr;
2166 isec.oborig= RAY_OBJECT_SET(&R, shi->obi);
2168 /* set up isec vec */
2169 VECCOPY(isec.start, shi->co);
2170 VECCOPY(isec.end, lampco);
2172 if(RE_ray_tree_intersect(R.raytree, &isec)) {
2176 co[0]= isec.start[0]+isec.labda*(isec.vec[0]);
2177 co[1]= isec.start[1]+isec.labda*(isec.vec[1]);
2178 co[2]= isec.start[2]+isec.labda*(isec.vec[2]);
2180 *distfac= VecLength(isec.vec);