Fix Cycles CUDA performance on CUDA 8.0.
[blender-staging.git] / intern / cycles / kernel / osl / osl_services.cpp
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* TODO(sergey): There is a bit of header dependency hell going on
18  * here, so for now we just put these pragmas here. In the future it might
19  * be better to have a dedicated file for such tweaks.
20  */
21 #if (defined(__GNUC__) && !defined(__clang__)) && defined(NDEBUG)
22 #  pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
23 #  pragma GCC diagnostic ignored "-Wuninitialized"
24 #endif
25
26 #include <string.h>
27
28 #include "mesh.h"
29 #include "object.h"
30 #include "scene.h"
31
32 #include "osl_closures.h"
33 #include "osl_globals.h"
34 #include "osl_services.h"
35 #include "osl_shader.h"
36
37 #include "util_foreach.h"
38 #include "util_logging.h"
39 #include "util_string.h"
40
41 #include "kernel_compat_cpu.h"
42 #include "kernel_globals.h"
43 #include "kernel_random.h"
44 #include "kernel_projection.h"
45 #include "kernel_differential.h"
46 #include "kernel_montecarlo.h"
47 #include "kernel_camera.h"
48 #include "kernels/cpu/kernel_cpu_image.h"
49 #include "geom/geom.h"
50 #include "bvh/bvh.h"
51
52 #include "kernel_projection.h"
53 #include "kernel_accumulate.h"
54 #include "kernel_shader.h"
55
56 #ifdef WITH_PTEX
57 #  include <Ptexture.h>
58 #endif
59
60 CCL_NAMESPACE_BEGIN
61
62 /* RenderServices implementation */
63
/* Copy the Transform pointed to by m2 into the OSL matrix pointed to by m1.
 *
 * The CHECK_TYPE lines state the expected pointer types (OSL::Matrix44* and
 * Transform*). The copy is sizeof(*m2) bytes, which presumably relies on both
 * types having the same size -- TODO confirm.
 *
 * The arguments are parenthesized inside memcpy()/sizeof() so that pointer
 * expressions such as `base + 1` expand with the intended precedence. */
#define COPY_MATRIX44(m1, m2)  { \
        CHECK_TYPE(m1, OSL::Matrix44*); \
        CHECK_TYPE(m2, Transform*); \
        memcpy((m1), (m2), sizeof(*(m2))); \
} (void)0
69
70 /* static ustrings */
71 ustring OSLRenderServices::u_distance("distance");
72 ustring OSLRenderServices::u_index("index");
73 ustring OSLRenderServices::u_world("world");
74 ustring OSLRenderServices::u_camera("camera");
75 ustring OSLRenderServices::u_screen("screen");
76 ustring OSLRenderServices::u_raster("raster");
77 ustring OSLRenderServices::u_ndc("NDC");
78 ustring OSLRenderServices::u_object_location("object:location");
79 ustring OSLRenderServices::u_object_index("object:index");
80 ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated");
81 ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv");
82 ustring OSLRenderServices::u_material_index("material:index");
83 ustring OSLRenderServices::u_object_random("object:random");
84 ustring OSLRenderServices::u_particle_index("particle:index");
85 ustring OSLRenderServices::u_particle_age("particle:age");
86 ustring OSLRenderServices::u_particle_lifetime("particle:lifetime");
87 ustring OSLRenderServices::u_particle_location("particle:location");
88 ustring OSLRenderServices::u_particle_rotation("particle:rotation");
89 ustring OSLRenderServices::u_particle_size("particle:size");
90 ustring OSLRenderServices::u_particle_velocity("particle:velocity");
91 ustring OSLRenderServices::u_particle_angular_velocity("particle:angular_velocity");
92 ustring OSLRenderServices::u_geom_numpolyvertices("geom:numpolyvertices");
93 ustring OSLRenderServices::u_geom_trianglevertices("geom:trianglevertices");
94 ustring OSLRenderServices::u_geom_polyvertices("geom:polyvertices");
95 ustring OSLRenderServices::u_geom_name("geom:name");
96 ustring OSLRenderServices::u_geom_undisplaced("geom:undisplaced");
97 ustring OSLRenderServices::u_is_smooth("geom:is_smooth");
98 #ifdef __HAIR__
99 ustring OSLRenderServices::u_is_curve("geom:is_curve");
100 ustring OSLRenderServices::u_curve_thickness("geom:curve_thickness");
101 ustring OSLRenderServices::u_curve_tangent_normal("geom:curve_tangent_normal");
102 #endif
103 ustring OSLRenderServices::u_path_ray_length("path:ray_length");
104 ustring OSLRenderServices::u_path_ray_depth("path:ray_depth");
105 ustring OSLRenderServices::u_path_transparent_depth("path:transparent_depth");
106 ustring OSLRenderServices::u_path_transmission_depth("path:transmission_depth");
107 ustring OSLRenderServices::u_trace("trace");
108 ustring OSLRenderServices::u_hit("hit");
109 ustring OSLRenderServices::u_hitdist("hitdist");
110 ustring OSLRenderServices::u_N("N");
111 ustring OSLRenderServices::u_Ng("Ng");
112 ustring OSLRenderServices::u_P("P");
113 ustring OSLRenderServices::u_I("I");
114 ustring OSLRenderServices::u_u("u");
115 ustring OSLRenderServices::u_v("v");
116 ustring OSLRenderServices::u_empty;
117
118 OSLRenderServices::OSLRenderServices()
119 {
120         kernel_globals = NULL;
121         osl_ts = NULL;
122
123 #ifdef WITH_PTEX
124         size_t maxmem = 16384 * 1024;
125         ptex_cache = PtexCache::create(0, maxmem);
126 #endif
127 }
128
129 OSLRenderServices::~OSLRenderServices()
130 {
131         if(osl_ts) {
132                 VLOG(2) << "OSL texture system stats:\n"
133                         << osl_ts->getstats();
134         }
135 #ifdef WITH_PTEX
136         ptex_cache->release();
137 #endif
138 }
139
140 void OSLRenderServices::thread_init(KernelGlobals *kernel_globals_, OSL::TextureSystem *osl_ts_)
141 {
142         kernel_globals = kernel_globals_;
143         osl_ts = osl_ts_;
144 }
145
146 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
147 {
148         /* this is only used for shader and object space, we don't really have
149          * a concept of shader space, so we just use object space for both. */
150         if(xform) {
151                 const ShaderData *sd = (const ShaderData *)xform;
152                 KernelGlobals *kg = sd->osl_globals;
153                 int object = sd->object;
154
155                 if(object != OBJECT_NONE) {
156 #ifdef __OBJECT_MOTION__
157                         Transform tfm;
158
159                         if(time == sd->time)
160                                 tfm = sd->ob_tfm;
161                         else
162                                 tfm = object_fetch_transform_motion_test(kg, object, time, NULL);
163 #else
164                         Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
165 #endif
166                         tfm = transform_transpose(tfm);
167                         COPY_MATRIX44(&result, &tfm);
168
169                         return true;
170                 }
171         }
172
173         return false;
174 }
175
176 bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform, float time)
177 {
178         /* this is only used for shader and object space, we don't really have
179          * a concept of shader space, so we just use object space for both. */
180         if(xform) {
181                 const ShaderData *sd = (const ShaderData *)xform;
182                 KernelGlobals *kg = sd->osl_globals;
183                 int object = sd->object;
184
185                 if(object != OBJECT_NONE) {
186 #ifdef __OBJECT_MOTION__
187                         Transform itfm;
188
189                         if(time == sd->time)
190                                 itfm = sd->ob_itfm;
191                         else
192                                 object_fetch_transform_motion_test(kg, object, time, &itfm);
193 #else
194                         Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
195 #endif
196                         itfm = transform_transpose(itfm);
197                         COPY_MATRIX44(&result, &itfm);
198
199                         return true;
200                 }
201         }
202
203         return false;
204 }
205
206 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from, float time)
207 {
208         KernelGlobals *kg = kernel_globals;
209
210         if(from == u_ndc) {
211                 Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
212                 COPY_MATRIX44(&result, &tfm);
213                 return true;
214         }
215         else if(from == u_raster) {
216                 Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
217                 COPY_MATRIX44(&result, &tfm);
218                 return true;
219         }
220         else if(from == u_screen) {
221                 Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
222                 COPY_MATRIX44(&result, &tfm);
223                 return true;
224         }
225         else if(from == u_camera) {
226                 Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
227                 COPY_MATRIX44(&result, &tfm);
228                 return true;
229         }
230         else if(from == u_world) {
231                 result.makeIdentity();
232                 return true;
233         }
234
235         return false;
236 }
237
238 bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to, float time)
239 {
240         KernelGlobals *kg = kernel_globals;
241
242         if(to == u_ndc) {
243                 Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
244                 COPY_MATRIX44(&result, &tfm);
245                 return true;
246         }
247         else if(to == u_raster) {
248                 Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
249                 COPY_MATRIX44(&result, &tfm);
250                 return true;
251         }
252         else if(to == u_screen) {
253                 Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
254                 COPY_MATRIX44(&result, &tfm);
255                 return true;
256         }
257         else if(to == u_camera) {
258                 Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
259                 COPY_MATRIX44(&result, &tfm);
260                 return true;
261         }
262         else if(to == u_world) {
263                 result.makeIdentity();
264                 return true;
265         }
266
267         return false;
268 }
269
270 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
271 {
272         /* this is only used for shader and object space, we don't really have
273          * a concept of shader space, so we just use object space for both. */
274         if(xform) {
275                 const ShaderData *sd = (const ShaderData *)xform;
276                 int object = sd->object;
277
278                 if(object != OBJECT_NONE) {
279 #ifdef __OBJECT_MOTION__
280                         Transform tfm = sd->ob_tfm;
281 #else
282                         KernelGlobals *kg = sd->osl_globals;
283                         Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
284 #endif
285                         tfm = transform_transpose(tfm);
286                         COPY_MATRIX44(&result, &tfm);
287
288                         return true;
289                 }
290         }
291
292         return false;
293 }
294
295 bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, OSL::TransformationPtr xform)
296 {
297         /* this is only used for shader and object space, we don't really have
298          * a concept of shader space, so we just use object space for both. */
299         if(xform) {
300                 const ShaderData *sd = (const ShaderData *)xform;
301                 int object = sd->object;
302
303                 if(object != OBJECT_NONE) {
304 #ifdef __OBJECT_MOTION__
305                         Transform tfm = sd->ob_itfm;
306 #else
307                         KernelGlobals *kg = sd->osl_globals;
308                         Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
309 #endif
310                         tfm = transform_transpose(tfm);
311                         COPY_MATRIX44(&result, &tfm);
312
313                         return true;
314                 }
315         }
316
317         return false;
318 }
319
320 bool OSLRenderServices::get_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring from)
321 {
322         KernelGlobals *kg = kernel_globals;
323
324         if(from == u_ndc) {
325                 Transform tfm = transform_transpose(transform_quick_inverse(kernel_data.cam.worldtondc));
326                 COPY_MATRIX44(&result, &tfm);
327                 return true;
328         }
329         else if(from == u_raster) {
330                 Transform tfm = transform_transpose(kernel_data.cam.rastertoworld);
331                 COPY_MATRIX44(&result, &tfm);
332                 return true;
333         }
334         else if(from == u_screen) {
335                 Transform tfm = transform_transpose(kernel_data.cam.screentoworld);
336                 COPY_MATRIX44(&result, &tfm);
337                 return true;
338         }
339         else if(from == u_camera) {
340                 Transform tfm = transform_transpose(kernel_data.cam.cameratoworld);
341                 COPY_MATRIX44(&result, &tfm);
342                 return true;
343         }
344
345         return false;
346 }
347
348 bool OSLRenderServices::get_inverse_matrix(OSL::ShaderGlobals *sg, OSL::Matrix44 &result, ustring to)
349 {
350         KernelGlobals *kg = kernel_globals;
351         
352         if(to == u_ndc) {
353                 Transform tfm = transform_transpose(kernel_data.cam.worldtondc);
354                 COPY_MATRIX44(&result, &tfm);
355                 return true;
356         }
357         else if(to == u_raster) {
358                 Transform tfm = transform_transpose(kernel_data.cam.worldtoraster);
359                 COPY_MATRIX44(&result, &tfm);
360                 return true;
361         }
362         else if(to == u_screen) {
363                 Transform tfm = transform_transpose(kernel_data.cam.worldtoscreen);
364                 COPY_MATRIX44(&result, &tfm);
365                 return true;
366         }
367         else if(to == u_camera) {
368                 Transform tfm = transform_transpose(kernel_data.cam.worldtocamera);
369                 COPY_MATRIX44(&result, &tfm);
370                 return true;
371         }
372         
373         return false;
374 }
375
376 bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals *sg, bool derivatives, 
377                                             ustring object, TypeDesc type, ustring name,
378                                             int index, void *val)
379 {
380         return false;
381 }
382
383 static bool set_attribute_float3(float3 f[3], TypeDesc type, bool derivatives, void *val)
384 {
385         if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
386            type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
387         {
388                 float *fval = (float *)val;
389
390                 fval[0] = f[0].x;
391                 fval[1] = f[0].y;
392                 fval[2] = f[0].z;
393
394                 if(derivatives) {
395                         fval[3] = f[1].x;
396                         fval[4] = f[1].y;
397                         fval[5] = f[1].z;
398
399                         fval[6] = f[2].x;
400                         fval[7] = f[2].y;
401                         fval[8] = f[2].z;
402                 }
403
404                 return true;
405         }
406         else if(type == TypeDesc::TypeFloat) {
407                 float *fval = (float *)val;
408                 fval[0] = average(f[0]);
409
410                 if(derivatives) {
411                         fval[1] = average(f[1]);
412                         fval[2] = average(f[2]);
413                 }
414
415                 return true;
416         }
417
418         return false;
419 }
420
421 static bool set_attribute_float3(float3 f, TypeDesc type, bool derivatives, void *val)
422 {
423         float3 fv[3];
424
425         fv[0] = f;
426         fv[1] = make_float3(0.0f, 0.0f, 0.0f);
427         fv[2] = make_float3(0.0f, 0.0f, 0.0f);
428
429         return set_attribute_float3(fv, type, derivatives, val);
430 }
431
432 static bool set_attribute_float(float f[3], TypeDesc type, bool derivatives, void *val)
433 {
434         if(type == TypeDesc::TypePoint || type == TypeDesc::TypeVector ||
435            type == TypeDesc::TypeNormal || type == TypeDesc::TypeColor)
436         {
437                 float *fval = (float *)val;
438                 fval[0] = f[0];
439                 fval[1] = f[1];
440                 fval[2] = f[2];
441
442                 if(derivatives) {
443                         fval[3] = f[1];
444                         fval[4] = f[1];
445                         fval[5] = f[1];
446
447                         fval[6] = f[2];
448                         fval[7] = f[2];
449                         fval[8] = f[2];
450                 }
451
452                 return true;
453         }
454         else if(type == TypeDesc::TypeFloat) {
455                 float *fval = (float *)val;
456                 fval[0] = f[0];
457
458                 if(derivatives) {
459                         fval[1] = f[1];
460                         fval[2] = f[2];
461                 }
462
463                 return true;
464         }
465
466         return false;
467 }
468
469 static bool set_attribute_float(float f, TypeDesc type, bool derivatives, void *val)
470 {
471         float fv[3];
472
473         fv[0] = f;
474         fv[1] = 0.0f;
475         fv[2] = 0.0f;
476
477         return set_attribute_float(fv, type, derivatives, val);
478 }
479
480 static bool set_attribute_int(int i, TypeDesc type, bool derivatives, void *val)
481 {
482         if(type.basetype == TypeDesc::INT && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
483                 int *ival = (int *)val;
484                 ival[0] = i;
485
486                 if(derivatives) {
487                         ival[1] = 0;
488                         ival[2] = 0;
489                 }
490
491                 return true;
492         }
493
494         return false;
495 }
496
497 static bool set_attribute_string(ustring str, TypeDesc type, bool derivatives, void *val)
498 {
499         if(type.basetype == TypeDesc::STRING && type.aggregate == TypeDesc::SCALAR && type.arraylen == 0) {
500                 ustring *sval = (ustring *)val;
501                 sval[0] = str;
502
503                 if(derivatives) {
504                         sval[1] = OSLRenderServices::u_empty;
505                         sval[2] = OSLRenderServices::u_empty;
506                 }
507
508                 return true;
509         }
510
511         return false;
512 }
513
514 static bool set_attribute_float3_3(float3 P[3], TypeDesc type, bool derivatives, void *val)
515 {
516         if(type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
517                 float *fval = (float *)val;
518
519                 fval[0] = P[0].x;
520                 fval[1] = P[0].y;
521                 fval[2] = P[0].z;
522
523                 fval[3] = P[1].x;
524                 fval[4] = P[1].y;
525                 fval[5] = P[1].z;
526
527                 fval[6] = P[2].x;
528                 fval[7] = P[2].y;
529                 fval[8] = P[2].z;
530
531                 if(type.arraylen > 3)
532                         memset(fval + 3*3, 0, sizeof(float)*3*(type.arraylen - 3));
533                 if(derivatives)
534                         memset(fval + type.arraylen*3, 0, sizeof(float)*2*3*type.arraylen);
535
536                 return true;
537         }
538
539         return false;
540 }
541
542 static bool set_attribute_matrix(const Transform& tfm, TypeDesc type, void *val)
543 {
544         if(type == TypeDesc::TypeMatrix) {
545                 Transform transpose = transform_transpose(tfm);
546                 memcpy(val, &transpose, sizeof(Transform));
547                 return true;
548         }
549
550         return false;
551 }
552
553 static bool get_mesh_element_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
554                                const TypeDesc& type, bool derivatives, void *val)
555 {
556         if(attr.type == TypeDesc::TypePoint || attr.type == TypeDesc::TypeVector ||
557            attr.type == TypeDesc::TypeNormal || attr.type == TypeDesc::TypeColor)
558         {
559                 float3 fval[3];
560                 fval[0] = primitive_attribute_float3(kg, sd, attr.desc,
561                                                      (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
562                 return set_attribute_float3(fval, type, derivatives, val);
563         }
564         else if(attr.type == TypeDesc::TypeFloat) {
565                 float fval[3];
566                 fval[0] = primitive_attribute_float(kg, sd, attr.desc,
567                                                     (derivatives) ? &fval[1] : NULL, (derivatives) ? &fval[2] : NULL);
568                 return set_attribute_float(fval, type, derivatives, val);
569         }
570         else {
571                 return false;
572         }
573 }
574
575 static bool get_mesh_attribute(KernelGlobals *kg, const ShaderData *sd, const OSLGlobals::Attribute& attr,
576                                const TypeDesc& type, bool derivatives, void *val)
577 {
578         if(attr.type == TypeDesc::TypeMatrix) {
579                 Transform tfm = primitive_attribute_matrix(kg, sd, attr.desc);
580                 return set_attribute_matrix(tfm, type, val);
581         }
582         else {
583                 return false;
584         }
585 }
586
587 static void get_object_attribute(const OSLGlobals::Attribute& attr, bool derivatives, void *val)
588 {
589         size_t datasize = attr.value.datasize();
590
591         memcpy(val, attr.value.data(), datasize);
592         if(derivatives)
593                 memset((char *)val + datasize, 0, datasize * 2);
594 }
595
596 bool OSLRenderServices::get_object_standard_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
597                                                       TypeDesc type, bool derivatives, void *val)
598 {
599         /* todo: turn this into hash table? */
600
601         /* Object Attributes */
602         if(name == u_object_location) {
603                 float3 f = object_location(kg, sd);
604                 return set_attribute_float3(f, type, derivatives, val);
605         }
606         else if(name == u_object_index) {
607                 float f = object_pass_id(kg, sd->object);
608                 return set_attribute_float(f, type, derivatives, val);
609         }
610         else if(name == u_geom_dupli_generated) {
611                 float3 f = object_dupli_generated(kg, sd->object);
612                 return set_attribute_float3(f, type, derivatives, val);
613         }
614         else if(name == u_geom_dupli_uv) {
615                 float3 f = object_dupli_uv(kg, sd->object);
616                 return set_attribute_float3(f, type, derivatives, val);
617         }
618         else if(name == u_material_index) {
619                 float f = shader_pass_id(kg, sd);
620                 return set_attribute_float(f, type, derivatives, val);
621         }
622         else if(name == u_object_random) {
623                 float f = object_random_number(kg, sd->object);
624                 return set_attribute_float(f, type, derivatives, val);
625         }
626
627         /* Particle Attributes */
628         else if(name == u_particle_index) {
629                 int particle_id = object_particle_id(kg, sd->object);
630                 float f = particle_index(kg, particle_id);
631                 return set_attribute_float(f, type, derivatives, val);
632         }
633         else if(name == u_particle_age) {
634                 int particle_id = object_particle_id(kg, sd->object);
635                 float f = particle_age(kg, particle_id);
636                 return set_attribute_float(f, type, derivatives, val);
637         }
638         else if(name == u_particle_lifetime) {
639                 int particle_id = object_particle_id(kg, sd->object);
640                 float f = particle_lifetime(kg, particle_id);
641                 return set_attribute_float(f, type, derivatives, val);
642         }
643         else if(name == u_particle_location) {
644                 int particle_id = object_particle_id(kg, sd->object);
645                 float3 f = particle_location(kg, particle_id);
646                 return set_attribute_float3(f, type, derivatives, val);
647         }
648 #if 0   /* unsupported */
649         else if(name == u_particle_rotation) {
650                 int particle_id = object_particle_id(kg, sd->object);
651                 float4 f = particle_rotation(kg, particle_id);
652                 return set_attribute_float4(f, type, derivatives, val);
653         }
654 #endif
655         else if(name == u_particle_size) {
656                 int particle_id = object_particle_id(kg, sd->object);
657                 float f = particle_size(kg, particle_id);
658                 return set_attribute_float(f, type, derivatives, val);
659         }
660         else if(name == u_particle_velocity) {
661                 int particle_id = object_particle_id(kg, sd->object);
662                 float3 f = particle_velocity(kg, particle_id);
663                 return set_attribute_float3(f, type, derivatives, val);
664         }
665         else if(name == u_particle_angular_velocity) {
666                 int particle_id = object_particle_id(kg, sd->object);
667                 float3 f = particle_angular_velocity(kg, particle_id);
668                 return set_attribute_float3(f, type, derivatives, val);
669         }
670         
671         /* Geometry Attributes */
672         else if(name == u_geom_numpolyvertices) {
673                 return set_attribute_int(3, type, derivatives, val);
674         }
675         else if((name == u_geom_trianglevertices || name == u_geom_polyvertices)
676 #ifdef __HAIR__
677                      && sd->type & PRIMITIVE_ALL_TRIANGLE)
678 #else
679                 )
680 #endif
681         {
682                 float3 P[3];
683
684                 if(sd->type & PRIMITIVE_TRIANGLE)
685                         triangle_vertices(kg, sd->prim, P);
686                 else
687                         motion_triangle_vertices(kg, sd->object, sd->prim, sd->time, P);
688
689                 if(!(sd->flag & SD_TRANSFORM_APPLIED)) {
690                         object_position_transform(kg, sd, &P[0]);
691                         object_position_transform(kg, sd, &P[1]);
692                         object_position_transform(kg, sd, &P[2]);
693                 }
694
695                 return set_attribute_float3_3(P, type, derivatives, val);
696         }
697         else if(name == u_geom_name) {
698                 ustring object_name = kg->osl->object_names[sd->object];
699                 return set_attribute_string(object_name, type, derivatives, val);
700         }
701         else if(name == u_is_smooth) {
702                 float f = ((sd->shader & SHADER_SMOOTH_NORMAL) != 0);
703                 return set_attribute_float(f, type, derivatives, val);
704         }
705 #ifdef __HAIR__
706         /* Hair Attributes */
707         else if(name == u_is_curve) {
708                 float f = (sd->type & PRIMITIVE_ALL_CURVE) != 0;
709                 return set_attribute_float(f, type, derivatives, val);
710         }
711         else if(name == u_curve_thickness) {
712                 float f = curve_thickness(kg, sd);
713                 return set_attribute_float(f, type, derivatives, val);
714         }
715         else if(name == u_curve_tangent_normal) {
716                 float3 f = curve_tangent_normal(kg, sd);
717                 return set_attribute_float3(f, type, derivatives, val);
718         }
719 #endif
720         else
721                 return false;
722 }
723
724 bool OSLRenderServices::get_background_attribute(KernelGlobals *kg, ShaderData *sd, ustring name,
725                                                  TypeDesc type, bool derivatives, void *val)
726 {
727         if(name == u_path_ray_length) {
728                 /* Ray Length */
729                 float f = sd->ray_length;
730                 return set_attribute_float(f, type, derivatives, val);
731         }
732         else if(name == u_path_ray_depth) {
733                 /* Ray Depth */
734                 PathState *state = sd->osl_path_state;
735                 int f = state->bounce;
736                 return set_attribute_int(f, type, derivatives, val);
737         }
738         else if(name == u_path_transparent_depth) {
739                 /* Transparent Ray Depth */
740                 PathState *state = sd->osl_path_state;
741                 int f = state->transparent_bounce;
742                 return set_attribute_int(f, type, derivatives, val);
743         }
744         else if(name == u_path_transmission_depth) {
745                 /* Transmission Ray Depth */
746                 PathState *state = sd->osl_path_state;
747                 int f = state->transmission_bounce;
748                 return set_attribute_int(f, type, derivatives, val);
749         }
750         else if(name == u_ndc) {
751                 /* NDC coordinates with special exception for otho */
752                 OSLThreadData *tdata = kg->osl_tdata;
753                 OSL::ShaderGlobals *globals = &tdata->globals;
754                 float3 ndc[3];
755
756                 if((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) {
757                         ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
758
759                         if(derivatives) {
760                                 ndc[1] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dx) - ndc[0];
761                                 ndc[2] = camera_world_to_ndc(kg, sd, sd->ray_P + sd->ray_dP.dy) - ndc[0];
762                         }
763                 }
764                 else {
765                         ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
766
767                         if(derivatives) {
768                                 ndc[1] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dx) - ndc[0];
769                                 ndc[2] = camera_world_to_ndc(kg, sd, sd->P + sd->dP.dy) - ndc[0];
770                         }
771                 }
772
773                 return set_attribute_float3(ndc, type, derivatives, val);
774         }
775         else
776                 return false;
777 }
778
779 bool OSLRenderServices::get_attribute(OSL::ShaderGlobals *sg, bool derivatives, ustring object_name,
780                                       TypeDesc type, ustring name, void *val)
781 {
782         if(sg->renderstate == NULL)
783                 return false;
784
785         ShaderData *sd = (ShaderData *)(sg->renderstate);
786         return get_attribute(sd, derivatives, object_name, type, name, val);
787 }
788
789 bool OSLRenderServices::get_attribute(ShaderData *sd, bool derivatives, ustring object_name,
790                                       TypeDesc type, ustring name, void *val)
791 {
792         KernelGlobals *kg = sd->osl_globals;
793         int prim_type = 0;
794         int object;
795
796         /* lookup of attribute on another object */
797         if(object_name != u_empty) {
798                 OSLGlobals::ObjectNameMap::iterator it = kg->osl->object_name_map.find(object_name);
799
800                 if(it == kg->osl->object_name_map.end())
801                         return false;
802
803                 object = it->second;
804         }
805         else {
806                 object = sd->object;
807                 prim_type = attribute_primitive_type(kg, sd);
808
809                 if(object == OBJECT_NONE)
810                         return get_background_attribute(kg, sd, name, type, derivatives, val);
811         }
812
813         /* find attribute on object */
814         object = object*ATTR_PRIM_TYPES + prim_type;
815         OSLGlobals::AttributeMap& attribute_map = kg->osl->attribute_map[object];
816         OSLGlobals::AttributeMap::iterator it = attribute_map.find(name);
817
818         if(it != attribute_map.end()) {
819                 const OSLGlobals::Attribute& attr = it->second;
820
821                 if(attr.desc.element != ATTR_ELEMENT_OBJECT) {
822                         /* triangle and vertex attributes */
823                         if(get_mesh_element_attribute(kg, sd, attr, type, derivatives, val))
824                                 return true;
825                         else
826                                 return get_mesh_attribute(kg, sd, attr, type, derivatives, val);
827                 }
828                 else {
829                         /* object attribute */
830                         get_object_attribute(attr, derivatives, val);
831                         return true;
832                 }
833         }
834         else {
835                 /* not found in attribute, check standard object info */
836                 bool is_std_object_attribute = get_object_standard_attribute(kg, sd, name, type, derivatives, val);
837
838                 if(is_std_object_attribute)
839                         return true;
840
841                 return get_background_attribute(kg, sd, name, type, derivatives, val);
842         }
843
844         return false;
845 }
846
847 bool OSLRenderServices::get_userdata(bool derivatives, ustring name, TypeDesc type, 
848                                      OSL::ShaderGlobals *sg, void *val)
849 {
850         return false; /* disabled by lockgeom */
851 }
852
853 bool OSLRenderServices::has_userdata(ustring name, TypeDesc type, OSL::ShaderGlobals *sg)
854 {
855         return false; /* never called by OSL */
856 }
857
858 bool OSLRenderServices::texture(ustring filename,
859                                 TextureHandle *texture_handle,
860                                 TexturePerthread *texture_thread_info,
861                                 TextureOpt &options,
862                                 OSL::ShaderGlobals *sg,
863                                 float s, float t,
864                                 float dsdx, float dtdx, float dsdy, float dtdy,
865                                 int nchannels,
866                                 float *result,
867                                 float *dresultds,
868                                 float *dresultdt)
869 {
870         OSL::TextureSystem *ts = osl_ts;
871         ShaderData *sd = (ShaderData *)(sg->renderstate);
872         KernelGlobals *kg = sd->osl_globals;
873
874         if(texture_thread_info == NULL) {
875                 OSLThreadData *tdata = kg->osl_tdata;
876                 texture_thread_info = tdata->oiio_thread_info;
877         }
878
879 #ifdef WITH_PTEX
880         /* todo: this is just a quick hack, only works with particular files and options */
881         if(string_endswith(filename.string(), ".ptx")) {
882                 float2 uv;
883                 int faceid;
884
885                 if(!primitive_ptex(kg, sd, &uv, &faceid))
886                         return false;
887
888                 float u = uv.x;
889                 float v = uv.y;
890                 float dudx = 0.0f;
891                 float dvdx = 0.0f;
892                 float dudy = 0.0f;
893                 float dvdy = 0.0f;
894
895                 Ptex::String error;
896                 PtexPtr<PtexTexture> r(ptex_cache->get(filename.c_str(), error));
897
898                 if(!r) {
899                         //std::cerr << error.c_str() << std::endl;
900                         return false;
901                 }
902
903                 bool mipmaplerp = false;
904                 float sharpness = 1.0f;
905                 PtexFilter::Options opts(PtexFilter::f_bicubic, mipmaplerp, sharpness);
906                 PtexPtr<PtexFilter> f(PtexFilter::getFilter(r, opts));
907
908                 f->eval(result, options.firstchannel, nchannels, faceid, u, v, dudx, dvdx, dudy, dvdy);
909
910                 for(int c = r->numChannels(); c < nchannels; c++)
911                         result[c] = result[0];
912
913                 return true;
914         }
915 #endif
916         bool status;
917
918         if(filename.length() && filename[0] == '@') {
919                 int slot = atoi(filename.c_str() + 1);
920                 float4 rgba = kernel_tex_image_interp(slot, s, 1.0f - t);
921
922                 result[0] = rgba[0];
923                 if(nchannels > 1)
924                         result[1] = rgba[1];
925                 if(nchannels > 2)
926                         result[2] = rgba[2];
927                 if(nchannels > 3)
928                         result[3] = rgba[3];
929                 status = true;
930         }
931         else {
932                 if(texture_handle != NULL) {
933                         status = ts->texture(texture_handle,
934                                              texture_thread_info,
935                                              options,
936                                              s, t,
937                                              dsdx, dtdx,
938                                              dsdy, dtdy,
939                                              nchannels,
940                                              result,
941                                              dresultds, dresultdt);
942                 }
943                 else {
944                         status = ts->texture(filename,
945                                              options,
946                                              s, t,
947                                              dsdx, dtdx,
948                                              dsdy, dtdy,
949                                              nchannels,
950                                              result,
951                                              dresultds, dresultdt);
952                 }
953         }
954
955         if(!status) {
956                 if(nchannels == 3 || nchannels == 4) {
957                         result[0] = 1.0f;
958                         result[1] = 0.0f;
959                         result[2] = 1.0f;
960
961                         if(nchannels == 4)
962                                 result[3] = 1.0f;
963                 }
964                 /* This might be slow, but prevents error messages leak and
965                  * other nasty stuff happening.
966                  */
967                 string err = ts->geterror();
968                 (void)err;
969         }
970
971         return status;
972 }
973
974 bool OSLRenderServices::texture3d(ustring filename,
975                                   TextureHandle *texture_handle,
976                                   TexturePerthread *texture_thread_info,
977                                   TextureOpt &options,
978                                   OSL::ShaderGlobals *sg,
979                                   const OSL::Vec3 &P,
980                                   const OSL::Vec3 &dPdx,
981                                   const OSL::Vec3 &dPdy,
982                                   const OSL::Vec3 &dPdz,
983                                   int nchannels,
984                                   float *result,
985                                   float *dresultds,
986                                   float *dresultdt,
987                                   float *dresultdr)
988 {
989         OSL::TextureSystem *ts = osl_ts;
990         ShaderData *sd = (ShaderData *)(sg->renderstate);
991         KernelGlobals *kg = sd->osl_globals;
992
993         if(texture_thread_info == NULL) {
994                 OSLThreadData *tdata = kg->osl_tdata;
995                 texture_thread_info = tdata->oiio_thread_info;
996         }
997
998         bool status;
999         if(filename.length() && filename[0] == '@') {
1000                 int slot = atoi(filename.c_str() + 1);
1001                 float4 rgba = kernel_tex_image_interp_3d(slot, P.x, P.y, P.z);
1002
1003                 result[0] = rgba[0];
1004                 if(nchannels > 1)
1005                         result[1] = rgba[1];
1006                 if(nchannels > 2)
1007                         result[2] = rgba[2];
1008                 if(nchannels > 3)
1009                         result[3] = rgba[3];
1010                 status = true;
1011         }
1012         else {
1013                 if(texture_handle != NULL) {
1014                         status = ts->texture3d(texture_handle,
1015                                                texture_thread_info,
1016                                                options,
1017                                                P,
1018                                                dPdx, dPdy, dPdz,
1019                                                nchannels,
1020                                                result,
1021                                                dresultds, dresultdt, dresultdr);
1022                 }
1023                 else {
1024                         status = ts->texture3d(filename,
1025                                                options,
1026                                                P,
1027                                                dPdx, dPdy, dPdz,
1028                                                nchannels,
1029                                                result,
1030                                                dresultds, dresultdt, dresultdr);
1031                 }
1032         }
1033
1034         if(!status) {
1035                 if(nchannels == 3 || nchannels == 4) {
1036                         result[0] = 1.0f;
1037                         result[1] = 0.0f;
1038                         result[2] = 1.0f;
1039
1040                         if(nchannels == 4)
1041                                 result[3] = 1.0f;
1042                 }
1043                 /* This might be slow, but prevents error messages leak and
1044                  * other nasty stuff happening.
1045                  */
1046                 string err = ts->geterror();
1047                 (void)err;
1048         }
1049
1050         return status;
1051 }
1052
1053 bool OSLRenderServices::environment(ustring filename, TextureOpt &options,
1054                                     OSL::ShaderGlobals *sg, const OSL::Vec3 &R,
1055                                     const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy,
1056                                     int nchannels, float *result)
1057 {
1058         OSL::TextureSystem *ts = osl_ts;
1059         ShaderData *sd = (ShaderData *)(sg->renderstate);
1060         KernelGlobals *kg = sd->osl_globals;
1061         OSLThreadData *tdata = kg->osl_tdata;
1062         OIIO::TextureSystem::Perthread *thread_info = tdata->oiio_thread_info;
1063
1064         OIIO::TextureSystem::TextureHandle *th = ts->get_texture_handle(filename, thread_info);
1065
1066         bool status = ts->environment(th, thread_info,
1067                                       options, R, dRdx, dRdy,
1068                                       nchannels, result);
1069
1070         if(!status) {
1071                 if(nchannels == 3 || nchannels == 4) {
1072                         result[0] = 1.0f;
1073                         result[1] = 0.0f;
1074                         result[2] = 1.0f;
1075
1076                         if(nchannels == 4)
1077                                 result[3] = 1.0f;
1078                 }
1079         }
1080
1081         return status;
1082 }
1083
1084 bool OSLRenderServices::get_texture_info(OSL::ShaderGlobals *sg, ustring filename, int subimage,
1085                                          ustring dataname,
1086                                          TypeDesc datatype, void *data)
1087 {
1088         OSL::TextureSystem *ts = osl_ts;
1089         return ts->get_texture_info(filename, subimage, dataname, datatype, data);
1090 }
1091
1092 int OSLRenderServices::pointcloud_search(OSL::ShaderGlobals *sg, ustring filename, const OSL::Vec3 &center,
1093                                          float radius, int max_points, bool sort,
1094                                          size_t *out_indices, float *out_distances, int derivs_offset)
1095 {
1096         return 0;
1097 }
1098
1099 int OSLRenderServices::pointcloud_get(OSL::ShaderGlobals *sg, ustring filename, size_t *indices, int count,
1100                                       ustring attr_name, TypeDesc attr_type, void *out_data)
1101 {
1102         return 0;
1103 }
1104
1105 bool OSLRenderServices::pointcloud_write(OSL::ShaderGlobals *sg,
1106                                          ustring filename, const OSL::Vec3 &pos,
1107                                          int nattribs, const ustring *names,
1108                                          const TypeDesc *types,
1109                                          const void **data)
1110 {
1111         return false;
1112 }
1113
1114 bool OSLRenderServices::trace(TraceOpt &options, OSL::ShaderGlobals *sg,
1115         const OSL::Vec3 &P, const OSL::Vec3 &dPdx,
1116         const OSL::Vec3 &dPdy, const OSL::Vec3 &R,
1117         const OSL::Vec3 &dRdx, const OSL::Vec3 &dRdy)
1118 {
1119         /* todo: options.shader support, maybe options.traceset */
1120         ShaderData *sd = (ShaderData *)(sg->renderstate);
1121
1122         /* setup ray */
1123         Ray ray;
1124
1125         ray.P = TO_FLOAT3(P);
1126         ray.D = TO_FLOAT3(R);
1127         ray.t = (options.maxdist == 1.0e30f)? FLT_MAX: options.maxdist - options.mindist;
1128         ray.time = sd->time;
1129
1130         if(options.mindist == 0.0f) {
1131                 /* avoid self-intersections */
1132                 if(ray.P == sd->P) {
1133                         bool transmit = (dot(sd->Ng, ray.D) < 0.0f);
1134                         ray.P = ray_offset(sd->P, (transmit)? -sd->Ng: sd->Ng);
1135                 }
1136         }
1137         else {
1138                 /* offset for minimum distance */
1139                 ray.P += options.mindist*ray.D;
1140         }
1141
1142         /* ray differentials */
1143         ray.dP.dx = TO_FLOAT3(dPdx);
1144         ray.dP.dy = TO_FLOAT3(dPdy);
1145         ray.dD.dx = TO_FLOAT3(dRdx);
1146         ray.dD.dy = TO_FLOAT3(dRdy);
1147
1148         /* allocate trace data */
1149         OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
1150         tracedata->ray = ray;
1151         tracedata->setup = false;
1152         tracedata->init = true;
1153         tracedata->sd.osl_globals = sd->osl_globals;
1154
1155         /* raytrace */
1156         return scene_intersect(sd->osl_globals, ray, PATH_RAY_ALL_VISIBILITY, &tracedata->isect, NULL, 0.0f, 0.0f);
1157 }
1158
1159
1160 bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, ustring source, ustring name,
1161         TypeDesc type, void *val, bool derivatives)
1162 {
1163         OSLTraceData *tracedata = (OSLTraceData*)sg->tracedata;
1164
1165         if(source == u_trace && tracedata->init) {
1166                 if(name == u_hit) {
1167                         return set_attribute_int((tracedata->isect.prim != PRIM_NONE), type, derivatives, val);
1168                 }
1169                 else if(tracedata->isect.prim != PRIM_NONE) {
1170                         if(name == u_hitdist) {
1171                                 float f[3] = {tracedata->isect.t, 0.0f, 0.0f};
1172                                 return set_attribute_float(f, type, derivatives, val);
1173                         }
1174                         else {
1175                                 ShaderData *sd = &tracedata->sd;
1176                                 KernelGlobals *kg = sd->osl_globals;
1177
1178                                 if(!tracedata->setup) {
1179                                         /* lazy shader data setup */
1180                                         shader_setup_from_ray(kg, sd, &tracedata->isect, &tracedata->ray);
1181                                         tracedata->setup = true;
1182                                 }
1183
1184                                 if(name == u_N) {
1185                                         return set_attribute_float3(sd->N, type, derivatives, val);
1186                                 }
1187                                 else if(name == u_Ng) {
1188                                         return set_attribute_float3(sd->Ng, type, derivatives, val);
1189                                 }
1190                                 else if(name == u_P) {
1191                                         float3 f[3] = {sd->P, sd->dP.dx, sd->dP.dy};
1192                                         return set_attribute_float3(f, type, derivatives, val);
1193                                 }
1194                                 else if(name == u_I) {
1195                                         float3 f[3] = {sd->I, sd->dI.dx, sd->dI.dy};
1196                                         return set_attribute_float3(f, type, derivatives, val);
1197                                 }
1198                                 else if(name == u_u) {
1199                                         float f[3] = {sd->u, sd->du.dx, sd->du.dy};
1200                                         return set_attribute_float(f, type, derivatives, val);
1201                                 }
1202                                 else if(name == u_v) {
1203                                         float f[3] = {sd->v, sd->dv.dx, sd->dv.dy};
1204                                         return set_attribute_float(f, type, derivatives, val);
1205                                 }
1206
1207                                 return get_attribute(sd, derivatives, u_empty, type, name, val);
1208                         }
1209                 }
1210         }
1211
1212         return false;
1213 }
1214
1215 CCL_NAMESPACE_END