Cycles: Some more inline policy tweaks for CUDA 8
author Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 Aug 2016 13:04:34 +0000 (15:04 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 Aug 2016 13:13:34 +0000 (15:13 +0200)
Makes the toolkit make exactly the same decisions about what to inline,
but unfortunately the difference is barely visible on a GTX-980.

intern/cycles/kernel/closure/bsdf.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_shader.h
intern/cycles/kernel/svm/svm_attribute.h
intern/cycles/kernel/svm/svm_wireframe.h

index 55bdf3ecbb4724c82480a7addb3a0c091a9381f0..86e1a7f317f8cf94d55cbe1c5b3198faae96c9b1 100644 (file)
@@ -144,7 +144,16 @@ ccl_device_inline int bsdf_sample(KernelGlobals *kg,
        return label;
 }
 
-ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+float3 bsdf_eval(KernelGlobals *kg,
+                 ShaderData *sd,
+                 const ShaderClosure *sc,
+                 const float3 omega_in,
+                 float *pdf)
 {
        float3 eval;
 
index 56516967d8f60e9765bcf13468206ba848ce8838..64f1468eacfad33391000d832d7576c5865b19de 100644 (file)
@@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BRANCHED_PATH__
 
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
-                                        ShaderData *sd,
-                                        ShaderData *emission_sd,
-                                        PathRadiance *L,
-                                        PathState *state,
-                                        RNG *rng,
-                                        float3 throughput)
+ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               ShaderData *emission_sd,
+                                               PathRadiance *L,
+                                               PathState *state,
+                                               RNG *rng,
+                                               float3 throughput)
 {
        int num_samples = kernel_data.integrator.ao_samples;
        float num_samples_inv = 1.0f/num_samples;
index bb3fe933b2c11093e363fdc18d992a06cce5c710..98d321c9c16792cf7a9248646add276db4317eea 100644 (file)
@@ -149,7 +149,12 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 /* ShaderData setup from BSSRDF scatter */
 
 #ifdef __SUBSURFACE__
-ccl_device void shader_setup_from_subsurface(
+#  ifndef __KERNEL_CUDS__
+ccl_device
+#  else
+ccl_device_inline
+#  endif
+void shader_setup_from_subsurface(
         KernelGlobals *kg,
         ShaderData *sd,
         const Intersection *isect,
@@ -533,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
 }
 #endif
 
-ccl_device void shader_bsdf_eval(KernelGlobals *kg,
-                                 ShaderData *sd,
-                                 const float3 omega_in,
-                                 BsdfEval *eval,
-                                 float light_pdf,
-                                 bool use_mis)
+
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_inline
+#endif
+void shader_bsdf_eval(KernelGlobals *kg,
+                      ShaderData *sd,
+                      const float3 omega_in,
+                      BsdfEval *eval,
+                      float light_pdf,
+                      bool use_mis)
 {
        bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
 
index ff92920c610feb0cf0e930356f0beb64a5b80b7e..bd6013e9205a93dc1905e01db41e18d4b9b07518 100644 (file)
@@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
        }
 }
 
-ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
        NodeAttributeType type, mesh_type;
        AttributeElement elem;
@@ -123,10 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
        }
 }
 
-ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg,
-                                      ShaderData *sd,
-                                      float *stack,
-                                      uint4 node)
+#ifndef __KERNEL_CUDS__
+ccl_device
+#else
+ccl_device_noinline
+#endif
+void svm_node_attr_bump_dy(KernelGlobals *kg,
+                           ShaderData *sd,
+                           float *stack,
+                           uint4 node)
 {
        NodeAttributeType type, mesh_type;
        AttributeElement elem;
index 30ccd523addec184f5f59a232ded1a9a586b0a3d..6eed9bc1a99a1dc560ed300833895fb36b7329a6 100644 (file)
@@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device float wireframe(KernelGlobals *kg,
-                           ShaderData *sd,
-                           float size,
-                           int pixel_size,
-                           float3 *P)
+ccl_device_inline float wireframe(KernelGlobals *kg,
+                                  ShaderData *sd,
+                                  float size,
+                                  int pixel_size,
+                                  float3 *P)
 {
 #ifdef __HAIR__
        if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)