Cycles: Solve speed regression of classroom scene after principled commit
authorSergey Sharybin <sergey.vfx@gmail.com>
Fri, 21 Apr 2017 10:56:54 +0000 (12:56 +0200)
committerSergey Sharybin <sergey.vfx@gmail.com>
Fri, 21 Apr 2017 12:41:42 +0000 (14:41 +0200)
This way we can skip compiling it into OpenCL kernels by making
this shader a compile-time feature.

intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/kernel/closure/bsdf.h
intern/cycles/kernel/closure/bssrdf.h
intern/cycles/kernel/kernel_subsurface.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/svm/svm_closure.h
intern/cycles/render/shader.cpp

index 968af447e2918d3358f89dce305be25742676c1b..c024021b4b3ab75a5eebf5b510535de0849c1631 100644 (file)
@@ -66,6 +66,8 @@ std::ostream& operator <<(std::ostream &os,
           << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
        os << "Use Transparent Shadows: "
           << string_from_bool(requested_features.use_transparent) << std::endl;
+       os << "Use Principled BSDF: "
+          << string_from_bool(requested_features.use_principled) << std::endl;
        return os;
 }
 
index ac06e5617957aad061f8a55ee2f63b5ab6bcadd1..6051dd8b3eb5d6af97639fe1af98e2e52a903168 100644 (file)
@@ -124,6 +124,9 @@ public:
        /* Use various shadow tricks, such as shadow catcher. */
        bool use_shadow_tricks;
 
+       /* Per-uber shader usage flags. */
+       bool use_principled;
+
        DeviceRequestedFeatures()
        {
                /* TODO(sergey): Find more meaningful defaults. */
@@ -141,6 +144,7 @@ public:
                use_patch_evaluation = false;
                use_transparent = false;
                use_shadow_tricks = false;
+               use_principled = false;
        }
 
        bool modified(const DeviceRequestedFeatures& requested_features)
@@ -158,7 +162,8 @@ public:
                         use_integrator_branched == requested_features.use_integrator_branched &&
                         use_patch_evaluation == requested_features.use_patch_evaluation &&
                         use_transparent == requested_features.use_transparent &&
-                        use_shadow_tricks == requested_features.use_shadow_tricks);
+                        use_shadow_tricks == requested_features.use_shadow_tricks &&
+                        use_principled == requested_features.use_principled);
        }
 
        /* Convert the requested features structure to a build options,
@@ -205,6 +210,9 @@ public:
                if(!use_shadow_tricks) {
                        build_options += " -D__NO_SHADOW_TRICKS__";
                }
+               if(!use_principled) {
+                       build_options += " -D__NO_PRINCIPLED__";
+               }
                return build_options;
        }
 };
index 0302fa9b43e35a283d24da588a57d61967d4b3e6..a6bba8bf74d57f5a7fe4806abc26b93972ca60cb 100644 (file)
@@ -137,6 +137,7 @@ ccl_device_forceinline int bsdf_sample(KernelGlobals *kg,
                        label = bsdf_hair_transmission_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
                                eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
                        break;
+#ifdef __PRINCIPLED__
                case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
                case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
                        label = bsdf_principled_diffuse_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
@@ -146,6 +147,7 @@ ccl_device_forceinline int bsdf_sample(KernelGlobals *kg,
                        label = bsdf_principled_sheen_sample(sc, sd->Ng, sd->I, sd->dI.dx, sd->dI.dy, randu, randv,
                                eval, omega_in, &domega_in->dx, &domega_in->dy, pdf);
                        break;
+#endif  /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
                case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
@@ -243,6 +245,7 @@ float3 bsdf_eval(KernelGlobals *kg,
                        case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
                                eval = bsdf_hair_transmission_eval_reflect(sc, sd->I, omega_in, pdf);
                                break;
+#ifdef __PRINCIPLED__
                        case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
                        case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
                                eval = bsdf_principled_diffuse_eval_reflect(sc, sd->I, omega_in, pdf);
@@ -250,6 +253,7 @@ float3 bsdf_eval(KernelGlobals *kg,
                        case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
                                eval = bsdf_principled_sheen_eval_reflect(sc, sd->I, omega_in, pdf);
                                break;
+#endif  /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
                        case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
@@ -323,6 +327,7 @@ float3 bsdf_eval(KernelGlobals *kg,
                        case CLOSURE_BSDF_HAIR_TRANSMISSION_ID:
                                eval = bsdf_hair_transmission_eval_transmit(sc, sd->I, omega_in, pdf);
                                break;
+#ifdef __PRINCIPLED__
                        case CLOSURE_BSDF_PRINCIPLED_DIFFUSE_ID:
                        case CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID:
                                eval = bsdf_principled_diffuse_eval_transmit(sc, sd->I, omega_in, pdf);
@@ -330,6 +335,7 @@ float3 bsdf_eval(KernelGlobals *kg,
                        case CLOSURE_BSDF_PRINCIPLED_SHEEN_ID:
                                eval = bsdf_principled_sheen_eval_transmit(sc, sd->I, omega_in, pdf);
                                break;
+#endif  /* __PRINCIPLED__ */
 #endif
 #ifdef __VOLUME__
                        case CLOSURE_VOLUME_HENYEY_GREENSTEIN_ID:
index 8363cef53c8bcc875e195288f4339cead04a6160..f9236a6e52c72e736eebf9cc9a08fa14eeb56d65 100644 (file)
@@ -362,6 +362,7 @@ ccl_device int bssrdf_setup(Bssrdf *bssrdf, ClosureType type)
        if(bssrdf->radius < BSSRDF_MIN_RADIUS) {
                /* revert to diffuse BSDF if radius too small */
                int flag;
+#ifdef __PRINCIPLED__
                if(type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
                        float roughness = bssrdf->roughness;
                        float3 N = bssrdf->N;
@@ -377,7 +378,9 @@ ccl_device int bssrdf_setup(Bssrdf *bssrdf, ClosureType type)
                        flag = bsdf_principled_diffuse_setup(bsdf);
                        bsdf->type = CLOSURE_BSDF_BSSRDF_PRINCIPLED_ID;
                }
-               else {
+               else
+#endif  /* __PRINCIPLED__ */
+               {
                        DiffuseBsdf *bsdf = (DiffuseBsdf*)bssrdf;
                        bsdf->N = bssrdf->N;
                        flag = bsdf_diffuse_setup(bsdf);
@@ -391,7 +394,9 @@ ccl_device int bssrdf_setup(Bssrdf *bssrdf, ClosureType type)
                bssrdf->sharpness = saturate(bssrdf->sharpness);
                bssrdf->type = type;
 
-               if(type == CLOSURE_BSSRDF_BURLEY_ID || type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
+               if(type == CLOSURE_BSSRDF_BURLEY_ID ||
+                  type == CLOSURE_BSSRDF_PRINCIPLED_ID)
+               {
                        bssrdf_burley_setup(bssrdf);
                }
 
index fb4a054dcd0188ea961ec796d389360f9ac8adb8..f58eef7a368fdbb58e6586918b79fcf9fa1fd527 100644 (file)
@@ -149,6 +149,7 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, ShaderClos
 
        if(hit) {
                Bssrdf *bssrdf = (Bssrdf *)sc;
+#ifdef __PRINCIPLED__
                if(bssrdf->type == CLOSURE_BSSRDF_PRINCIPLED_ID) {
                        PrincipledDiffuseBsdf *bsdf = (PrincipledDiffuseBsdf*)bsdf_alloc(sd, sizeof(PrincipledDiffuseBsdf), weight);
 
@@ -164,6 +165,7 @@ ccl_device void subsurface_scatter_setup_diffuse_bsdf(ShaderData *sd, ShaderClos
                }
                else if(CLOSURE_IS_BSDF_BSSRDF(bssrdf->type) ||
                        CLOSURE_IS_BSSRDF(bssrdf->type))
+#endif  /* __PRINCIPLED__ */
                {
                        DiffuseBsdf *bsdf = (DiffuseBsdf*)bsdf_alloc(sd, sizeof(DiffuseBsdf), weight);
 
index 9d5cad3f3e10c4b8d7eba07eb1b32a51a9fa9467..a744c1d827795786035ca863522bf2318d6ea549 100644 (file)
@@ -78,6 +78,7 @@ CCL_NAMESPACE_BEGIN
 #    define __OSL__
 #  endif
 #  define __SUBSURFACE__
+#  define __PRINCIPLED__
 #  define __CMJ__
 #  define __VOLUME__
 #  define __VOLUME_SCATTER__
@@ -94,6 +95,7 @@ CCL_NAMESPACE_BEGIN
 #  define __VOLUME__
 #  define __VOLUME_SCATTER__
 #  define __SUBSURFACE__
+#  define __PRINCIPLED__
 #  define __SHADOW_RECORD_ALL__
 #  ifndef __SPLIT_KERNEL__
 #    define __BRANCHED_PATH__
@@ -109,6 +111,7 @@ CCL_NAMESPACE_BEGIN
 #    define __KERNEL_SHADING__
 #    define __KERNEL_ADV_SHADING__
 #    define __SUBSURFACE__
+#    define __PRINCIPLED__
 #    define __VOLUME__
 #    define __VOLUME_SCATTER__
 #    define __SHADOW_RECORD_ALL__
@@ -134,6 +137,7 @@ CCL_NAMESPACE_BEGIN
 #    define __KERNEL_SHADING__
 #    define __KERNEL_ADV_SHADING__
 #    define __SUBSURFACE__
+#    define __PRINCIPLED__
 #    define __VOLUME__
 #    define __VOLUME_SCATTER__
 #    define __SHADOW_RECORD_ALL__
@@ -222,6 +226,9 @@ CCL_NAMESPACE_BEGIN
 #ifdef __NO_SHADOW_TRICKS__
 #  undef __SHADOW_TRICKS__
 #endif
+#ifdef __NO_PRINCIPLED__
+#  undef __PRINCIPLED__
+#endif
 
 /* Random Numbers */
 
index 9a3689a94f404951d1ca22d00b36119a6d8a054e..407f8e784c0a096b934e63e5bc3a3e81c737dda7 100644 (file)
@@ -76,6 +76,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
        float param2 = (stack_valid(param2_offset))? stack_load_float(stack, param2_offset): __uint_as_float(node.w);
 
        switch(type) {
+#ifdef __PRINCIPLED__
                case CLOSURE_BSDF_PRINCIPLED_ID: {
                        uint specular_offset, roughness_offset, specular_tint_offset, anisotropic_offset, sheen_offset,
                                sheen_tint_offset, clearcoat_offset, clearcoat_gloss_offset, eta_offset, transparency_offset,
@@ -408,6 +409,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg, ShaderData *sd, float *
 
                        break;
                }
+#endif  /* __PRINCIPLED__ */
                case CLOSURE_BSDF_DIFFUSE_ID: {
                        float3 weight = sd->svm_closure_weight * mix_weight;
                        OrenNayarBsdf *bsdf = (OrenNayarBsdf*)bsdf_alloc(sd, sizeof(OrenNayarBsdf), weight);
index 23eee1916bd95951acec7d9581d0edfcd8fdf6dd..a7d42a4b4a029748c4fd4cea723f702e013bc5b4 100644 (file)
@@ -569,6 +569,9 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph,
                        if(CLOSURE_IS_VOLUME(bsdf_node->closure)) {
                                requested_features->nodes_features |= NODE_FEATURE_VOLUME;
                        }
+                       else if(CLOSURE_IS_PRINCIPLED(bsdf_node->closure)) {
+                               requested_features->use_principled = true;
+                       }
                }
                if(node->has_surface_bssrdf()) {
                        requested_features->use_subsurface = true;