Cycles: Speed up transparent shadows in split kernel
author: Hristo Gueorguiev <prem.nirved@gmail.com>
Wed, 8 Mar 2017 15:26:39 +0000 (16:26 +0100)
committer: Sergey Sharybin <sergey.vfx@gmail.com>
Thu, 9 Mar 2017 16:09:37 +0000 (17:09 +0100)
This commit enables record-all transparent shadow rays in the split kernel.

Performance results:

               R9 290 render time (without synchronization), seconds
                        Before    After   Change
BMW                      261.5    262.5   +0.4 %
Classroom                869.6    867.3   -0.3 %
Fishy Cat                657.4    639.8   -2.7 %
Koro                    1909.8    692.8  -63.7 %
Pabellon Barcelona      1633.3   1238.0  -24.2 %
Pabellon Barcelona(*)   1158.1    903.8  -22.0 %

(*) without glossy connected to volume

intern/cycles/kernel/kernel_shadow.h
intern/cycles/kernel/kernel_types.h

index 68a7ccfd90351a6854eadea7481bdbf7c2256300..4efc6c8118daad1407174bd8ca720963b4525eec 100644 (file)
@@ -152,7 +152,13 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                int bounce = state->transparent_bounce;
                Intersection *isect = hits;
 #    ifdef __VOLUME__
-               PathState ps = *state;
+#      ifdef __SPLIT_KERNEL__
+               ccl_addr_space PathState *ps = &kernel_split_state.state_shadow[ccl_global_id(1) * ccl_global_size(0) + ccl_global_id(0)];
+#      else
+               PathState ps_object;
+               PathState *ps = &ps_object;
+#      endif
+               *ps = *state;
 #    endif
                sort_intersections(hits, num_hits);
                for(int hit = 0; hit < num_hits; hit++, isect++) {
@@ -171,7 +177,7 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                                                           shadow_sd,
                                                           state,
 #ifdef __VOLUME__
-                                                          &ps,
+                                                          ps,
 #endif
                                                           isect,
                                                           ray,
@@ -188,8 +194,8 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg,
                }
 #    ifdef __VOLUME__
                /* Attenuation for last line segment towards light. */
-               if(ps.volume_stack[0].shader != SHADER_NONE) {
-                       kernel_volume_shadow(kg, shadow_sd, &ps, ray, &throughput);
+               if(ps->volume_stack[0].shader != SHADER_NONE) {
+                       kernel_volume_shadow(kg, shadow_sd, ps, ray, &throughput);
                }
 #    endif
                *shadow = throughput;
@@ -214,7 +220,10 @@ ccl_device bool shadow_blocked_transparent_all(KernelGlobals *kg,
                                                uint max_hits,
                                                float3 *shadow)
 {
-#    ifdef __KERNEL_CUDA__
+#    ifdef __SPLIT_KERNEL__
+       Intersection hits_[SHADOW_STACK_MAX_HITS];
+       Intersection *hits = &hits_[0];
+#    elif defined(__KERNEL_CUDA__)
        Intersection *hits = kg->hits_stack;
 #    else
        Intersection hits_stack[SHADOW_STACK_MAX_HITS];
index 6c18cab6406bace6d3a2d299fcc0ed825bdc7019..cac710c5ff3ce5328f192257d28f14c93e859e8e 100644 (file)
@@ -80,9 +80,9 @@ CCL_NAMESPACE_BEGIN
 #  define __CMJ__
 #  define __VOLUME__
 #  define __VOLUME_SCATTER__
+#  define __SHADOW_RECORD_ALL__
 #  ifndef __SPLIT_KERNEL__
 #    define __VOLUME_DECOUPLED__
-#    define __SHADOW_RECORD_ALL__
 #    define __VOLUME_RECORD_ALL__
 #  endif
 #endif  /* __KERNEL_CPU__ */
@@ -131,6 +131,7 @@ CCL_NAMESPACE_BEGIN
 #    define __SUBSURFACE__
 #    define __VOLUME__
 #    define __VOLUME_SCATTER__
+#    define __SHADOW_RECORD_ALL__
 #  endif  /* __KERNEL_OPENCL_AMD__ */
 
 #  ifdef __KERNEL_OPENCL_INTEL_CPU__