Code refactor: use DeviceInfo to enable QBVH and decoupled volume shading.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 8 Oct 2017 02:32:25 +0000 (04:32 +0200)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Sun, 8 Oct 2017 11:17:33 +0000 (13:17 +0200)
18 files changed:
intern/cycles/blender/blender_session.cpp
intern/cycles/blender/blender_sync.cpp
intern/cycles/blender/blender_sync.h
intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/device/device_cpu.cpp
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_network.cpp
intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/kernel_path.h
intern/cycles/kernel/kernel_path_branched.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/kernel_volume.h
intern/cycles/render/integrator.cpp
intern/cycles/render/mesh.cpp
intern/cycles/render/mesh.h
intern/cycles/render/scene.h
intern/cycles/render/shader.cpp

index 12de3da..9e54b7d 100644 (file)
@@ -115,8 +115,7 @@ void BlenderSession::create()
 void BlenderSession::create_session()
 {
        SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
-       bool is_cpu = session_params.device.type == DEVICE_CPU;
-       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
        bool session_pause = BlenderSync::get_session_pause(b_scene, background);
 
        /* reset status/progress */
@@ -141,7 +140,7 @@ void BlenderSession::create_session()
        session->set_pause(session_pause);
 
        /* create sync */
-       sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress, is_cpu);
+       sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
        BL::Object b_camera_override(b_engine.camera_override());
        if(b_v3d) {
                if(session_pause == false) {
@@ -179,8 +178,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_)
        b_scene = b_scene_;
 
        SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
-       const bool is_cpu = session_params.device.type == DEVICE_CPU;
-       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
 
        width = render_resolution_x(b_render);
        height = render_resolution_y(b_render);
@@ -211,7 +209,7 @@ void BlenderSession::reset_session(BL::BlendData& b_data_, BL::Scene& b_scene_)
        session->stats.mem_peak = session->stats.mem_used;
 
        /* sync object should be re-created */
-       sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress, is_cpu);
+       sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
 
        /* for final render we will do full data sync per render layer, only
         * do some basic syncing here, no objects or materials for speed */
@@ -736,8 +734,7 @@ void BlenderSession::synchronize()
 
        /* on session/scene parameter changes, we recreate session entirely */
        SessionParams session_params = BlenderSync::get_session_params(b_engine, b_userpref, b_scene, background);
-       const bool is_cpu = session_params.device.type == DEVICE_CPU;
-       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background, is_cpu);
+       SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
        bool session_pause = BlenderSync::get_session_pause(b_scene, background);
 
        if(session->params.modified(session_params) ||
index 42e3721..2e3301c 100644 (file)
@@ -47,8 +47,7 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
                          BL::Scene& b_scene,
                          Scene *scene,
                          bool preview,
-                         Progress &progress,
-                         bool is_cpu)
+                         Progress &progress)
 : b_engine(b_engine),
   b_data(b_data),
   b_scene(b_scene),
@@ -62,7 +61,6 @@ BlenderSync::BlenderSync(BL::RenderEngine& b_engine,
   scene(scene),
   preview(preview),
   experimental(false),
-  is_cpu(is_cpu),
   dicing_rate(1.0f),
   max_subdivisions(12),
   progress(progress)
@@ -613,8 +611,7 @@ array<Pass> BlenderSync::sync_render_passes(BL::RenderLayer& b_rlay,
 /* Scene Parameters */
 
 SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
-                                          bool background,
-                                          bool is_cpu)
+                                          bool background)
 {
        BL::RenderSettings r = b_scene.render();
        SceneParams params;
@@ -654,15 +651,7 @@ SceneParams BlenderSync::get_scene_params(BL::Scene& b_scene,
                params.texture_limit = 0;
        }
 
-#if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
-       if(is_cpu) {
-               params.use_qbvh = DebugFlags().cpu.qbvh && system_cpu_support_sse2();
-       }
-       else
-#endif
-       {
-               params.use_qbvh = false;
-       }
+       params.use_qbvh = DebugFlags().cpu.qbvh;
 
        return params;
 }
index 4ec4642..11e279b 100644 (file)
@@ -54,8 +54,7 @@ public:
                    BL::Scene& b_scene,
                    Scene *scene,
                    bool preview,
-                   Progress &progress,
-                   bool is_cpu);
+                   Progress &progress);
        ~BlenderSync();
 
        /* sync */
@@ -83,8 +82,7 @@ public:
 
        /* get parameters */
        static SceneParams get_scene_params(BL::Scene& b_scene,
-                                           bool background,
-                                           bool is_cpu);
+                                           bool background);
        static SessionParams get_session_params(BL::RenderEngine& b_engine,
                                                BL::UserPreferences& b_userpref,
                                                BL::Scene& b_scene,
@@ -177,7 +175,6 @@ private:
        Scene *scene;
        bool preview;
        bool experimental;
-       bool is_cpu;
 
        float dicing_rate;
        int max_subdivisions;
index f64436a..5332944 100644 (file)
@@ -379,10 +379,14 @@ DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
        info.num = 0;
 
        info.has_bindless_textures = true;
+       info.has_volume_decoupled = true;
+       info.has_qbvh = true;
        foreach(DeviceInfo &device, subdevices) {
                assert(device.type == info.multi_devices[0].type);
 
                info.has_bindless_textures &= device.has_bindless_textures;
+               info.has_volume_decoupled &= device.has_volume_decoupled;
+               info.has_qbvh &= device.has_qbvh;
        }
 
        return info;
index 0e0a007..c134fc9 100644 (file)
@@ -55,6 +55,8 @@ public:
        bool display_device;
        bool advanced_shading;
        bool has_bindless_textures; /* flag for GPU and Multi device */
+       bool has_volume_decoupled;
+       bool has_qbvh;
        bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
        vector<DeviceInfo> multi_devices;
 
@@ -66,6 +68,8 @@ public:
                display_device = false;
                advanced_shading = true;
                has_bindless_textures = false;
+               has_volume_decoupled = false;
+               has_qbvh = false;
                use_split_kernel = false;
        }
 
index ac6d324..a17caab 100644 (file)
@@ -1024,6 +1024,8 @@ void device_cpu_info(vector<DeviceInfo>& devices)
        info.id = "CPU";
        info.num = 0;
        info.advanced_shading = true;
+       info.has_qbvh = system_cpu_support_sse2();
+       info.has_volume_decoupled = true;
 
        devices.insert(devices.begin(), info);
 }
index dcbe603..56a56c5 100644 (file)
@@ -2128,6 +2128,8 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 
                info.advanced_shading = (major >= 2);
                info.has_bindless_textures = (major >= 3);
+               info.has_volume_decoupled = false;
+               info.has_qbvh = false;
 
                int pci_location[3] = {0, 0, 0};
                cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
index deea59f..ced10c9 100644 (file)
@@ -343,7 +343,11 @@ void device_network_info(vector<DeviceInfo>& devices)
        info.description = "Network Device";
        info.id = "NETWORK";
        info.num = 0;
-       info.advanced_shading = true; /* todo: get this info from device */
+
+       /* todo: get this info from device */
+       info.advanced_shading = true;
+       info.has_volume_decoupled = false;
+       info.has_qbvh = false;
 
        devices.push_back(info);
 }
index 9d89dec..5808a31 100644 (file)
@@ -123,6 +123,8 @@ void device_opencl_info(vector<DeviceInfo>& devices)
                info.advanced_shading = OpenCLInfo::kernel_use_advanced_shading(platform_name);
                info.use_split_kernel = OpenCLInfo::kernel_use_split(platform_name,
                                                                     device_type);
+               info.has_volume_decoupled = false;
+               info.has_qbvh = false;
                info.id = string("OPENCL_") + platform_name + "_" + device_name + "_" + hardware_id;
                devices.push_back(info);
                num_devices++;
index 6b6c560..652777a 100644 (file)
@@ -170,87 +170,90 @@ ccl_device_forceinline VolumeIntegrateResult kernel_path_volume(
        if(!hit) {
                kernel_volume_clean_stack(kg, state->volume_stack);
        }
+
+       if(state->volume_stack[0].shader == SHADER_NONE) {
+               return VOLUME_PATH_ATTENUATED;
+       }
+
        /* volume attenuation, emission, scatter */
-       if(state->volume_stack[0].shader != SHADER_NONE) {
-               Ray volume_ray = *ray;
-               volume_ray.t = (hit)? isect->t: FLT_MAX;
+       Ray volume_ray = *ray;
+       volume_ray.t = (hit)? isect->t: FLT_MAX;
 
-               bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+       bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
 
 #  ifdef __VOLUME_DECOUPLED__
-               int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
-               bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
-               bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
+       int sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+       bool direct = (state->flag & PATH_RAY_CAMERA) != 0;
+       bool decoupled = kernel_volume_use_decoupled(kg, heterogeneous, direct, sampling_method);
 
-               if(decoupled) {
-                       /* cache steps along volume for repeated sampling */
-                       VolumeSegment volume_segment;
+       if(decoupled) {
+               /* cache steps along volume for repeated sampling */
+               VolumeSegment volume_segment;
 
-                       shader_setup_from_volume(kg, sd, &volume_ray);
-                       kernel_volume_decoupled_record(kg, state,
-                               &volume_ray, sd, &volume_segment, heterogeneous);
+               shader_setup_from_volume(kg, sd, &volume_ray);
+               kernel_volume_decoupled_record(kg, state,
+                       &volume_ray, sd, &volume_segment, heterogeneous);
 
-                       volume_segment.sampling_method = sampling_method;
+               volume_segment.sampling_method = sampling_method;
 
-                       /* emission */
-                       if(volume_segment.closure_flag & SD_EMISSION)
-                               path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+               /* emission */
+               if(volume_segment.closure_flag & SD_EMISSION)
+                       path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
 
-                       /* scattering */
-                       VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
+               /* scattering */
+               VolumeIntegrateResult result = VOLUME_PATH_ATTENUATED;
 
-                       if(volume_segment.closure_flag & SD_SCATTER) {
-                               int all = kernel_data.integrator.sample_all_lights_indirect;
+               if(volume_segment.closure_flag & SD_SCATTER) {
+                       int all = kernel_data.integrator.sample_all_lights_indirect;
 
-                               /* direct light sampling */
-                               kernel_branched_path_volume_connect_light(kg, sd,
-                                       emission_sd, *throughput, state, L, all,
-                                       &volume_ray, &volume_segment);
+                       /* direct light sampling */
+                       kernel_branched_path_volume_connect_light(kg, sd,
+                               emission_sd, *throughput, state, L, all,
+                               &volume_ray, &volume_segment);
 
-                               /* indirect sample. if we use distance sampling and take just
-                                * one sample for direct and indirect light, we could share
-                                * this computation, but makes code a bit complex */
-                               float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-                               float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
+                       /* indirect sample. if we use distance sampling and take just
+                        * one sample for direct and indirect light, we could share
+                        * this computation, but makes code a bit complex */
+                       float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
+                       float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
 
-                               result = kernel_volume_decoupled_scatter(kg,
-                                       state, &volume_ray, sd, throughput,
-                                       rphase, rscatter, &volume_segment, NULL, true);
-                       }
+                       result = kernel_volume_decoupled_scatter(kg,
+                               state, &volume_ray, sd, throughput,
+                               rphase, rscatter, &volume_segment, NULL, true);
+               }
 
-                       /* free cached steps */
-                       kernel_volume_decoupled_free(kg, &volume_segment);
+               /* free cached steps */
+               kernel_volume_decoupled_free(kg, &volume_segment);
 
-                       if(result == VOLUME_PATH_SCATTERED) {
-                               if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
-                                       return VOLUME_PATH_SCATTERED;
-                               else
-                                       return VOLUME_PATH_MISSED;
-                       }
-                       else {
-                               *throughput *= volume_segment.accum_transmittance;
-                       }
+               if(result == VOLUME_PATH_SCATTERED) {
+                       if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+                               return VOLUME_PATH_SCATTERED;
+                       else
+                               return VOLUME_PATH_MISSED;
                }
-               else
+               else {
+                       *throughput *= volume_segment.accum_transmittance;
+               }
+       }
+       else
 #  endif  /* __VOLUME_DECOUPLED__ */
-               {
-                       /* integrate along volume segment with distance sampling */
-                       VolumeIntegrateResult result = kernel_volume_integrate(
-                               kg, state, sd, &volume_ray, L, throughput, heterogeneous);
+       {
+               /* integrate along volume segment with distance sampling */
+               VolumeIntegrateResult result = kernel_volume_integrate(
+                       kg, state, sd, &volume_ray, L, throughput, heterogeneous);
 
 #  ifdef __VOLUME_SCATTER__
-                       if(result == VOLUME_PATH_SCATTERED) {
-                               /* direct lighting */
-                               kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
-
-                               /* indirect light bounce */
-                               if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
-                                       return VOLUME_PATH_SCATTERED;
-                               else
-                                       return VOLUME_PATH_MISSED;
-                       }
-#  endif  /* __VOLUME_SCATTER__ */
+               if(result == VOLUME_PATH_SCATTERED) {
+                       /* direct lighting */
+                       kernel_path_volume_connect_light(kg, sd, emission_sd, *throughput, state, L);
+
+                       /* indirect light bounce */
+                       if(kernel_path_volume_bounce(kg, sd, throughput, state, &L->state, ray))
+                               return VOLUME_PATH_SCATTERED;
+                       else
+                               return VOLUME_PATH_MISSED;
                }
+#  endif  /* __VOLUME_SCATTER__ */
        }
 
        return VOLUME_PATH_ATTENUATED;
index 2597d68..42df7e8 100644 (file)
@@ -64,6 +64,164 @@ ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
 
 #ifndef __SPLIT_KERNEL__
 
+#ifdef __VOLUME__
+ccl_device_forceinline void kernel_branched_path_volume(
+       KernelGlobals *kg,
+       ShaderData *sd,
+       PathState *state,
+       Ray *ray,
+       float3 *throughput,
+       ccl_addr_space Intersection *isect,
+       bool hit,
+       ShaderData *indirect_sd,
+       ShaderData *emission_sd,
+       PathRadiance *L)
+{
+       /* Sanitize volume stack. */
+       if(!hit) {
+               kernel_volume_clean_stack(kg, state->volume_stack);
+       }
+
+       if(state->volume_stack[0].shader == SHADER_NONE) {
+               return;
+       }
+
+       /* volume attenuation, emission, scatter */
+       Ray volume_ray = *ray;
+       volume_ray.t = (hit)? isect->t: FLT_MAX;
+
+       bool heterogeneous = volume_stack_is_heterogeneous(kg, state->volume_stack);
+
+#  ifdef __VOLUME_DECOUPLED__
+       /* decoupled ray marching only supported on CPU */
+       if(kernel_data.integrator.volume_decoupled) {
+               /* cache steps along volume for repeated sampling */
+               VolumeSegment volume_segment;
+
+               shader_setup_from_volume(kg, sd, &volume_ray);
+               kernel_volume_decoupled_record(kg, state,
+                       &volume_ray, sd, &volume_segment, heterogeneous);
+
+               /* direct light sampling */
+               if(volume_segment.closure_flag & SD_SCATTER) {
+                       volume_segment.sampling_method = volume_stack_sampling_method(kg, state->volume_stack);
+
+                       int all = kernel_data.integrator.sample_all_lights_direct;
+
+                       kernel_branched_path_volume_connect_light(kg, sd,
+                               emission_sd, *throughput, state, L, all,
+                               &volume_ray, &volume_segment);
+
+                       /* indirect light sampling */
+                       int num_samples = kernel_data.integrator.volume_samples;
+                       float num_samples_inv = 1.0f/num_samples;
+
+                       for(int j = 0; j < num_samples; j++) {
+                               PathState ps = *state;
+                               Ray pray = *ray;
+                               float3 tp = *throughput;
+
+                               /* branch RNG state */
+                               path_state_branch(&ps, j, num_samples);
+
+                               /* scatter sample. if we use distance sampling and take just one
+                                * sample for direct and indirect light, we could share this
+                                * computation, but makes code a bit complex */
+                               float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
+                               float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
+
+                               VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
+                                       &ps, &pray, sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
+
+                               if(result == VOLUME_PATH_SCATTERED &&
+                                  kernel_path_volume_bounce(kg,
+                                                            sd,
+                                                            &tp,
+                                                            &ps,
+                                                            &L->state,
+                                                            &pray))
+                               {
+                                       kernel_path_indirect(kg,
+                                                            indirect_sd,
+                                                            emission_sd,
+                                                            &pray,
+                                                            tp*num_samples_inv,
+                                                            &ps,
+                                                            L);
+
+                                       /* for render passes, sum and reset indirect light pass variables
+                                        * for the next samples */
+                                       path_radiance_sum_indirect(L);
+                                       path_radiance_reset_indirect(L);
+                               }
+                       }
+               }
+
+               /* emission and transmittance */
+               if(volume_segment.closure_flag & SD_EMISSION)
+                       path_radiance_accum_emission(L, state, *throughput, volume_segment.accum_emission);
+               *throughput *= volume_segment.accum_transmittance;
+
+               /* free cached steps */
+               kernel_volume_decoupled_free(kg, &volume_segment);
+       }
+       else
+#  endif  /* __VOLUME_DECOUPLED__ */
+       {
+               /* GPU: no decoupled ray marching, scatter probalistically */
+               int num_samples = kernel_data.integrator.volume_samples;
+               float num_samples_inv = 1.0f/num_samples;
+
+               /* todo: we should cache the shader evaluations from stepping
+                * through the volume, for now we redo them multiple times */
+
+               for(int j = 0; j < num_samples; j++) {
+                       PathState ps = *state;
+                       Ray pray = *ray;
+                       float3 tp = (*throughput) * num_samples_inv;
+
+                       /* branch RNG state */
+                       path_state_branch(&ps, j, num_samples);
+
+                       VolumeIntegrateResult result = kernel_volume_integrate(
+                               kg, &ps, sd, &volume_ray, L, &tp, heterogeneous);
+
+#  ifdef __VOLUME_SCATTER__
+                       if(result == VOLUME_PATH_SCATTERED) {
+                               /* todo: support equiangular, MIS and all light sampling.
+                                * alternatively get decoupled ray marching working on the GPU */
+                               kernel_path_volume_connect_light(kg, sd, emission_sd, tp, state, L);
+
+                               if(kernel_path_volume_bounce(kg,
+                                                            sd,
+                                                            &tp,
+                                                            &ps,
+                                                            &L->state,
+                                                            &pray))
+                               {
+                                       kernel_path_indirect(kg,
+                                                            indirect_sd,
+                                                            emission_sd,
+                                                            &pray,
+                                                            tp,
+                                                            &ps,
+                                                            L);
+
+                                       /* for render passes, sum and reset indirect light pass variables
+                                        * for the next samples */
+                                       path_radiance_sum_indirect(L);
+                                       path_radiance_reset_indirect(L);
+                               }
+                       }
+# endif  /* __VOLUME_SCATTER__ */
+               }
+
+               /* todo: avoid this calculation using decoupled ray marching */
+               kernel_volume_shadow(kg, emission_sd, state, &volume_ray, throughput);
+       }
+}
+#endif  /* __VOLUME__ */
+
 /* bounce off surface and integrate indirect light */
 ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
        ShaderData *sd, ShaderData *indirect_sd, ShaderData *emission_sd,
@@ -293,142 +451,17 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
                bool hit = kernel_path_scene_intersect(kg, &state, &ray, &isect, L);
 
 #ifdef __VOLUME__
-               /* Sanitize volume stack. */
-               if(!hit) {
-                       kernel_volume_clean_stack(kg, state.volume_stack);
-               }
-               /* volume attenuation, emission, scatter */
-               if(state.volume_stack[0].shader != SHADER_NONE) {
-                       Ray volume_ray = ray;
-                       volume_ray.t = (hit)? isect.t: FLT_MAX;
-                       
-                       bool heterogeneous = volume_stack_is_heterogeneous(kg, state.volume_stack);
-
-#ifdef __VOLUME_DECOUPLED__
-                       /* decoupled ray marching only supported on CPU */
-
-                       /* cache steps along volume for repeated sampling */
-                       VolumeSegment volume_segment;
-
-                       shader_setup_from_volume(kg, &sd, &volume_ray);
-                       kernel_volume_decoupled_record(kg, &state,
-                               &volume_ray, &sd, &volume_segment, heterogeneous);
-
-                       /* direct light sampling */
-                       if(volume_segment.closure_flag & SD_SCATTER) {
-                               volume_segment.sampling_method = volume_stack_sampling_method(kg, state.volume_stack);
-
-                               int all = kernel_data.integrator.sample_all_lights_direct;
-
-                               kernel_branched_path_volume_connect_light(kg, &sd,
-                                       &emission_sd, throughput, &state, L, all,
-                                       &volume_ray, &volume_segment);
-
-                               /* indirect light sampling */
-                               int num_samples = kernel_data.integrator.volume_samples;
-                               float num_samples_inv = 1.0f/num_samples;
-
-                               for(int j = 0; j < num_samples; j++) {
-                                       PathState ps = state;
-                                       Ray pray = ray;
-                                       float3 tp = throughput;
-
-                                       /* branch RNG state */
-                                       path_state_branch(&ps, j, num_samples);
-
-                                       /* scatter sample. if we use distance sampling and take just one
-                                        * sample for direct and indirect light, we could share this
-                                        * computation, but makes code a bit complex */
-                                       float rphase = path_state_rng_1D(kg, &ps, PRNG_PHASE_CHANNEL);
-                                       float rscatter = path_state_rng_1D(kg, &ps, PRNG_SCATTER_DISTANCE);
-
-                                       VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-                                               &ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
-
-                                       if(result == VOLUME_PATH_SCATTERED &&
-                                          kernel_path_volume_bounce(kg,
-                                                                    &sd,
-                                                                    &tp,
-                                                                    &ps,
-                                                                    &L->state,
-                                                                    &pray))
-                                       {
-                                               kernel_path_indirect(kg,
-                                                                    &indirect_sd,
-                                                                    &emission_sd,
-                                                                    &pray,
-                                                                    tp*num_samples_inv,
-                                                                    &ps,
-                                                                    L);
-
-                                               /* for render passes, sum and reset indirect light pass variables
-                                                * for the next samples */
-                                               path_radiance_sum_indirect(L);
-                                               path_radiance_reset_indirect(L);
-                                       }
-                               }
-                       }
-
-                       /* emission and transmittance */
-                       if(volume_segment.closure_flag & SD_EMISSION)
-                               path_radiance_accum_emission(L, &state, throughput, volume_segment.accum_emission);
-                       throughput *= volume_segment.accum_transmittance;
-
-                       /* free cached steps */
-                       kernel_volume_decoupled_free(kg, &volume_segment);
-#else
-                       /* GPU: no decoupled ray marching, scatter probalistically */
-                       int num_samples = kernel_data.integrator.volume_samples;
-                       float num_samples_inv = 1.0f/num_samples;
-
-                       /* todo: we should cache the shader evaluations from stepping
-                        * through the volume, for now we redo them multiple times */
-
-                       for(int j = 0; j < num_samples; j++) {
-                               PathState ps = state;
-                               Ray pray = ray;
-                               float3 tp = throughput * num_samples_inv;
-
-                               /* branch RNG state */
-                               path_state_branch(&ps, j, num_samples);
-
-                               VolumeIntegrateResult result = kernel_volume_integrate(
-                                       kg, &ps, &sd, &volume_ray, L, &tp, heterogeneous);
-
-#ifdef __VOLUME_SCATTER__
-                               if(result == VOLUME_PATH_SCATTERED) {
-                                       /* todo: support equiangular, MIS and all light sampling.
-                                        * alternatively get decoupled ray marching working on the GPU */
-                                       kernel_path_volume_connect_light(kg, &sd, &emission_sd, tp, &state, L);
-
-                                       if(kernel_path_volume_bounce(kg,
-                                                                    &sd,
-                                                                    &tp,
-                                                                    &ps,
-                                                                    &L->state,
-                                                                    &pray))
-                                       {
-                                               kernel_path_indirect(kg,
-                                                                    &indirect_sd,
-                                                                    &emission_sd,
-                                                                    &pray,
-                                                                    tp,
-                                                                    &ps,
-                                                                    L);
-
-                                               /* for render passes, sum and reset indirect light pass variables
-                                                * for the next samples */
-                                               path_radiance_sum_indirect(L);
-                                               path_radiance_reset_indirect(L);
-                                       }
-                               }
-#endif  /* __VOLUME_SCATTER__ */
-                       }
-
-                       /* todo: avoid this calculation using decoupled ray marching */
-                       kernel_volume_shadow(kg, &emission_sd, &state, &volume_ray, &throughput);
-#endif  /* __VOLUME_DECOUPLED__ */
-               }
+               /* Volume integration. */
+               kernel_branched_path_volume(kg,
+                                           &sd,
+                                           &state,
+                                           &ray,
+                                           &throughput,
+                                           &isect,
+                                           hit,
+                                           &indirect_sd,
+                                           &emission_sd,
+                                           L);
 #endif  /* __VOLUME__ */
 
                /* Shade background. */
index 19c77c1..f76d6c2 100644 (file)
@@ -1262,6 +1262,7 @@ typedef struct KernelIntegrator {
 
        /* branched path */
        int branched;
+       int volume_decoupled;
        int diffuse_samples;
        int glossy_samples;
        int transmission_samples;
@@ -1287,7 +1288,6 @@ typedef struct KernelIntegrator {
        float light_inv_rr_threshold;
 
        int start_sample;
-       int pad1;
 } KernelIntegrator;
 static_assert_align(KernelIntegrator, 16);
 
index d9c310a..5905fb3 100644 (file)
@@ -1026,6 +1026,9 @@ ccl_device bool kernel_volume_use_decoupled(KernelGlobals *kg, bool heterogeneou
        /* decoupled ray marching for heterogeneous volumes not supported on the GPU,
         * which also means equiangular and multiple importance sampling is not
         * supported for that case */
+       if(!kernel_data.integrator.volume_decoupled)
+               return false;
+
 #ifdef __KERNEL_GPU__
        if(heterogeneous)
                return false;
index 15b728d..b268478 100644 (file)
@@ -145,6 +145,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
        kintegrator->sample_clamp_indirect = (sample_clamp_indirect == 0.0f)? FLT_MAX: sample_clamp_indirect*3.0f;
 
        kintegrator->branched = (method == BRANCHED_PATH);
+       kintegrator->volume_decoupled = device->info.has_volume_decoupled;
        kintegrator->diffuse_samples = diffuse_samples;
        kintegrator->glossy_samples = glossy_samples;
        kintegrator->transmission_samples = transmission_samples;
index 84537bf..c02a522 100644 (file)
@@ -1016,7 +1016,8 @@ void Mesh::pack_patches(uint *patch_data, uint vert_offset, uint face_offset, ui
        }
 }
 
-void Mesh::compute_bvh(DeviceScene *dscene,
+void Mesh::compute_bvh(Device *device,
+                       DeviceScene *dscene,
                        SceneParams *params,
                        Progress *progress,
                        int n,
@@ -1050,7 +1051,7 @@ void Mesh::compute_bvh(DeviceScene *dscene,
 
                        BVHParams bparams;
                        bparams.use_spatial_split = params->use_bvh_spatial_split;
-                       bparams.use_qbvh = params->use_qbvh;
+                       bparams.use_qbvh = params->use_qbvh && device->info.has_qbvh;
                        bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
                                                      params->use_bvh_unaligned_nodes;
                        bparams.num_motion_triangle_steps = params->num_bvh_time_steps;
@@ -1814,18 +1815,18 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
        /* bvh build */
        progress.set_status("Updating Scene BVH", "Building");
 
-       VLOG(1) << (scene->params.use_qbvh ? "Using QBVH optimization structure"
-                                          : "Using regular BVH optimization structure");
-
        BVHParams bparams;
        bparams.top_level = true;
-       bparams.use_qbvh = scene->params.use_qbvh;
+       bparams.use_qbvh = scene->params.use_qbvh && device->info.has_qbvh;
        bparams.use_spatial_split = scene->params.use_bvh_spatial_split;
        bparams.use_unaligned_nodes = dscene->data.bvh.have_curves &&
                                      scene->params.use_bvh_unaligned_nodes;
        bparams.num_motion_triangle_steps = scene->params.num_bvh_time_steps;
        bparams.num_motion_curve_steps = scene->params.num_bvh_time_steps;
 
+       VLOG(1) << (bparams.use_qbvh ? "Using QBVH optimization structure"
+                                    : "Using regular BVH optimization structure");
+
        delete bvh;
        bvh = BVH::create(bparams, scene->objects);
        bvh->build(progress);
@@ -1879,7 +1880,7 @@ void MeshManager::device_update_bvh(Device *device, DeviceScene *dscene, Scene *
        }
 
        dscene->data.bvh.root = pack.root_index;
-       dscene->data.bvh.use_qbvh = scene->params.use_qbvh;
+       dscene->data.bvh.use_qbvh = bparams.use_qbvh;
        dscene->data.bvh.use_bvh_steps = (scene->params.num_bvh_time_steps != 0);
 }
 
@@ -2084,6 +2085,7 @@ void MeshManager::device_update(Device *device, DeviceScene *dscene, Scene *scen
                if(mesh->need_update) {
                        pool.push(function_bind(&Mesh::compute_bvh,
                                                mesh,
+                                               device,
                                                dscene,
                                                &scene->params,
                                                &progress,
index 043ce9d..9a51ca7 100644 (file)
@@ -282,7 +282,8 @@ public:
        void pack_curves(Scene *scene, float4 *curve_key_co, float4 *curve_data, size_t curvekey_offset);
        void pack_patches(uint *patch_data, uint vert_offset, uint face_offset, uint corner_offset);
 
-       void compute_bvh(DeviceScene *dscene,
+       void compute_bvh(Device *device,
+                        DeviceScene *dscene,
                         SceneParams *params,
                         Progress *progress,
                         int n,
index 0194327..a1966af 100644 (file)
@@ -149,7 +149,7 @@ public:
                use_bvh_spatial_split = false;
                use_bvh_unaligned_nodes = true;
                num_bvh_time_steps = 0;
-               use_qbvh = false;
+               use_qbvh = true;
                persistent_data = false;
                texture_limit = 0;
        }
index 8648753..3992ada 100644 (file)
@@ -451,10 +451,12 @@ void ShaderManager::device_update_common(Device *device,
                        flag |= SD_HETEROGENEOUS_VOLUME;
                if(shader->has_bssrdf_bump)
                        flag |= SD_HAS_BSSRDF_BUMP;
-               if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR)
-                       flag |= SD_VOLUME_EQUIANGULAR;
-               if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE)
-                       flag |= SD_VOLUME_MIS;
+               if(device->info.has_volume_decoupled) {
+                       if(shader->volume_sampling_method == VOLUME_SAMPLING_EQUIANGULAR)
+                               flag |= SD_VOLUME_EQUIANGULAR;
+                       if(shader->volume_sampling_method == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE)
+                               flag |= SD_VOLUME_MIS;
+               }
                if(shader->volume_interpolation_method == VOLUME_INTERPOLATION_CUBIC)
                        flag |= SD_VOLUME_CUBIC;
                if(shader->has_bump)