Cycles: combined CPU + GPU rendering support.
author Brecht Van Lommel <brechtvanlommel@gmail.com>
Sat, 21 Oct 2017 16:58:59 +0000 (18:58 +0200)
committer Brecht Van Lommel <brechtvanlommel@gmail.com>
Sat, 21 Oct 2017 18:13:44 +0000 (20:13 +0200)
CPU rendering will be restricted to a BVH2, which is not ideal for raytracing
performance but can be shared with the GPU. Decoupled volume shading will be
disabled to match GPU volume sampling.

The number of CPU rendering threads is reduced to leave one core dedicated to
each GPU. Viewport rendering will still use only the GPU. Because of the reduced
thread count and the BVH2 usage, perfect scaling should not be expected.

Go to User Preferences > System to enable the CPU to render alongside the GPU.

Differential Revision: https://developer.blender.org/D2873

intern/cycles/blender/addon/properties.py
intern/cycles/blender/blender_sync.cpp
intern/cycles/device/device.cpp
intern/cycles/device/device.h
intern/cycles/device/device_cpu.cpp
intern/cycles/device/opencl/opencl_util.cpp

index 7b16ef1d5433eed869efff523eae0f6581174ecc..2e149527066aa9955c66a8da3c844cbfa6170536 100644 (file)
@@ -1351,8 +1351,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
 
         cuda_devices = []
         opencl_devices = []
+        cpu_devices = []
         for device in device_list:
-            if not device[1] in {'CUDA', 'OPENCL'}:
+            if not device[1] in {'CUDA', 'OPENCL', 'CPU'}:
                 continue
 
             entry = None
@@ -1361,18 +1362,28 @@ class CyclesPreferences(bpy.types.AddonPreferences):
                 if dev.id == device[2] and dev.type == device[1]:
                     entry = dev
                     break
-            # Create new entry if no existing one was found
             if not entry:
+                # Create new entry if no existing one was found
                 entry = self.devices.add()
                 entry.id   = device[2]
                 entry.name = device[0]
                 entry.type = device[1]
+                entry.use  = entry.type != 'CPU'
+            elif entry.name != device[0]:
+                # Update name in case it changed
+                entry.name = device[0]
 
             # Sort entries into lists
             if entry.type == 'CUDA':
                 cuda_devices.append(entry)
             elif entry.type == 'OPENCL':
                 opencl_devices.append(entry)
+            else:
+                cpu_devices.append(entry)
+
+        cuda_devices.extend(cpu_devices)
+        opencl_devices.extend(cpu_devices)
+
         return cuda_devices, opencl_devices
 
 
index 2e3301c42096872e5c61a034259ad822cae8a302..5eddf189468a6c3a16fa73670eef3cfcc9d6136f 100644 (file)
@@ -675,6 +675,15 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
        /* feature set */
        params.experimental = (get_enum(cscene, "feature_set") != 0);
 
+       /* threads */
+       if(b_scene.render().threads_mode() == BL::RenderSettings::threads_mode_FIXED)
+               params.threads = b_scene.render().threads();
+       else
+               params.threads = 0;
+
+       /* Background */
+       params.background = background;
+
        /* device type */
        vector<DeviceInfo>& devices = Device::available_devices();
        
@@ -703,12 +712,28 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
                        }
                }
 
-               int compute_device = get_enum(b_preferences, "compute_device_type");
+               enum ComputeDevice {
+                       COMPUTE_DEVICE_CPU = 0,
+                       COMPUTE_DEVICE_CUDA = 1,
+                       COMPUTE_DEVICE_OPENCL = 2,
+                       COMPUTE_DEVICE_NUM = 3,
+               };
 
-               if(compute_device != 0) {
+               ComputeDevice compute_device = (ComputeDevice)get_enum(b_preferences,
+                                                                      "compute_device_type",
+                                                                      COMPUTE_DEVICE_NUM,
+                                                                      COMPUTE_DEVICE_CPU);
+
+               if(compute_device != COMPUTE_DEVICE_CPU) {
                        vector<DeviceInfo> used_devices;
                        RNA_BEGIN(&b_preferences, device, "devices") {
-                               if(get_enum(device, "type") == compute_device && get_boolean(device, "use")) {
+                               ComputeDevice device_type = (ComputeDevice)get_enum(device,
+                                                                                   "type",
+                                                                                   COMPUTE_DEVICE_NUM,
+                                                                                   COMPUTE_DEVICE_CPU);
+
+                               if(get_boolean(device, "use") &&
+                                  (device_type == compute_device || device_type == COMPUTE_DEVICE_CPU)) {
                                        string id = get_string(device, "id");
                                        foreach(DeviceInfo& info, devices) {
                                                if(info.id == id) {
@@ -723,15 +748,14 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
                                params.device = used_devices[0];
                        }
                        else if(used_devices.size() > 1) {
-                               params.device = Device::get_multi_device(used_devices);
+                               params.device = Device::get_multi_device(used_devices,
+                                                                        params.threads,
+                                                                        params.background);
                        }
                        /* Else keep using the CPU device that was set before. */
                }
        }
 
-       /* Background */
-       params.background = background;
-
        /* samples */
        int samples = get_int(cscene, "samples");
        int aa_samples = get_int(cscene, "aa_samples");
@@ -791,15 +815,10 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine& b_engine,
                params.tile_order = TILE_BOTTOM_TO_TOP;
        }
 
+       /* other parameters */
        params.start_resolution = get_int(cscene, "preview_start_resolution");
        params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
 
-       /* other parameters */
-       if(b_scene.render().threads_mode() == BL::RenderSettings::threads_mode_FIXED)
-               params.threads = b_scene.render().threads();
-       else
-               params.threads = 0;
-
        params.cancel_timeout = (double)get_float(cscene, "debug_cancel_timeout");
        params.reset_timeout = (double)get_float(cscene, "debug_reset_timeout");
        params.text_timeout = (double)get_float(cscene, "debug_text_timeout");
index 7b0875965f8e083908300216af92b37bfe76cc01..f31cacd8ec1bd794e2f215526fb60b611272dd7f 100644 (file)
 #include "util/util_debug.h"
 #include "util/util_foreach.h"
 #include "util/util_half.h"
+#include "util/util_logging.h"
 #include "util/util_math.h"
 #include "util/util_opengl.h"
 #include "util/util_time.h"
+#include "util/util_system.h"
 #include "util/util_types.h"
 #include "util/util_vector.h"
 #include "util/util_string.h"
@@ -365,7 +367,7 @@ string Device::device_capabilities()
        return capabilities;
 }
 
-DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
+DeviceInfo Device::get_multi_device(const vector<DeviceInfo>& subdevices, int threads, bool background)
 {
        assert(subdevices.size() > 1);
 
@@ -373,18 +375,38 @@ DeviceInfo Device::get_multi_device(vector<DeviceInfo> subdevices)
        info.type = DEVICE_MULTI;
        info.id = "MULTI";
        info.description = "Multi Device";
-       info.multi_devices = subdevices;
        info.num = 0;
 
        info.has_bindless_textures = true;
        info.has_volume_decoupled = true;
        info.has_qbvh = true;
-       foreach(DeviceInfo &device, subdevices) {
-               assert(device.type == info.multi_devices[0].type);
-
+       foreach(const DeviceInfo &device, subdevices) {
                info.has_bindless_textures &= device.has_bindless_textures;
                info.has_volume_decoupled &= device.has_volume_decoupled;
                info.has_qbvh &= device.has_qbvh;
+
+               if(device.type == DEVICE_CPU && subdevices.size() > 1) {
+                       if(background) {
+                               int orig_cpu_threads = (threads)? threads: system_cpu_thread_count();
+                               int cpu_threads = max(orig_cpu_threads - (subdevices.size() - 1), 0);
+
+                               if(cpu_threads >= 1) {
+                                       DeviceInfo cpu_device = device;
+                                       cpu_device.cpu_threads = cpu_threads;
+                                       info.multi_devices.push_back(cpu_device);
+                               }
+
+                               VLOG(1) << "CPU render threads reduced from "
+                                               << orig_cpu_threads << " to " << cpu_threads
+                                               << ", to dedicate to GPU.";
+                       }
+                       else {
+                               VLOG(1) << "CPU render threads disabled for interactive render.";
+                       }
+               }
+               else {
+                       info.multi_devices.push_back(device);
+               }
        }
 
        return info;
index 5cd9cf467699c1d07235618acf928e1088ac5c0c..f400eeb3e6b6ec2d5ca5d2b5fdb8899507a9304e 100644 (file)
@@ -58,6 +58,7 @@ public:
        bool has_volume_decoupled;
        bool has_qbvh;
        bool use_split_kernel; /* Denotes if the device is going to run cycles using split-kernel */
+       int cpu_threads;
        vector<DeviceInfo> multi_devices;
 
        DeviceInfo()
@@ -65,6 +66,7 @@ public:
                type = DEVICE_CPU;
                id = "CPU";
                num = 0;
+               cpu_threads = 0;
                display_device = false;
                advanced_shading = true;
                has_bindless_textures = false;
@@ -345,7 +347,9 @@ public:
        static vector<DeviceType>& available_types();
        static vector<DeviceInfo>& available_devices();
        static string device_capabilities();
-       static DeviceInfo get_multi_device(vector<DeviceInfo> subdevices);
+       static DeviceInfo get_multi_device(const vector<DeviceInfo>& subdevices,
+                                          int threads,
+                                          bool background);
 
        /* Tag devices lists for update. */
        static void tag_update();
index a17caabc85096c6b972aa66a0bb74d48a01168de..af1bbc0db18f01472e31058e0099b60c2ecb9c5b 100644 (file)
@@ -207,8 +207,8 @@ public:
              KERNEL_NAME_EVAL(cpu_avx, name), \
              KERNEL_NAME_EVAL(cpu_avx2, name)
 
-       CPUDevice(DeviceInfo& info, Stats &stats, bool background)
-       : Device(info, stats, background),
+       CPUDevice(DeviceInfo& info_, Stats &stats_, bool background_)
+       : Device(info_, stats_, background_),
 #define REGISTER_KERNEL(name) name ## _kernel(KERNEL_FUNCTIONS(name))
          REGISTER_KERNEL(path_trace),
          REGISTER_KERNEL(convert_to_half_float),
@@ -229,6 +229,9 @@ public:
          REGISTER_KERNEL(data_init)
 #undef REGISTER_KERNEL
        {
+               if(info.cpu_threads == 0) {
+                       info.cpu_threads = TaskScheduler::num_threads();
+               }
 
 #ifdef WITH_OSL
                kernel_globals.osl = &osl_globals;
@@ -237,7 +240,6 @@ public:
                if(use_split_kernel) {
                        VLOG(1) << "Will be using split kernel.";
                }
-
                need_texture_info = false;
 
 #define REGISTER_SPLIT_KERNEL(name) split_kernels[#name] = KernelFunctions<void(*)(KernelGlobals*, KernelData*)>(KERNEL_FUNCTIONS(name))
@@ -271,7 +273,7 @@ public:
 
        virtual bool show_samples() const
        {
-               return (TaskScheduler::num_threads() == 1);
+               return (info.cpu_threads == 1);
        }
 
        void load_texture_info()
@@ -826,9 +828,9 @@ public:
        int get_split_task_count(DeviceTask& task)
        {
                if(task.type == DeviceTask::SHADER)
-                       return task.get_subtask_count(TaskScheduler::num_threads(), 256);
+                       return task.get_subtask_count(info.cpu_threads, 256);
                else
-                       return task.get_subtask_count(TaskScheduler::num_threads());
+                       return task.get_subtask_count(info.cpu_threads);
        }
 
        void task_add(DeviceTask& task)
@@ -840,9 +842,9 @@ public:
                list<DeviceTask> tasks;
 
                if(task.type == DeviceTask::SHADER)
-                       task.split(tasks, TaskScheduler::num_threads(), 256);
+                       task.split(tasks, info.cpu_threads, 256);
                else
-                       task.split(tasks, TaskScheduler::num_threads());
+                       task.split(tasks, info.cpu_threads);
 
                foreach(DeviceTask& task, tasks)
                        task_pool.push(new CPUDeviceTask(this, task));
index 7d5173a5f1ddbec78254063b79bcca78cb5e3a26..459d512172f62e9ae09bddbc5503c713256916ca 100644 (file)
@@ -1080,6 +1080,7 @@ cl_device_type OpenCLInfo::get_device_type(cl_device_id device_id)
 
 string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
 {
+       string name = "";
        char board_name[1024];
        size_t length = 0;
        if(clGetDeviceInfo(device_id,
@@ -1089,11 +1090,21 @@ string OpenCLInfo::get_readable_device_name(cl_device_id device_id)
                           &length) == CL_SUCCESS)
        {
                if(length != 0 && board_name[0] != '\0') {
-                       return board_name;
+                       name = board_name;
                }
        }
+
        /* Fallback to standard device name API. */
-       return get_device_name(device_id);
+       if(name.empty()) {
+               name = get_device_name(device_id);
+       }
+
+       /* Distinguish from our native CPU device. */
+       if(get_device_type(device_id) & CL_DEVICE_TYPE_CPU) {
+               name += " (OpenCL)";
+       }
+
+       return name;
 }
 
 bool OpenCLInfo::get_driver_version(cl_device_id device_id,