Merge branch 'master' into blender2.8
author Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:29:00 +0000 (15:29 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:29:00 +0000 (15:29 +0200)
CMakeLists.txt
intern/cycles/device/device_cuda.cpp

index 9c18710fd485e2859b14a1c95c4fc469e253e65a..89e4e76a8fd3da19ee6824b9d72d2172043f9552 100644 (file)
@@ -369,7 +369,7 @@ mark_as_advanced(WITH_LIBMV_SCHUR_SPECIALIZATIONS)
 
 # Logging/unbit test libraries.
 option(WITH_SYSTEM_GFLAGS   "Use system-wide Gflags instead of a bundled one" OFF)
-option(WITH_SYSTEM_GFLOG    "Use system-wide Glog instead of a bundled one" OFF)
+option(WITH_SYSTEM_GLOG     "Use system-wide Glog instead of a bundled one" OFF)
 mark_as_advanced(WITH_SYSTEM_GFLAGS)
 mark_as_advanced(WITH_SYSTEM_GLOG)
 
index 3532e640eaafa4077cdd17a1aa869dc4a911428c..e497ec6b0e160711a0bcca88f69108402865df80 100644 (file)
@@ -119,6 +119,7 @@ public:
        int cuDevId;
        int cuDevArchitecture;
        bool first_error;
+       CUDASplitKernel *split_kernel;
 
        struct PixelMem {
                GLuint cuPBO;
@@ -221,6 +222,8 @@ public:
                cuDevice = 0;
                cuContext = 0;
 
+               split_kernel = NULL;
+
                need_bindless_mapping = false;
 
                /* intialize */
@@ -260,6 +263,8 @@ public:
        {
                task_pool.stop();
 
+               delete split_kernel;
+
                if(info.has_bindless_textures) {
                        tex_free(bindless_mapping);
                }
@@ -1357,12 +1362,14 @@ public:
                                        requested_features.max_closure = 64;
                                }
 
-                               CUDASplitKernel split_kernel(this);
-                               split_kernel.load_kernels(requested_features);
+                               if(split_kernel == NULL) {
+                                       split_kernel = new CUDASplitKernel(this);
+                                       split_kernel->load_kernels(requested_features);
+                               }
 
                                while(task->acquire_tile(this, tile)) {
                                        device_memory void_buffer;
-                                       split_kernel.path_trace(task, tile, void_buffer, void_buffer);
+                                       split_kernel->path_trace(task, tile, void_buffer, void_buffer);
 
                                        task->release_tile(tile);
 
@@ -1648,7 +1655,8 @@ int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory&
                << string_human_readable_size(free) << ").";
 
        size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
-       int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements));
+       size_t side = round_down((int)sqrt(num_elements), 32);
+       int2 global_size = make_int2(side, round_down(num_elements / side, 16));
        VLOG(1) << "Global size: " << global_size << ".";
        return global_size;
 }