Cycles: Fix CUDA split kernel
author Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:03:51 +0000 (15:03 +0200)
committer Sergey Sharybin <sergey.vfx@gmail.com>
Tue, 2 May 2017 13:03:51 +0000 (15:03 +0200)
Global size y needs to be a multiple of 16.

intern/cycles/device/device_cuda.cpp

index acfb3e1d8f455d20e8e60f3cce82539486e7a187..a971170318ed274c808a5593f6a6c3cf501a03ae 100644 (file)
@@ -1634,7 +1634,8 @@ int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory&
                << string_human_readable_size(free) << ").";
 
        size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
-       int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements));
+       size_t side = round_down((int)sqrt(num_elements), 32);
+       int2 global_size = make_int2(side, round_down(num_elements / side, 16));
        VLOG(1) << "Global size: " << global_size << ".";
        return global_size;
 }