Merge branch 'master' into 28
author Campbell Barton <ideasman42@gmail.com>
Wed, 12 Apr 2017 04:23:47 +0000 (14:23 +1000)
committer Campbell Barton <ideasman42@gmail.com>
Wed, 12 Apr 2017 04:23:47 +0000 (14:23 +1000)
intern/cycles/device/device_cuda.cpp
intern/cycles/device/device_split_kernel.cpp
intern/cycles/device/device_split_kernel.h
source/blender/editors/interface/interface_layout.c
source/blender/makesdna/DNA_meshdata_types.h
source/blender/makesrna/intern/makesrna.c

index 4c1a49878f5d80dcc5d20e96b4fa8d1b22c56e5b..ef283c9d455010577236524bdfa028b6ced54d9c 100644 (file)
@@ -1613,10 +1613,23 @@ int2 CUDASplitKernel::split_kernel_local_size()
        return make_int2(32, 1);
 }
 
-int2 CUDASplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/)
+int2 CUDASplitKernel::split_kernel_global_size(device_memory& kg, device_memory& data, DeviceTask * /*task*/)
 {
-       /* TODO(mai): implement something here to detect ideal work size */
-       return make_int2(256, 256);
+       size_t free;
+       size_t total;
+
+       device->cuda_push_context();
+       cuda_assert(cuMemGetInfo(&free, &total));
+       device->cuda_pop_context();
+
+       VLOG(1) << "Maximum device allocation size: "
+               << string_human_readable_number(free) << " bytes. ("
+               << string_human_readable_size(free) << ").";
+
+       size_t num_elements = max_elements_for_max_buffer_size(kg, data, free / 2);
+       int2 global_size = make_int2(round_down((int)sqrt(num_elements), 32), (int)sqrt(num_elements));
+       VLOG(1) << "Global size: " << global_size << ".";
+       return global_size;
 }
 
 bool device_cuda_init(void)
index fa641161c0508b29eb1e9c88b5476b3921b1239f..981ec74fe56039233607e3f7bedc173405e500e1 100644 (file)
@@ -128,26 +128,27 @@ bool DeviceSplitKernel::path_trace(DeviceTask *task,
                local_size[1] = lsize[1];
        }
 
-       /* Set gloabl size */
-       size_t global_size[2];
-       {
-               int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
-
-               /* Make sure that set work size is a multiple of local
-                * work size dimensions.
-                */
-               global_size[0] = round_up(gsize[0], local_size[0]);
-               global_size[1] = round_up(gsize[1], local_size[1]);
-       }
-
        /* Number of elements in the global state buffer */
        int num_global_elements = global_size[0] * global_size[1];
-       assert(num_global_elements % WORK_POOL_SIZE == 0);
 
        /* Allocate all required global memory once. */
        if(first_tile) {
                first_tile = false;
 
+               /* Set global size */
+               {
+                       int2 gsize = split_kernel_global_size(kgbuffer, kernel_data, task);
+
+                       /* Make sure that set work size is a multiple of local
+                        * work size dimensions.
+                        */
+                       global_size[0] = round_up(gsize[0], local_size[0]);
+                       global_size[1] = round_up(gsize[1], local_size[1]);
+               }
+
+               num_global_elements = global_size[0] * global_size[1];
+               assert(num_global_elements % WORK_POOL_SIZE == 0);
+
                /* Calculate max groups */
 
                /* Denotes the maximum work groups possible w.r.t. current requested tile size. */
index 15a94953a11419a65ecbe3902b9928f371e7cc9b..55548122c0c84d965caf139185e3c545c827cfbe 100644 (file)
@@ -95,6 +95,9 @@ private:
        /* Marked True in constructor and marked false at the end of path_trace(). */
        bool first_tile;
 
+       /* Cached global size */
+       size_t global_size[2];
+
 public:
        explicit DeviceSplitKernel(Device* device);
        virtual ~DeviceSplitKernel();
index 9b6547cf8a1b8c80344e373fe37cf37be5482ec1..30a2094fee717475f0c21106e9d9932eef570419 100644 (file)
@@ -1274,7 +1274,8 @@ static void ui_item_rna_size(
        if (!w) {
                if (type == PROP_ENUM && icon_only) {
                        w = ui_text_icon_width(layout, "", ICON_BLANK1, 0);
-                       w += 0.6f * UI_UNIT_X;
+                       if (index != RNA_ENUM_VALUE)
+                               w += 0.6f * UI_UNIT_X;
                }
                else {
                        w = ui_text_icon_width(layout, name, icon, 0);
index 621807d111cc7b4a0447a8b91ad1f814b428045e..3676066a39927b6afd2f2f397796341be70ae986 100644 (file)
@@ -164,8 +164,8 @@ typedef struct MLoop {
  *     MEdge *ed = &medge[mloop[lt->tri[j]].e];
  *     unsigned int tri_edge[2]  = {mloop[lt->tri[j]].v, mloop[lt->tri[j_next]].v};
  *
- *     if (ELEM(ed->v1, tri_edge[0], tri_edge[1]) &&
- *         ELEM(ed->v2, tri_edge[0], tri_edge[1]))
+ *     if (((ed->v1 == tri_edge[0]) && (ed->v2 == tri_edge[1])) ||
+ *         ((ed->v1 == tri_edge[1]) && (ed->v2 == tri_edge[0])))
  *     {
  *         printf("real edge found %u %u\n", tri_edge[0], tri_edge[1]);
  *     }
index 4552c7730975c70d12dc9be84fcc40cb68caf303..9d68c05dda04c086e99c5e2222f0c40faa3f26f6 100644 (file)
@@ -507,7 +507,7 @@ static void rna_float_print(FILE *f, float num)
 {
        if (num == -FLT_MAX) fprintf(f, "-FLT_MAX");
        else if (num == FLT_MAX) fprintf(f, "FLT_MAX");
-       else if ((int64_t)num == num) fprintf(f, "%.1ff", num);
+       else if ((ABS(num) < INT64_MAX) && ((int64_t)num == num)) fprintf(f, "%.1ff", num);
        else fprintf(f, "%.10ff", num);
 }