}
/* Returns the split-kernel global size reported by CPUSplitKernel.
 *
 * All three parameters are unnamed and unused, so the result is a constant.
 * This change replaces the returned value (64, 1) with (1, 1).
 *
 * NOTE(review): the new x component equals WORK_POOL_SIZE_CPU (1) and the old
 * one equals the former WORK_POOL_SIZE (64) - presumably the two are meant to
 * stay in sync; confirm before changing either. */
int2 CPUSplitKernel::split_kernel_global_size(device_memory& /*kg*/, device_memory& /*data*/, DeviceTask * /*task*/) {
- return make_int2(64, 1);
+ return make_int2(1, 1);
}
uint64_t CPUSplitKernel::state_buffer_size(device_memory& kernel_globals, device_memory& /*data*/, size_t num_threads) {
/* Calculate max groups */
/* Denotes the maximum work groups possible w.r.t. current requested tile size. */
- unsigned int max_work_groups = num_global_elements / WORK_POOL_SIZE + 1;
+ unsigned int work_pool_size = (device->info.type == DEVICE_CPU) ? WORK_POOL_SIZE_CPU : WORK_POOL_SIZE_GPU;
+ unsigned int max_work_groups = num_global_elements / work_pool_size + 1;
/* Allocate work_pool_wgs memory. */
work_pool_wgs.resize(max_work_groups * sizeof(unsigned int));
#define VOLUME_STACK_SIZE 16
/* Work pool size.
 *
 * The single WORK_POOL_SIZE constant (64) is replaced by two explicit values,
 * WORK_POOL_SIZE_GPU (64) and WORK_POOL_SIZE_CPU (1). WORK_POOL_SIZE itself is
 * still defined: it expands to the GPU value when __KERNEL_GPU__ is defined
 * and to the CPU value otherwise, so the choice is made at compile time.
 *
 * NOTE(review): code that needs the value for a device other than the one it
 * is compiled for has to use the _GPU/_CPU names directly - verify that such
 * callers select the same value as this macro does. */
-#define WORK_POOL_SIZE 64
+#define WORK_POOL_SIZE_GPU 64
+#define WORK_POOL_SIZE_CPU 1
+#ifdef __KERNEL_GPU__
+# define WORK_POOL_SIZE WORK_POOL_SIZE_GPU
+#else
+# define WORK_POOL_SIZE WORK_POOL_SIZE_CPU
+#endif
/* device capabilities */
#ifdef __KERNEL_CPU__