#include "util_cuda.h"
#include "util_debug.h"
+#include "util_foreach.h"
#include "util_math.h"
#include "util_opencl.h"
#include "util_opengl.h"
{
}
-void DeviceTask::split(ThreadQueue<DeviceTask>& tasks, int num)
+/* Work out how many pieces are needed so that each piece stays around
+ * max_size work items, then delegate the actual splitting to split().
+ *
+ * For DISPLACE tasks max_size is measured along displace_w. For every
+ * other task type max_size is divided by the width w to get a number of
+ * rows per piece (never less than one row), and the height h is split. */
+void DeviceTask::split_max_size(list<DeviceTask>& tasks, int max_size)
+{
+	/* a zero or negative limit would make the ceiling divisions below
+	 * divide by zero, so treat it as the smallest valid limit */
+	max_size = max(1, max_size);
+
+	int num;
+
+	if(type == DISPLACE) {
+		num = (displace_w + max_size - 1)/max_size;
+	}
+	else {
+		/* rows per piece; max(1, w) keeps a zero-width task from
+		 * dividing by zero */
+		max_size = max(1, max_size/max(1, w));
+		num = (h + max_size - 1)/max_size;
+	}
+
+	split(tasks, num);
+}
+
+/* Thread-queue variant of split(): collects the sub-tasks produced by the
+ * list overload for the given num, then pushes each one onto queue in
+ * list order. */
+void DeviceTask::split(ThreadQueue<DeviceTask>& queue, int num)
+{
+	list<DeviceTask> parts;
+
+	split(parts, num);
+
+	foreach(DeviceTask& part, parts) {
+		queue.push(part);
+	}
+}
+
+void DeviceTask::split(list<DeviceTask>& tasks, int num)
{
if(type == DISPLACE) {
num = min(displace_w, num);
task.displace_x = tx;
task.displace_w = tw;
- tasks.push(task);
+ tasks.push_back(task);
}
}
else {
task.y = ty;
task.h = th;
- tasks.push(task);
+ tasks.push_back(task);
}
}
}
#include "device_memory.h"
+#include "util_list.h"
#include "util_string.h"
#include "util_thread.h"
#include "util_types.h"
int displace_x, displace_w;
DeviceTask(Type type = PATH_TRACE);
+
+ void split(list<DeviceTask>& tasks, int num);
void split(ThreadQueue<DeviceTask>& tasks, int num);
+ void split_max_size(list<DeviceTask>& tasks, int max_size);
};
/* Device */
#include "device.h"
#include "device_intern.h"
+#include "util_foreach.h"
#include "util_map.h"
#include "util_math.h"
#include "util_md5.h"
map<string, device_memory*> mem_map;
device_ptr null_mem;
bool device_initialized;
+ string platform_name;
const char *opencl_error_string(cl_int err)
{
if(opencl_error(ciErr))
return;
+ char name[256];
+ clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+ platform_name = name;
+
cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
if(opencl_error(ciErr))
return;
{
char version[256];
- int major, minor, req_major = 1, req_minor = 0;
+ int major, minor, req_major = 1, req_minor = 1;
clGetPlatformInfo(cpPlatform, CL_PLATFORM_VERSION, sizeof(version), &version, NULL);
{
string build_options = " -cl-fast-relaxed-math ";
- /* Full Shading only on NVIDIA cards at the moment */
- char vendor[256];
-
- clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(vendor), &vendor, NULL);
- string name = vendor;
-
- if(name == "NVIDIA CUDA")
- build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ ";
+ /* full shading only on NVIDIA cards at the moment */
+ if(platform_name == "NVIDIA CUDA")
+ build_options += "-D__KERNEL_SHADING__ -D__MULTI_CLOSURE__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+ if(platform_name == "Apple")
+ build_options += " -D__CL_NO_FLOAT3__ ";
return build_options;
}
opencl_assert(clFinish(cqCommandQueue));
}
- void task_add(DeviceTask& task)
+	/* Execute a device task. When the platform name is "Apple" the task
+	 * is first broken up with split_max_size(); on any other platform it
+	 * is run as a single piece. Only TONEMAP and PATH_TRACE tasks are
+	 * dispatched; other task types are ignored here. */
+ void task_add(DeviceTask& maintask)
{
- if(task.type == DeviceTask::TONEMAP)
- tonemap(task);
- else if(task.type == DeviceTask::PATH_TRACE)
- path_trace(task);
+		list<DeviceTask> tasks;
+
+		/* arbitrary limit to work around an Apple ATI OpenCL issue */
+		if(platform_name == "Apple")
+			maintask.split_max_size(tasks, 76800);
+		else
+			tasks.push_back(maintask);
+
+		/* the loop declares its own task reference, so no separate
+		 * DeviceTask local is needed */
+		foreach(DeviceTask& task, tasks) {
+			if(task.type == DeviceTask::TONEMAP)
+				tonemap(task);
+			else if(task.type == DeviceTask::PATH_TRACE)
+				path_trace(task);
+		}
}
void task_wait()
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
# OUTPUT ${KERNEL_PREPROCESSED}
-# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DWITH_OPENCL -o ${KERNEL_PREPROCESSED}
+# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
/* no namespaces in opencl */
#define CCL_NAMESPACE_BEGIN
#define CCL_NAMESPACE_END
-#define WITH_OPENCL
+
+#ifdef __CL_NO_FLOAT3__ /* build flag: every use of float3 becomes float4 */
+#define float3 float4
+#endif
+
+#ifdef __CL_NOINLINE__ /* build flag: __noinline expands to the noinline attribute */
+#define __noinline __attribute__((noinline))
+#else /* flag not set: __noinline expands to nothing */
+#define __noinline
+#endif
/* in opencl all functions are device functions, so leave this empty */
#define __device
-#define __device_inline
-#define __device_noinline
+#define __device_inline __device
+#define __device_noinline __device __noinline
/* no assert in opencl */
#define kernel_assert(cond)
#endif
#define make_float2(x, y) ((float2)(x, y))
+#ifdef __CL_NO_FLOAT3__
+/* float3 is represented by float4 in this configuration; the fourth lane is
+ * padded with a float literal, since an unsuffixed 0.0 is a double constant */
+#define make_float3(x, y, z) ((float4)(x, y, z, 0.0f))
+#else
#define make_float3(x, y, z) ((float3)(x, y, z))
+#endif
#define make_float4(x, y, z, w) ((float4)(x, y, z, w))
#define make_int2(x, y) ((int2)(x, y))
#define make_int3(x, y, z) ((int3)(x, y, z))