Cycles OpenCL: patch #35514 by Doug Gale
authorBrecht Van Lommel <brechtvanlommel@pandora.be>
Mon, 27 May 2013 16:21:07 +0000 (16:21 +0000)
committerBrecht Van Lommel <brechtvanlommel@pandora.be>
Mon, 27 May 2013 16:21:07 +0000 (16:21 +0000)
* Support using devices from all OpenCL platforms, so that you can use e.g. both
  Intel and NVidia OpenCL implementations if you have them installed.
* Fix compile error due to missing fmodf after recent math node change.
* Enable advanced shading for Intel OpenCL.
* CYCLES_OPENCL_DEBUG environment variable for generating debug symbols so you
  can debug with gdb. This crashes the compiler with Intel OpenCL on Linux though.
  To make this work the preprocessed kernel source code is written out, as gdb
  needs this.
* Show OpenCL compiler warnings even if the build succeeded.
* Some small fixes to initialize cdDevice to NULL, add missing NULL check when
  creating buffer and add missing space at end of build options for Apple OpenCL.
* Fix crash with multi device + opencl, now e.g. CPU + GPU render should work.

I did a few tweaks to the code and also:

* Fix viewport render failing sometimes with Apple CPU OpenCL, was not taking
  workgroup size limits into account properly.
* Add compile error when advanced shading in the Blender binary and OpenCL kernel
  are not in sync.

intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/kernel_compat_opencl.h
intern/cycles/kernel/kernel_types.h
intern/cycles/util/util_path.cpp
intern/cycles/util/util_path.h

index 4608c06c3d505c9cc3f1ef845e9d6f2838724185..8e14c281155c0d70be99d4d6e9f9b2ba04c79028 100644 (file)
@@ -38,7 +38,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-#define CL_MEM_PTR(p) ((cl_mem)(unsigned long)(p))
+#define CL_MEM_PTR(p) ((cl_mem)(uintptr_t)(p))
 
 static cl_device_type opencl_device_type()
 {
@@ -57,7 +57,57 @@ static cl_device_type opencl_device_type()
                        return CL_DEVICE_TYPE_ACCELERATOR;
        }
 
-       return CL_DEVICE_TYPE_GPU|CL_DEVICE_TYPE_ACCELERATOR;
+       return CL_DEVICE_TYPE_ALL;
+}
+
+static bool opencl_kernel_use_debug()
+{
+       return (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+}
+
+static bool opencl_kernel_use_advanced_shading(const string& platform)
+{
+       /* keep this in sync with kernel_types.h! */
+       if(platform == "NVIDIA CUDA")
+               return false;
+       else if(platform == "Apple")
+               return false;
+       else if(platform == "AMD Accelerated Parallel Processing")
+               return false;
+       else if(platform == "Intel(R) OpenCL")
+               return true;
+
+       return false;
+}
+
+static string opencl_kernel_build_options(const string& platform, const string *debug_src = NULL)
+{
+       string build_options = " -cl-fast-relaxed-math ";
+
+       if(platform == "NVIDIA CUDA")
+               build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
+
+       else if(platform == "Apple")
+               build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes ";
+
+       else if(platform == "AMD Accelerated Parallel Processing")
+               build_options += "-D__KERNEL_OPENCL_AMD__ ";
+
+       else if(platform == "Intel(R) OpenCL") {
+               build_options += "-D__KERNEL_OPENCL_INTEL_CPU__";
+
+               /* options for gdb source level kernel debugging. this segfaults on linux currently */
+               if(opencl_kernel_use_debug() && debug_src)
+                       build_options += "-g -s \"" + *debug_src + "\"";
+       }
+
+       if(opencl_kernel_use_debug())
+               build_options += "-D__KERNEL_OPENCL_DEBUG__ ";
+
+       if (opencl_kernel_use_advanced_shading(platform))
+               build_options += "-D__KERNEL_OPENCL_NEED_ADVANCED_SHADING__ ";
+       
+       return build_options;
 }
 
 class OpenCLDevice : public Device
@@ -72,9 +122,14 @@ public:
        cl_kernel ckPathTraceKernel;
        cl_kernel ckFilmConvertKernel;
        cl_int ciErr;
-       map<string, device_vector<uchar>*> const_mem_map;
-       map<string, device_memory*> mem_map;
+
+       typedef map<string, device_vector<uchar>*> ConstMemMap;
+       typedef map<string, device_ptr> MemMap;
+
+       ConstMemMap const_mem_map;
+       MemMap mem_map;
        device_ptr null_mem;
+
        bool device_initialized;
        string platform_name;
 
@@ -169,6 +224,7 @@ public:
        {
                background = background_;
                cpPlatform = NULL;
+               cdDevice = NULL;
                cxContext = NULL;
                cqCommandQueue = NULL;
                cpProgram = NULL;
@@ -189,38 +245,64 @@ public:
                        return;
                }
 
-               ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
+               vector<cl_platform_id> platforms(num_platforms, NULL);
+
+               ciErr = clGetPlatformIDs(num_platforms, &platforms[0], NULL);
                if(opencl_error(ciErr))
                        return;
 
-               char name[256];
-               clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
-               platform_name = name;
+               int num_base = 0;
+               int total_devices = 0;
 
-               /* get devices */
-               vector<cl_device_id> device_ids;
-               cl_uint num_devices;
+               for (int platform = 0; platform < num_platforms; platform++) {
+                       cl_uint num_devices;
 
-               if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
-                       return;
+                       if(opencl_error(clGetDeviceIDs(platforms[platform], opencl_device_type(), 0, NULL, &num_devices)))
+                               return;
 
-               if(info.num > num_devices) {
-                       if(num_devices == 0)
-                               opencl_error("OpenCL: no devices found.");
-                       else
-                               opencl_error("OpenCL: specified device not found.");
-                       return;
+                       total_devices += num_devices;
+
+                       if(info.num - num_base >= num_devices) {
+                               /* num doesn't refer to a device in this platform */
+                               num_base += num_devices;
+                               continue;
+                       }
+
+                       /* device is in this platform */
+                       cpPlatform = platforms[platform];
+
+                       /* get devices */
+                       vector<cl_device_id> device_ids(num_devices, NULL);
+
+                       if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+                               return;
+
+                       cdDevice = device_ids[info.num - num_base];
+
+                       char name[256];
+                       clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
+                       platform_name = name;
+
+                       break;
                }
 
-               device_ids.resize(num_devices);
-               
-               if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
+               if(total_devices == 0) {
+                       opencl_error("OpenCL: no devices found.");
                        return;
+               }
+               else if (!cdDevice) {
+                       opencl_error("OpenCL: specified device not found.");
+                       return;
+               }
 
-               cdDevice = device_ids[info.num];
+               /* Create context properties array to specify platform */
+               const cl_context_properties context_props[] = {
+                       CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform,
+                       0, 0
+               };
 
                /* create context */
-               cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
+               cxContext = clCreateContext(context_props, 1, &cdDevice, NULL, NULL, &ciErr);
                if(opencl_error(ciErr))
                        return;
 
@@ -229,6 +311,9 @@ public:
                        return;
 
                null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
+               if(opencl_error(ciErr))
+                       return;
+
                device_initialized = true;
        }
 
@@ -265,7 +350,7 @@ public:
                return true;
        }
 
-       bool load_binary(const string& kernel_path, const string& clbin)
+       bool load_binary(const string& kernel_path, const string& clbin, const string *debug_src = NULL)
        {
                /* read binary into memory */
                vector<uint8_t> binary;
@@ -288,7 +373,7 @@ public:
                        return false;
                }
 
-               if(!build_kernel(kernel_path))
+               if(!build_kernel(kernel_path, debug_src))
                        return false;
 
                return true;
@@ -315,51 +400,35 @@ public:
                return true;
        }
 
-       string kernel_build_options()
-       {
-               string build_options = " -cl-fast-relaxed-math ";
-               
-               if(platform_name == "NVIDIA CUDA")
-                       build_options += "-D__KERNEL_OPENCL_NVIDIA__ -cl-nv-maxrregcount=24 -cl-nv-verbose ";
-
-               else if(platform_name == "Apple")
-                       build_options += "-D__KERNEL_OPENCL_APPLE__ -Wno-missing-prototypes";
-
-               else if(platform_name == "AMD Accelerated Parallel Processing")
-                       build_options += "-D__KERNEL_OPENCL_AMD__ ";
-
-               return build_options;
-       }
-
-       bool build_kernel(const string& kernel_path)
+       bool build_kernel(const string& kernel_path, const string *debug_src = NULL)
        {
-               string build_options = kernel_build_options();
+               string build_options = opencl_kernel_build_options(platform_name, debug_src);
        
                ciErr = clBuildProgram(cpProgram, 0, NULL, build_options.c_str(), NULL, NULL);
 
-               if(ciErr != CL_SUCCESS) {
-                       /* show build errors */
-                       char *build_log;
-                       size_t ret_val_size;
+               /* show warnings even if build is successful */
+               size_t ret_val_size = 0;
 
-                       clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+               clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
 
-                       build_log = new char[ret_val_size+1];
-                       clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
+               if(ret_val_size > 1) {
+                       vector<char> build_log(ret_val_size+1);
+                       clGetProgramBuildInfo(cpProgram, cdDevice, CL_PROGRAM_BUILD_LOG, ret_val_size, &build_log[0], NULL);
 
                        build_log[ret_val_size] = '\0';
-                       opencl_error("OpenCL build failed: errors in console");
-                       fprintf(stderr, "%s\n", build_log);
-
-                       delete[] build_log;
+                       fprintf(stderr, "OpenCL kernel build output:\n");
+                       fprintf(stderr, "%s\n", &build_log[0]);
+               }
 
+               if(ciErr != CL_SUCCESS) {
+                       opencl_error("OpenCL build failed: errors in console");
                        return false;
                }
 
                return true;
        }
 
-       bool compile_kernel(const string& kernel_path, const string& kernel_md5)
+       bool compile_kernel(const string& kernel_path, const string& kernel_md5, const string *debug_src = NULL)
        {
                /* we compile kernels consisting of many files. unfortunately opencl
                 * kernel caches do not seem to recognize changes in included files.
@@ -367,6 +436,9 @@ public:
                string source = "#include \"kernel.cl\" // " + kernel_md5 + "\n";
                source = path_source_replace_includes(source, kernel_path);
 
+               if (debug_src)
+                       path_write_text(*debug_src, source);
+
                size_t source_len = source.size();
                const char *source_str = source.c_str();
 
@@ -378,7 +450,7 @@ public:
                double starttime = time_dt();
                printf("Compiling OpenCL kernel ...\n");
 
-               if(!build_kernel(kernel_path))
+               if(!build_kernel(kernel_path, debug_src))
                        return false;
 
                printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
@@ -401,7 +473,7 @@ public:
                md5.append((uint8_t*)name, strlen(name));
                md5.append((uint8_t*)driver, strlen(driver));
 
-               string options = kernel_build_options();
+               string options = opencl_kernel_build_options(platform_name);
                md5.append((uint8_t*)options.c_str(), options.size());
 
                return md5.get_hex();
@@ -424,18 +496,26 @@ public:
                string kernel_md5 = path_files_md5_hash(kernel_path);
                string device_md5 = device_md5_hash();
 
-               /* try to use cache binary */
+               /* path to cached binary */
                string clbin = string_printf("cycles_kernel_%s_%s.clbin", device_md5.c_str(), kernel_md5.c_str());
                clbin = path_user_get(path_join("cache", clbin));
 
-               if(path_exists(clbin)) {
-                       /* if exists already, try use it */
-                       if(!load_binary(kernel_path, clbin))
-                               return false;
+               /* path to preprocessed source for debugging */
+               string clsrc, *debug_src = NULL;
+               
+               if (opencl_kernel_use_debug()) {
+                       clsrc = string_printf("cycles_kernel_%s_%s.cl", device_md5.c_str(), kernel_md5.c_str());
+                       clsrc = path_user_get(path_join("cache", clsrc));
+                       debug_src = &clsrc;
+               }
+
+               /* if exists already, try use it */
+               if(path_exists(clbin) && load_binary(kernel_path, clbin, debug_src)) {
+                       /* kernel loaded from binary */
                }
                else {
-                       /* compile kernel */
-                       if(!compile_kernel(kernel_path, kernel_md5))
+                       /* if does not exist or loading binary failed, compile kernel */
+                       if(!compile_kernel(kernel_path, kernel_md5, debug_src))
                                return false;
 
                        /* save binary for reuse */
@@ -461,7 +541,7 @@ public:
                if(null_mem)
                        clReleaseMemObject(CL_MEM_PTR(null_mem));
 
-               map<string, device_vector<uchar>*>::iterator mt;
+               ConstMemMap::iterator mt;
                for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
                        mem_free(*(mt->second));
                        delete mt->second;
@@ -533,26 +613,29 @@ public:
 
        void const_copy_to(const char *name, void *host, size_t size)
        {
-               if(const_mem_map.find(name) == const_mem_map.end()) {
+               ConstMemMap::iterator i = const_mem_map.find(name);
+
+               if(i == const_mem_map.end()) {
                        device_vector<uchar> *data = new device_vector<uchar>();
                        data->copy((uchar*)host, size);
 
                        mem_alloc(*data, MEM_READ_ONLY);
-                       const_mem_map[name] = data;
+                       i = const_mem_map.insert(ConstMemMap::value_type(name, data)).first;
                }
                else {
-                       device_vector<uchar> *data = const_mem_map[name];
+                       device_vector<uchar> *data = i->second;
                        data->copy((uchar*)host, size);
                }
 
-               mem_copy_to(*const_mem_map[name]);
+               mem_copy_to(*i->second);
        }
 
        void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
        {
                mem_alloc(mem, MEM_READ_ONLY);
                mem_copy_to(mem);
-               mem_map[name] = &mem;
+               assert(mem_map.find(name) == mem_map.end());
+               mem_map.insert(MemMap::value_type(name, mem.device_pointer));
        }
 
        void tex_free(device_memory& mem)
@@ -567,6 +650,33 @@ public:
                return global_size + ((r == 0)? 0: group_size - r);
        }
 
+       void enqueue_kernel(cl_kernel kernel, size_t w, size_t h)
+       {
+               size_t workgroup_size, max_work_items[3];
+
+               clGetKernelWorkGroupInfo(kernel, cdDevice,
+                       CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
+               clGetDeviceInfo(cdDevice,
+                       CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*3, max_work_items, NULL);
+       
+               /* try to divide evenly over 2 dimensions */
+               size_t sqrt_workgroup_size = max(sqrt((double)workgroup_size), 1.0);
+               size_t local_size[2] = {sqrt_workgroup_size, sqrt_workgroup_size};
+
+               /* some implementations have max size 1 on 2nd dimension */
+               if (local_size[1] > max_work_items[1]) {
+                       local_size[0] = workgroup_size/max_work_items[1];
+                       local_size[1] = max_work_items[1];
+               }
+
+               size_t global_size[2] = {global_size_round_up(local_size[0], w), global_size_round_up(local_size[1], h)};
+
+               /* run kernel */
+               ciErr = clEnqueueNDRangeKernel(cqCommandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
+               opencl_assert(ciErr);
+               opencl_assert(clFinish(cqCommandQueue));
+       }
+
        void path_trace(RenderTile& rtile, int sample)
        {
                /* cast arguments to cl types */
@@ -582,7 +692,7 @@ public:
                cl_int d_stride = rtile.stride;
 
                /* sample arguments */
-               int narg = 0;
+               cl_uint narg = 0;
                ciErr = 0;
 
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
@@ -603,31 +713,17 @@ public:
 
                opencl_assert(ciErr);
 
-               size_t workgroup_size;
-
-               clGetKernelWorkGroupInfo(ckPathTraceKernel, cdDevice,
-                       CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
-       
-               workgroup_size = max(sqrt((double)workgroup_size), 1.0);
-
-               size_t local_size[2] = {workgroup_size, workgroup_size};
-               size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
-
-               /* run kernel */
-               ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckPathTraceKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
-               opencl_assert(ciErr);
-               opencl_assert(clFinish(cqCommandQueue));
+               enqueue_kernel(ckPathTraceKernel, d_w, d_h);
        }
 
-       cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
+       cl_int set_kernel_arg_mem(cl_kernel kernel, cl_uint *narg, const char *name)
        {
                cl_mem ptr;
                cl_int err = 0;
 
-               if(mem_map.find(name) != mem_map.end()) {
-                       device_memory *mem = mem_map[name];
-               
-                       ptr = CL_MEM_PTR(mem->device_pointer);
+               MemMap::iterator i = mem_map.find(name);
+               if(i != mem_map.end()) {
+                       ptr = CL_MEM_PTR(i->second);
                }
                else {
                        /* work around NULL not working, even though the spec says otherwise */
@@ -655,7 +751,7 @@ public:
                cl_int d_stride = task.stride;
 
                /* sample arguments */
-               int narg = 0;
+               cl_uint narg = 0;
                ciErr = 0;
 
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
@@ -676,20 +772,7 @@ public:
 
                opencl_assert(ciErr);
 
-               size_t workgroup_size;
-
-               clGetKernelWorkGroupInfo(ckFilmConvertKernel, cdDevice,
-                       CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &workgroup_size, NULL);
-       
-               workgroup_size = max(sqrt((double)workgroup_size), 1.0);
-
-               size_t local_size[2] = {workgroup_size, workgroup_size};
-               size_t global_size[2] = {global_size_round_up(local_size[0], d_w), global_size_round_up(local_size[1], d_h)};
-
-               /* run kernel */
-               ciErr = clEnqueueNDRangeKernel(cqCommandQueue, ckFilmConvertKernel, 2, NULL, global_size, local_size, 0, NULL, NULL);
-               opencl_assert(ciErr);
-               opencl_assert(clFinish(cqCommandQueue));
+               enqueue_kernel(ckFilmConvertKernel, d_w, d_h);
        }
 
        void thread_run(DeviceTask *task)
@@ -769,34 +852,44 @@ void device_opencl_info(vector<DeviceInfo>& devices)
        if(clGetPlatformIDs(num_platforms, &platform_ids[0], NULL) != CL_SUCCESS)
                return;
 
-       if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
-               return;
-       
-       device_ids.resize(num_devices);
+       /* devices are numbered consecutively across platforms */
+       int num_base = 0;
 
-       if(clGetDeviceIDs(platform_ids[0], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
-               return;
-       
-       /* add devices */
-       for(int num = 0; num < num_devices; num++) {
-               cl_device_id device_id = device_ids[num];
-               char name[1024] = "\0";
+       for (int platform = 0; platform < num_platforms; platform++, num_base += num_devices) {
+               num_devices = 0;
+               if(clGetDeviceIDs(platform_ids[platform], opencl_device_type(), 0, NULL, &num_devices) != CL_SUCCESS || num_devices == 0)
+                       continue;
 
-               if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS)
+               device_ids.resize(num_devices);
+
+               if(clGetDeviceIDs(platform_ids[platform], opencl_device_type(), num_devices, &device_ids[0], NULL) != CL_SUCCESS)
                        continue;
 
-               DeviceInfo info;
+               char pname[256];
+               clGetPlatformInfo(platform_ids[platform], CL_PLATFORM_NAME, sizeof(pname), &pname, NULL);
+               string platform_name = pname;
+
+               /* add devices */
+               for(int num = 0; num < num_devices; num++) {
+                       cl_device_id device_id = device_ids[num];
+                       char name[1024] = "\0";
 
-               info.type = DEVICE_OPENCL;
-               info.description = string(name);
-               info.id = string_printf("OPENCL_%d", num);
-               info.num = num;
-               /* we don't know if it's used for display, but assume it is */
-               info.display_device = true;
-               info.advanced_shading = false;
-               info.pack_images = true;
+                       if(clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(name), &name, NULL) != CL_SUCCESS)
+                               continue;
 
-               devices.push_back(info);
+                       DeviceInfo info;
+
+                       info.type = DEVICE_OPENCL;
+                       info.description = string(name);
+                       info.num = num_base + num;
+                       info.id = string_printf("OPENCL_%d", info.num);
+                       /* we don't know if it's used for display, but assume it is */
+                       info.display_device = true;
+                       info.advanced_shading = opencl_kernel_use_advanced_shading(platform_name);
+                       info.pack_images = true;
+
+                       devices.push_back(info);
+               }
        }
 }
 
index 6c41bfa552188e1e8943ec0904a700bd7712c1f0..66cf0bb996b368af25bddd4262b351579cd44164 100644 (file)
 #define atan2f(x, y) atan2(((float)x), ((float)y))
 #define fmaxf(x, y) fmax(((float)x), ((float)y))
 #define fminf(x, y) fmin(((float)x), ((float)y))
+#define fmodf(x, y) fmod((float)x, (float)y)
 
 /* data lookup defines */
 #define kernel_data (*kg->data)
index 535b948998519a610d83a2c5ce19eae269935027..1dcd3a52b6af8117698267e5d71765acfd3653f3 100644 (file)
@@ -66,9 +66,11 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __KERNEL_OPENCL__
 
+/* keep __KERNEL_ADV_SHADING__ in sync with opencl_kernel_use_advanced_shading! */
+
 #ifdef __KERNEL_OPENCL_NVIDIA__
 #define __KERNEL_SHADING__
-#define __MULTI_CLOSURE__
+//#define __KERNEL_ADV_SHADING__
 #endif
 
 #ifdef __KERNEL_OPENCL_APPLE__
@@ -85,6 +87,11 @@ CCL_NAMESPACE_BEGIN
 #define __EXTRA_NODES__
 #endif
 
+#ifdef __KERNEL_OPENCL_INTEL_CPU__
+#define __KERNEL_SHADING__
+#define __KERNEL_ADV_SHADING__
+#endif
+
 #endif
 
 /* kernel features */
@@ -122,7 +129,12 @@ CCL_NAMESPACE_BEGIN
 #define __OBJECT_MOTION__
 #define __HAIR__
 #endif
-//#define __SOBOL_FULL_SCREEN__
+
+/* Sanity check */
+
+#if defined(__KERNEL_OPENCL_NEED_ADVANCED_SHADING__) && !defined(__MULTI_CLOSURE__)
+#error "OpenCL: mismatch between advanced shading flags in device_opencl.cpp and kernel_types.h"
+#endif
 
 /* Shader Evaluation */
 
index 8cf23bc6a764f5fa1d89a8ca272992b711f8d0de..79062fe251fdf919393bcb60c6dc952cbf877898 100644 (file)
@@ -145,6 +145,14 @@ bool path_write_binary(const string& path, const vector<uint8_t>& binary)
        return true;
 }
 
+bool path_write_text(const string& path, string& text)
+{
+       vector<uint8_t> binary(text.length(), 0);
+       std::copy(text.begin(), text.end(), binary.begin());
+
+       return path_write_binary(path, binary);
+}
+
 bool path_read_binary(const string& path, vector<uint8_t>& binary)
 {
        binary.resize(boost::filesystem::file_size(path));
@@ -176,7 +184,7 @@ bool path_read_text(const string& path, string& text)
 
        if(!path_exists(path) || !path_read_binary(path, binary))
                return false;
-       
+
        const char *str = (const char*)&binary[0];
        size_t size = binary.size();
        text = string(str, size);
index 89e4452ecd91950e7b437ddc70fc4dd67c49c30b..d5257e79c056d89d5b33f4d302bab11a73570a40 100644 (file)
@@ -44,6 +44,7 @@ string path_files_md5_hash(const string& dir);
 
 void path_create_directories(const string& path);
 bool path_write_binary(const string& path, const vector<uint8_t>& binary);
+bool path_write_text(const string& path, string& text);
 bool path_read_binary(const string& path, vector<uint8_t>& binary);
 bool path_read_text(const string& path, string& text);