Cycles: some steps to getting OpenCL backend to compile.
authorBrecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 20 May 2011 12:26:01 +0000 (12:26 +0000)
committerBrecht Van Lommel <brechtvanlommel@pandora.be>
Fri, 20 May 2011 12:26:01 +0000 (12:26 +0000)
25 files changed:
intern/cycles/device/device_opencl.cpp
intern/cycles/kernel/CMakeLists.txt
intern/cycles/kernel/kernel.cl
intern/cycles/kernel/kernel_compat_cuda.h
intern/cycles/kernel/kernel_compat_opencl.h
intern/cycles/kernel/kernel_globals.h
intern/cycles/kernel/kernel_light.h
intern/cycles/kernel/kernel_textures.h [new file with mode: 0644]
intern/cycles/kernel/kernel_triangle.h
intern/cycles/kernel/kernel_types.h
intern/cycles/kernel/svm/bsdf_ashikhmin_velvet.h
intern/cycles/kernel/svm/bsdf_diffuse.h
intern/cycles/kernel/svm/bsdf_microfacet.h
intern/cycles/kernel/svm/bsdf_ward.h
intern/cycles/kernel/svm/bsdf_westin.h
intern/cycles/kernel/svm/svm_blend.h
intern/cycles/kernel/svm/svm_displace.h
intern/cycles/kernel/svm/svm_distorted_noise.h
intern/cycles/kernel/svm/svm_image.h
intern/cycles/kernel/svm/svm_mix.h
intern/cycles/kernel/svm/svm_sky.h
intern/cycles/kernel/svm/svm_texture.h
intern/cycles/kernel/svm/svm_types.h
intern/cycles/util/util_color.h
intern/cycles/util/util_math.h

index 6b564d10e78d3b606d42498de1da66428c507f94..ef416dfb8dc7d91fc742e99dbb7bad0ea9aaf41c 100644 (file)
@@ -55,6 +55,7 @@ public:
        cl_int ciErr;
        map<string, device_vector<uchar>*> const_mem_map;
        map<string, device_memory*> mem_map;
+       device_ptr null_mem;
 
        const char *opencl_error_string(cl_int err)
        {
@@ -125,10 +126,10 @@ public:
                ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
                opencl_assert(ciErr);
 
-               ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
+               ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL);
                opencl_assert(ciErr);
 
-               cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr);
+               cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
                opencl_assert(ciErr);
 
                cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr);
@@ -137,10 +138,16 @@ public:
                /* compile kernel */
                string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
                size_t source_len = source.size();
-               string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror";
-               //printf("path %s\n", path_get("kernel").c_str());
 
-               //clUnloadCompiler();
+               string build_options = "";
+
+               //string csource = "../blender/intern/cycles";
+               //build_options += "-I " + csource + "/kernel -I " + csource + "/util";
+
+               build_options += " -I " + path_get("kernel"); /* todo: escape path */
+
+               build_options += " -Werror";
+               build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END=";
 
                cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr);
 
@@ -170,10 +177,15 @@ public:
                opencl_assert(ciErr);
                ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
                opencl_assert(ciErr);
+
+               null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
        }
 
        ~OpenCLDevice()
        {
+
+               clReleaseMemObject(CL_MEM_PTR(null_mem));
+
                map<string, device_vector<uchar>*>::iterator mt;
                for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
                        mem_free(*(mt->second));
@@ -261,6 +273,7 @@ public:
        void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
        {
                mem_alloc(mem, MEM_READ_ONLY);
+               mem_copy_to(mem);
                mem_map[name] = &mem;
        }
 
@@ -295,6 +308,11 @@ public:
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
+
+#define KERNEL_TEX(type, ttype, name) \
+       ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
+#include "kernel_textures.h"
+
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
                ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
@@ -314,10 +332,20 @@ public:
 
        cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
        {
-               device_memory *mem = mem_map[name];
-               cl_mem ptr = CL_MEM_PTR(mem->device_pointer);
-               cl_int size = mem->data_width;
-               cl_int err = 0;
+               cl_mem ptr;
+               cl_int size, err = 0;
+
+               if(mem_map.find(name) != mem_map.end()) {
+                       device_memory *mem = mem_map[name];
+               
+                       ptr = CL_MEM_PTR(mem->device_pointer);
+                       size = mem->data_width;
+               }
+               else {
+                       /* work around NULL not working, even though the spec says otherwise */
+                       ptr = CL_MEM_PTR(null_mem);
+                       size = 1;
+               }
                
                err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
                opencl_assert(err);
@@ -347,9 +375,11 @@ public:
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
-               ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R");
-               ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G");
-               ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B");
+
+#define KERNEL_TEX(type, ttype, name) \
+       ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
+#include "kernel_textures.h"
+
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
                ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
index f1aa1db9e8cf6e6a36284b90bb6740c8f17e981a..bc1f8bd40a5acd33f1cc92362b31a2c90bab8102 100644 (file)
@@ -25,8 +25,11 @@ SET(headers
        kernel_qbvh.h
        kernel_random.h
        kernel_shader.h
+       kernel_textures.h
        kernel_triangle.h
-       kernel_types.h
+       kernel_types.h)
+
+SET(svm_headers
        svm/bsdf.h
        svm/bsdf_ashikhmin_velvet.h
        svm/bsdf_diffuse.h
@@ -78,7 +81,7 @@ ELSE()
 ENDIF()
 
 IF(WITH_CYCLES_CUDA)
-       SET(cuda_sources kernel.cu ${headers})
+       SET(cuda_sources kernel.cu ${headers} ${svm_headers})
        SET(cuda_cubins)
 
        FOREACH(arch ${CYCLES_CUDA_ARCH})
@@ -106,9 +109,23 @@ ENDIF()
 
 INCLUDE_DIRECTORIES(. ../util osl svm)
 
-ADD_LIBRARY(cycles_kernel ${sources} ${headers})
+ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers})
 
 IF(WITH_CYCLES_CUDA)
        ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda)
 ENDIF()
 
+# OPENCL kernel
+
+IF(WITH_CYCLES_OPENCL)
+       SET(util_headers
+               ../util/util_color.h
+               ../util/util_math.h
+               ../util/util_transform.h
+               ../util/util_types.h)
+
+       INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+       INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm)
+       INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ENDIF()
+
index a22db5fe040645315fa0a242c835219c25a40cfa..e1a9b3a0696601393ee12408680204466001d46e 100644 (file)
 #include "kernel_types.h"
 #include "kernel_globals.h"
 
-typedef struct KernelGlobals {
-       __constant KernelData *data;
-
-       __global float *__response_curve_R;
-       int __response_curve_R_width;
-
-       __global float *__response_curve_G;
-       int __response_curve_G_width;
-
-       __global float *__response_curve_B;
-       int __response_curve_B_width;
-} KernelGlobals;
-
 #include "kernel_film.h"
-//#include "kernel_path.h"
+#include "kernel_path.h"
 //#include "kernel_displace.h"
 
-__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh)
+__kernel void kernel_ocl_path_trace(
+       __constant KernelData *data,
+       __global float4 *buffer,
+       __global uint *rng_state,
+
+#define KERNEL_TEX(type, ttype, name) \
+       __global type *name, \
+       int name##_width,
+#include "kernel_textures.h"
+
+       int pass,
+       int sx, int sy, int sw, int sh)
 {
        KernelGlobals kglobals, *kg = &kglobals;
+
        kg->data = data;
 
-       int x = get_global_id(0);
-       int y = get_global_id(1);
+#define KERNEL_TEX(type, ttype, name) \
+       kg->name = name; \
+       kg->name##_width = name##_width;
+#include "kernel_textures.h"
+
+       int x = sx + get_global_id(0);
+       int y = sy + get_global_id(1);
        int w = kernel_data.cam.width;
 
-       if(x < sx + sw && y < sy + sh) {
-               if(pass == 0) {
-                       buffer[x + w*y].x = 0.5f;
-                       buffer[x + w*y].y = 0.5f;
-                       buffer[x + w*y].z = 0.5f;
-               }
-               else {
-                       buffer[x + w*y].x += 0.5f;
-                       buffer[x + w*y].y += 0.5f;
-                       buffer[x + w*y].z += 0.5f;
-               }
-               
-               //= make_float3(1.0f, 0.9f, 0.0f);
-               //kernel_path_trace(buffer, rng_state, pass, x, y);
-       }
+       if(x < sx + sw && y < sy + sh)
+               kernel_path_trace(kg, buffer, rng_state, pass, x, y);
 }
 
 __kernel void kernel_ocl_tonemap(
        __constant KernelData *data,
        __global uchar4 *rgba,
        __global float4 *buffer,
-       __global float *__response_curve_R,
-       int __response_curve_R_width,
-       __global float *__response_curve_G,
-       int __response_curve_G_width,
-       __global float *__response_curve_B,
-       int __response_curve_B_width,
+
+#define KERNEL_TEX(type, ttype, name) \
+       __global type *name, \
+       int name##_width,
+#include "kernel_textures.h"
+
        int pass, int resolution,
        int sx, int sy, int sw, int sh)
 {
        KernelGlobals kglobals, *kg = &kglobals;
 
        kg->data = data;
-       kg->__response_curve_R = __response_curve_R;
-       kg->__response_curve_R_width = __response_curve_R_width;
-       kg->__response_curve_G = __response_curve_G;
-       kg->__response_curve_G_width = __response_curve_G_width;
-       kg->__response_curve_B = __response_curve_B;
-       kg->__response_curve_B_width = __response_curve_B_width;
+
+#define KERNEL_TEX(type, ttype, name) \
+       kg->name = name; \
+       kg->name##_width = name##_width;
+#include "kernel_textures.h"
 
        int x = sx + get_global_id(0);
        int y = sy + get_global_id(1);
@@ -96,10 +86,10 @@ __kernel void kernel_ocl_tonemap(
                kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y);
 }
 
-__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
+/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
 {
        int x = sx + get_global_id(0);
 
        kernel_displace(input, offset, x);
-}
+}*/
 
index 99c1df1fb1a4ec386b9d704fe1212ad007cdfb1a..b7b29d4632395a2ef53c92aed597abe6c118d1b7 100644 (file)
@@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
 #define __device_inline  __device__ __inline__
 #define __global
 #define __shared __shared__
-#define __constant __constant__
+#define __constant
 
 /* No assert supported for CUDA */
 
index 16ddca5305b73cc237c758d58acffb2d74fe0f17..e6e54850605ce2eb15eee3a4fedf4974307bfcd9 100644 (file)
@@ -29,6 +29,8 @@ CCL_NAMESPACE_BEGIN
 #define __device
 #define __device_inline
 
+#define kernel_assert(cond)
+
 __device float kernel_tex_interp_(__global float *data, int width, float x)
 {
        x = clamp(x, 0.0f, 1.0f)*width;
@@ -40,9 +42,20 @@ __device float kernel_tex_interp_(__global float *data, int width, float x)
        return (1.0f - t)*data[index] + t*data[nindex];
 }
 
+#define make_float3(x, y, z) ((float3)(x, y, z)) /* todo 1.1 */
+
+#define __uint_as_float(x) as_float(x)
+#define __float_as_uint(x) as_uint(x)
+#define __int_as_float(x) as_float(x)
+#define __float_as_int(x) as_int(x)
+
 #define kernel_data (*kg->data)
 #define kernel_tex_interp(t, x) \
-       kernel_tex_interp_(kg->t, kg->t##_width, x);
+       kernel_tex_interp_(kg->t, kg->t##_width, x)
+#define kernel_tex_fetch(t, index) \
+       kg->t[index]
+
+#define NULL 0
 
 CCL_NAMESPACE_END
 
index 4c4f35bb5084d1346980bbda552004ee6a6d78a0..720a9f28fa1b994912fe36a1d99c7aa64cb1cf02 100644 (file)
 
 /* Constant Globals */
 
-#ifdef __KERNEL_CPU__
-
-#ifdef WITH_OSL
-#include "osl_globals.h"
-#endif
-
 CCL_NAMESPACE_BEGIN
 
 /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
    the kernel, to access constant data. These are all stored as "textures", but
    these are really just standard arrays. We can't use actually globals because
    multiple renders may be running inside the same process. */
-typedef struct KernelGlobals {
 
-#else
+#ifdef __KERNEL_CPU__
 
-/* On the GPU, constant memory textures must be globals, so we can't put them
-   into a struct. As a result we don't actually use this struct and use actual
-   globals and simply pass along a NULL pointer everywhere, which we hope gets
-   optimized out. */
-#ifdef __KERNEL_CUDA__
-typedef struct KernelGlobals {} KernelGlobals;
+#ifdef WITH_OSL
+//#include "osl_globals.h"
 #endif
 
+typedef struct KernelGlobals {
+
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
+
+       KernelData __data;
+
+#ifdef WITH_OSL
+       /* On the CPU, we also have the OSL globals here. Most data structures are shared
+          with SVM, the difference is in the shaders and object/mesh attributes. */
+       //OSLGlobals osl;
 #endif
 
-/* globals */
-__constant KernelData __data;
-
-#ifndef __KERNEL_OPENCL__
-
-/* bvh */
-texture_float4 __bvh_nodes;
-texture_float4 __tri_woop;
-texture_uint __prim_index;
-texture_uint __prim_object;
-texture_uint __object_node;
-
-/* objects */
-texture_float4 __objects;
-
-/* triangles */
-texture_float4 __tri_normal;
-texture_float4 __tri_vnormal;
-texture_float4 __tri_vindex;
-texture_float4 __tri_verts;
-
-/* attributes */
-texture_uint4 __attributes_map;
-texture_float __attributes_float;
-texture_float4 __attributes_float3;
-
-/* lights */
-texture_float4 __light_distribution;
-texture_float4 __light_point;
-
-/* shaders */
-texture_uint4 __svm_nodes;
-
-/* camera/film */
-texture_float __filter_table;
-texture_float __response_curve_R;
-texture_float __response_curve_G;
-texture_float __response_curve_B;
-
-/* sobol */
-texture_uint __sobol_directions;
-
-/* image */
-texture_image_uchar4 __tex_image_000;
-texture_image_uchar4 __tex_image_001;
-texture_image_uchar4 __tex_image_002;
-texture_image_uchar4 __tex_image_003;
-texture_image_uchar4 __tex_image_004;
-texture_image_uchar4 __tex_image_005;
-texture_image_uchar4 __tex_image_006;
-texture_image_uchar4 __tex_image_007;
-texture_image_uchar4 __tex_image_008;
-texture_image_uchar4 __tex_image_009;
-texture_image_uchar4 __tex_image_010;
-texture_image_uchar4 __tex_image_011;
-texture_image_uchar4 __tex_image_012;
-texture_image_uchar4 __tex_image_013;
-texture_image_uchar4 __tex_image_014;
-texture_image_uchar4 __tex_image_015;
-texture_image_uchar4 __tex_image_016;
-texture_image_uchar4 __tex_image_017;
-texture_image_uchar4 __tex_image_018;
-texture_image_uchar4 __tex_image_019;
-texture_image_uchar4 __tex_image_020;
-texture_image_uchar4 __tex_image_021;
-texture_image_uchar4 __tex_image_022;
-texture_image_uchar4 __tex_image_023;
-texture_image_uchar4 __tex_image_024;
-texture_image_uchar4 __tex_image_025;
-texture_image_uchar4 __tex_image_026;
-texture_image_uchar4 __tex_image_027;
-texture_image_uchar4 __tex_image_028;
-texture_image_uchar4 __tex_image_029;
-texture_image_uchar4 __tex_image_030;
-texture_image_uchar4 __tex_image_031;
-texture_image_uchar4 __tex_image_032;
-texture_image_uchar4 __tex_image_033;
-texture_image_uchar4 __tex_image_034;
-texture_image_uchar4 __tex_image_035;
-texture_image_uchar4 __tex_image_036;
-texture_image_uchar4 __tex_image_037;
-texture_image_uchar4 __tex_image_038;
-texture_image_uchar4 __tex_image_039;
-texture_image_uchar4 __tex_image_040;
-texture_image_uchar4 __tex_image_041;
-texture_image_uchar4 __tex_image_042;
-texture_image_uchar4 __tex_image_043;
-texture_image_uchar4 __tex_image_044;
-texture_image_uchar4 __tex_image_045;
-texture_image_uchar4 __tex_image_046;
-texture_image_uchar4 __tex_image_047;
-texture_image_uchar4 __tex_image_048;
-texture_image_uchar4 __tex_image_049;
-texture_image_uchar4 __tex_image_050;
-texture_image_uchar4 __tex_image_051;
-texture_image_uchar4 __tex_image_052;
-texture_image_uchar4 __tex_image_053;
-texture_image_uchar4 __tex_image_054;
-texture_image_uchar4 __tex_image_055;
-texture_image_uchar4 __tex_image_056;
-texture_image_uchar4 __tex_image_057;
-texture_image_uchar4 __tex_image_058;
-texture_image_uchar4 __tex_image_059;
-texture_image_uchar4 __tex_image_060;
-texture_image_uchar4 __tex_image_061;
-texture_image_uchar4 __tex_image_062;
-texture_image_uchar4 __tex_image_063;
-texture_image_uchar4 __tex_image_064;
-texture_image_uchar4 __tex_image_065;
-texture_image_uchar4 __tex_image_066;
-texture_image_uchar4 __tex_image_067;
-texture_image_uchar4 __tex_image_068;
-texture_image_uchar4 __tex_image_069;
-texture_image_uchar4 __tex_image_070;
-texture_image_uchar4 __tex_image_071;
-texture_image_uchar4 __tex_image_072;
-texture_image_uchar4 __tex_image_073;
-texture_image_uchar4 __tex_image_074;
-texture_image_uchar4 __tex_image_075;
-texture_image_uchar4 __tex_image_076;
-texture_image_uchar4 __tex_image_077;
-texture_image_uchar4 __tex_image_078;
-texture_image_uchar4 __tex_image_079;
-texture_image_uchar4 __tex_image_080;
-texture_image_uchar4 __tex_image_081;
-texture_image_uchar4 __tex_image_082;
-texture_image_uchar4 __tex_image_083;
-texture_image_uchar4 __tex_image_084;
-texture_image_uchar4 __tex_image_085;
-texture_image_uchar4 __tex_image_086;
-texture_image_uchar4 __tex_image_087;
-texture_image_uchar4 __tex_image_088;
-texture_image_uchar4 __tex_image_089;
-texture_image_uchar4 __tex_image_090;
-texture_image_uchar4 __tex_image_091;
-texture_image_uchar4 __tex_image_092;
-texture_image_uchar4 __tex_image_093;
-texture_image_uchar4 __tex_image_094;
-texture_image_uchar4 __tex_image_095;
-texture_image_uchar4 __tex_image_096;
-texture_image_uchar4 __tex_image_097;
-texture_image_uchar4 __tex_image_098;
-texture_image_uchar4 __tex_image_099;
+} KernelGLobals;
 
 #endif
 
-#ifdef __KERNEL_CPU__
+/* For CUDA, constant memory textures must be globals, so we can't put them
+   into a struct. As a result we don't actually use this struct and use actual
+   globals and simply pass along a NULL pointer everywhere, which we hope gets
+   optimized out. */
 
-#ifdef WITH_OSL
+#ifdef __KERNEL_CUDA__
 
-/* On the CPU, we also have the OSL globals here. Most data structures are shared
-   with SVM, the difference is in the shaders and object/mesh attributes. */
+__constant__ KernelData __data;
+typedef struct KernelGlobals {} KernelGlobals;
 
-OSLGlobals osl;
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
 
 #endif
 
+/* OpenCL */
+
+#ifdef __KERNEL_OPENCL__
+
+typedef struct KernelGlobals {
+       __constant KernelData *data;
+
+#define KERNEL_TEX(type, ttype, name) \
+       __global type *name; \
+       int name##_width;
+#include "kernel_textures.h"
 } KernelGlobals;
+
 #endif
 
 CCL_NAMESPACE_END
index 537f7ea00f0b29c127c491655828920f48683343..5164e5deea25bedee72a1f01596e5f2794badfb6 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-struct LightSample {
+typedef struct LightSample {
        float3 P;
        float3 Ng;
        int object;
        int prim;
        int shader;
        float weight;
-};
+} LightSample;
 
 /* Point Light */
 
diff --git a/intern/cycles/kernel/kernel_textures.h b/intern/cycles/kernel/kernel_textures.h
new file mode 100644 (file)
index 0000000..bd44ed7
--- /dev/null
@@ -0,0 +1,153 @@
+
+#ifndef KERNEL_TEX
+#define KERNEL_TEX(type, ttype, name)
+#endif
+
+#ifndef KERNEL_IMAGE_TEX
+#define KERNEL_IMAGE_TEX(type, ttype, name)
+#endif
+
+
+/* bvh */
+KERNEL_TEX(float4, texture_float4, __bvh_nodes)
+KERNEL_TEX(float4, texture_float4, __tri_woop)
+KERNEL_TEX(uint, texture_uint, __prim_index)
+KERNEL_TEX(uint, texture_uint, __prim_object)
+KERNEL_TEX(uint, texture_uint, __object_node)
+
+/* objects */
+KERNEL_TEX(float4, texture_float4, __objects)
+
+/* triangles */
+KERNEL_TEX(float4, texture_float4, __tri_normal)
+KERNEL_TEX(float4, texture_float4, __tri_vnormal)
+KERNEL_TEX(float4, texture_float4, __tri_vindex)
+KERNEL_TEX(float4, texture_float4, __tri_verts)
+
+/* attributes */
+KERNEL_TEX(uint4, texture_uint4, __attributes_map)
+KERNEL_TEX(float, texture_float, __attributes_float)
+KERNEL_TEX(float4, texture_float4, __attributes_float3)
+
+/* lights */
+KERNEL_TEX(float4, texture_float4, __light_distribution)
+KERNEL_TEX(float4, texture_float4, __light_point)
+
+/* shaders */
+KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
+
+/* camera/film */
+KERNEL_TEX(float, texture_float, __filter_table)
+KERNEL_TEX(float, texture_float, __response_curve_R)
+KERNEL_TEX(float, texture_float, __response_curve_G)
+KERNEL_TEX(float, texture_float, __response_curve_B)
+
+/* sobol */
+KERNEL_TEX(uint, texture_uint, __sobol_directions)
+
+/* image */
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_000)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_001)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_002)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_003)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_004)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_007)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_008)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_009)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_010)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_011)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_012)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_013)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_014)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_015)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_016)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_017)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_018)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_019)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_020)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_021)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_022)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_023)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_024)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_025)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_026)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_027)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_028)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_029)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_030)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_031)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_032)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_033)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_034)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_035)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_036)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_037)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_038)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_039)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_040)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_041)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_042)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_043)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_044)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_045)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_046)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_047)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_048)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_049)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_050)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_051)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_052)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_053)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_054)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_055)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_056)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_057)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_058)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_059)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_060)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_061)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_062)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_063)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_064)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_065)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_066)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_067)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_068)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_069)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_070)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_071)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_072)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_073)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_074)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_075)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_076)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_077)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_078)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_079)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_080)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_081)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_082)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_083)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_084)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_085)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_086)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_087)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_088)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_089)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_090)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_091)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_092)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_093)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_094)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
+
+#undef KERNEL_TEX
+#undef KERNEL_IMAGE_TEX
+
+
index ddf8b7b1caf7e61ba6a8b7d4fa1bcf9c8baa1d75..7eaf54d14bffc3e37a8f8678abdd3acc83be1b09 100644 (file)
@@ -22,11 +22,11 @@ CCL_NAMESPACE_BEGIN
 __device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v)
 {
        /* load triangle vertices */
-       float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+       float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
 
-       float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
-       float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
-       float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+       float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+       float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+       float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
 
        /* compute point */
        float t = 1.0f - u - v;
@@ -50,11 +50,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
 {
 #if 0
        /* load triangle vertices */
-       float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+       float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
 
-       float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
-       float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
-       float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+       float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+       float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+       float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
 
        /* compute normal */
        return normalize(cross(v2 - v0, v1 - v0));
@@ -68,11 +68,11 @@ __device_inline float3 triangle_normal_MT(KernelGlobals *kg, int tri_index, int
 __device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v)
 {
        /* load triangle vertices */
-       float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+       float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
 
-       float3 n0 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
-       float3 n1 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
-       float3 n2 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+       float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
+       float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
+       float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
 
        return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
 }
@@ -80,11 +80,11 @@ __device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index,
 __device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri)
 {
        /* fetch triangle vertex coordinates */
-       float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri));
+       float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri));
 
-       float3 p0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
-       float3 p1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
-       float3 p2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+       float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+       float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+       float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
 
        /* compute derivatives of P w.r.t. uv */
        *dPdu = (p0 - p2);
@@ -102,7 +102,7 @@ __device float triangle_attribute_float(KernelGlobals *kg, const ShaderData *sd,
                return kernel_tex_fetch(__attributes_float, offset + sd->prim);
        }
        else if(elem == ATTR_ELEMENT_VERTEX) {
-               float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+               float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
 
                float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
                float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
@@ -142,14 +142,14 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
                if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
                if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
 
-               return as_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+               return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
        }
        else if(elem == ATTR_ELEMENT_VERTEX) {
-               float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+               float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
 
-               float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
-               float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
-               float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+               float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
+               float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
+               float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
 
 #ifdef __RAY_DIFFERENTIALS__
                if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
@@ -160,9 +160,9 @@ __device float3 triangle_attribute_float3(KernelGlobals *kg, const ShaderData *s
        }
        else if(elem == ATTR_ELEMENT_CORNER) {
                int tri = offset + sd->prim*3;
-               float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
-               float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
-               float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+               float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+               float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+               float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
 
 #ifdef __RAY_DIFFERENTIALS__
                if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
index bd337eb95e1ed200ca797a734b6579cc602030c5..fbe827b179167408dcc9b5e39182ae660ae835bb 100644 (file)
 
 #include "kernel_math.h"
 
-#ifndef __KERNEL_OPENCL__
-
-#include "svm_types.h"
-
-#endif
+#include "svm/svm_types.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -239,9 +235,7 @@ typedef struct ShaderData {
 
        /* SVM closure data. we always sample a single closure, to get fixed
         * memory usage, svm_closure_data contains closure parameters. */
-#ifndef __KERNEL_OPENCL__
        ClosureType svm_closure;
-#endif
        float3 svm_closure_weight;
        float svm_closure_data[3]; /* CUDA gives compile error if out of bounds */
 
@@ -291,11 +285,15 @@ typedef struct KernelCamera {
        float shutterclose;
 
        /* differentials */
-       float3 dx, dy;
+       float3 dx;
+       float pad1;
+       float3 dy;
+       float pad2;
 
        /* clipping */
        float nearclip;
        float cliplength;
+       float pad3, pad4;
 
        /* more matrices */
        Transform screentoworld;
@@ -321,13 +319,14 @@ typedef struct KernelBackground {
 
 typedef struct KernelSunSky {
        /* sun direction in spherical and cartesian */
-       float theta, phi;
+       float theta, phi, pad3, pad4;
        float3 dir;
        float pad;
 
        /* perez function parameters */
-       float zenith_Y, zenith_x, zenith_y;
+       float zenith_Y, zenith_x, zenith_y, pad2;
        float perez_Y[5], perez_x[5], perez_y[5];
+       float pad5;
 } KernelSunSky;
 
 typedef struct KernelIntegrator {
@@ -348,7 +347,7 @@ typedef struct KernelIntegrator {
        float blur_caustics;
 
        /* padding */
-       int pad;
+       int pad[2];
 } KernelIntegrator;
 
 typedef struct KernelBVH {
index 40bae72a6c5c3fcb01262e7f26de9b2138e6965f..04e4ccb8313f8aa8a09cdb354277bbfc5f458998 100644 (file)
@@ -139,8 +139,8 @@ __device int bsdf_ashikhmin_velvet_sample(const ShaderData *sd, float randu, flo
                // TODO: find a better approximation for the retroreflective bounce
                *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
                *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
-               *domega_in_dx *= 125;
-               *domega_in_dy *= 125;
+               *domega_in_dx *= 125.0f;
+               *domega_in_dy *= 125.0f;
 #endif
        } else
                *pdf = 0.0f;
index c505de036aa89f655055086658dd2566a74d5d7b..00493e722037084eb08e2605aa44b0e99553ab95 100644 (file)
@@ -88,8 +88,8 @@ __device int bsdf_diffuse_sample(const ShaderData *sd, float randu, float randv,
                // TODO: find a better approximation for the diffuse bounce
                *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
                *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
-               *domega_in_dx *= 125;
-               *domega_in_dy *= 125;
+               *domega_in_dx *= 125.0f;
+               *domega_in_dy *= 125.0f;
 #endif
        }
        else
@@ -151,8 +151,8 @@ __device int bsdf_translucent_sample(const ShaderData *sd, float randu, float ra
                // TODO: find a better approximation for the diffuse bounce
                *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
                *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
-               *domega_in_dx *= -125;
-               *domega_in_dy *= -125;
+               *domega_in_dx *= -125.0f;
+               *domega_in_dy *= -125.0f;
 #endif
        } else
                *pdf = 0;
index b6baa1e90d89a5730416d59c4b1e3863b0f56b60..a948ba06871df2bc2032b99506ddb0905a0f2cbd 100644 (file)
@@ -195,8 +195,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
                                        // derivatives a bit bigger. In theory this varies with the
                                        // roughness but the exact relationship is complex and
                                        // requires more ops than are practical.
-                                       *domega_in_dx *= 10;
-                                       *domega_in_dy *= 10;
+                                       *domega_in_dx *= 10.0f;
+                                       *domega_in_dy *= 10.0f;
 #endif
                                }
                        }
@@ -246,8 +246,8 @@ __device int bsdf_microfacet_ggx_sample(const ShaderData *sd, float randu, float
                                // derivatives a bit bigger. In theory this varies with the
                                // roughness but the exact relationship is complex and
                                // requires more ops than are practical.
-                               *domega_in_dx *= 10;
-                               *domega_in_dy *= 10;
+                               *domega_in_dx *= 10.0f;
+                               *domega_in_dy *= 10.0f;
 #endif
                        }
                }
@@ -423,8 +423,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
                                        // derivatives a bit bigger. In theory this varies with the
                                        // roughness but the exact relationship is complex and
                                        // requires more ops than are practical.
-                                       *domega_in_dx *= 10;
-                                       *domega_in_dy *= 10;
+                                       *domega_in_dx *= 10.0f;
+                                       *domega_in_dy *= 10.0f;
 #endif
                                }
                        }
@@ -478,8 +478,8 @@ __device int bsdf_microfacet_beckmann_sample(const ShaderData *sd, float randu,
                                // derivatives a bit bigger. In theory this varies with the
                                // roughness but the exact relationship is complex and
                                // requires more ops than are practical.
-                               *domega_in_dx *= 10;
-                               *domega_in_dy *= 10;
+                               *domega_in_dx *= 10.0f;
+                               *domega_in_dy *= 10.0f;
 #endif
                        }
                }
index bf591acc9fa09157f02881d30312c8ecb3095bfb..9f857b32468f3dbfaea0cad44ce3dea2b7a5b19a 100644 (file)
@@ -187,8 +187,8 @@ __device int bsdf_ward_sample(const ShaderData *sd, float randu, float randv, fl
                                // derivatives a bit bigger. In theory this varies with the
                                // roughness but the exact relationship is complex and
                                // requires more ops than are practical.
-                               *domega_in_dx *= 10;
-                               *domega_in_dy *= 10;
+                               *domega_in_dx *= 10.0f;
+                               *domega_in_dy *= 10.0f;
 #endif
                        }
                }
index 7fe10f10dfc21b1e045dbf55787aa0bc6adeccf6..6031012d0ca7861401f405decd19b428efa3cf90 100644 (file)
@@ -122,8 +122,8 @@ __device int bsdf_westin_backscatter_sample(const ShaderData *sd, float randu, f
                                // derivatives a bit bigger. In theory this varies with the
                                // exponent but the exact relationship is complex and
                                // requires more ops than are practical.
-                               *domega_in_dx *= 10;
-                               *domega_in_dy *= 10;
+                               *domega_in_dx *= 10.0f;
+                               *domega_in_dy *= 10.0f;
 #endif
                        }
                }
@@ -198,8 +198,8 @@ __device int bsdf_westin_sheen_sample(const ShaderData *sd, float randu, float r
                // TODO: find a better approximation for the diffuse bounce
                *domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
                *domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
-               *domega_in_dx *= 125;
-               *domega_in_dy *= 125;
+               *domega_in_dx *= 125.0f;
+               *domega_in_dy *= 125.0f;
 #endif
        } else
                pdf = 0;
index b1be7b7f6bcb15d7238c5d2a404942148d010218..97fa4aff9e7500f6de5c987d77d3d8e6dc4715bc 100644 (file)
@@ -41,7 +41,7 @@ __device float svm_blend(float3 p, NodeBlendType type, NodeBlendAxis axis)
                return r*r;
        }
        else if(type == NODE_BLEND_EASING) {
-               float r = min(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
+               float r = fminf(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
                float t = r*r;
                
                return (3.0f*t - 2.0f*t*r);
index db8a8a132894044141e444fc4046e0de72f06198..b1677f67ecac1b426463cfc4c785fcd38905d107 100644 (file)
@@ -34,8 +34,9 @@ __device void svm_node_set_bump(ShaderData *sd, float *stack, uint c_offset, uin
        float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
 
        surfgrad *= 0.1f; /* todo: remove this factor */
-       
-       sd->N = normalize(fabsf(det)*sd->N - signf(det)*surfgrad);
+
+       float absdet = fabsf(det);
+       sd->N = normalize(absdet*sd->N - signf(det)*surfgrad);
 #endif
 }
 
index 469313e377db282cb1d31c736181e678529e70b4..7518a3a9d2d13e622712d579a0118f9babb356fb 100644 (file)
@@ -23,12 +23,13 @@ CCL_NAMESPACE_BEGIN
 __device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
 {
        float3 r;
+       float3 offset = make_float3(13.5f, 13.5f, 13.5f);
 
        p /= size;
 
-       r.x = noise_basis(p + make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+       r.x = noise_basis(p + offset, basis) * distortion;
        r.y = noise_basis(p, basis) * distortion;
-       r.z = noise_basis(p - make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+       r.z = noise_basis(p - offset, basis) * distortion;
 
     return noise_basis(p + r, distortion_basis); /* distorted-domain noise */
 }
index 88f0b58244201d9d0708597750f18c9e609753dc..586e35c646582773abb069441a0428cfb112d5fc 100644 (file)
@@ -31,6 +31,9 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
           also note that cuda has 128 textures limit, we use 100 now, since
           we still need some for other storage */
 
+#ifdef __KERNEL_OPENCL__
+       r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
+#else
        switch(id) {
                case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
                case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
@@ -136,6 +139,7 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
                        kernel_assert(0);
                        return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
        }
+#endif
 
        return r;
 }
@@ -151,8 +155,11 @@ __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack
        float4 f = svm_image_texture(kg, id, co.x, co.y);
        float3 r = make_float3(f.x, f.y, f.z);
 
-       if(srgb)
-               r = color_srgb_to_scene_linear(r);
+       if(srgb) {
+               r.x = color_srgb_to_scene_linear(r.x);
+               r.y = color_srgb_to_scene_linear(r.y);
+               r.z = color_srgb_to_scene_linear(r.z);
+       }
 
        stack_store_float3(stack, out_offset, r);
 }
@@ -170,8 +177,11 @@ __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float
        float4 f = svm_image_texture(kg, id, u, v);
        float3 r = make_float3(f.x, f.y, f.z);
 
-       if(srgb)
-               r = color_srgb_to_scene_linear(r);
+       if(srgb) {
+               r.x = color_srgb_to_scene_linear(r.x);
+               r.y = color_srgb_to_scene_linear(r.y);
+               r.z = color_srgb_to_scene_linear(r.z);
+       }
 
        stack_store_float3(stack, out_offset, r);
 }
index 5a8ca2f76dd7a6c3b1231c10042212ba4b188994..c9e6cdf43b954c3693d3b883cabdbc408bf0e80a 100644 (file)
@@ -41,7 +41,8 @@ __device float3 rgb_to_hsv(float3 rgb)
                h = 0.0f;
        }
        else {
-               c = (make_float3(cmax, cmax, cmax) - rgb)/cdelta;
+               float3 cmax3 = make_float3(cmax, cmax, cmax);
+               c = (cmax3 - rgb)/cdelta;
 
                if(rgb.x == cmax) h = c.z - c.y;
                else if(rgb.y == cmax) h = 2.0f + c.x -  c.z;
@@ -91,26 +92,33 @@ __device float3 hsv_to_rgb(float3 hsv)
        return rgb;
 }
 
+__device float3 svm_lerp(const float3 a, const float3 b, float t)
+{
+       return (a * (1.0f - t) + b * t);
+}
+
 __device float3 svm_mix_blend(float t, float3 col1, float3 col2)
 {
-       return lerp(col1, col2, t);
+       return svm_lerp(col1, col2, t);
 }
 
 __device float3 svm_mix_add(float t, float3 col1, float3 col2)
 {
-       return lerp(col1, col1 + col2, t);
+       return svm_lerp(col1, col1 + col2, t);
 }
 
 __device float3 svm_mix_mul(float t, float3 col1, float3 col2)
 {
-       return lerp(col1, col1 * col2, t);
+       return svm_lerp(col1, col1 * col2, t);
 }
 
 __device float3 svm_mix_screen(float t, float3 col1, float3 col2)
 {
        float tm = 1.0f - t;
+       float3 one = make_float3(1.0f, 1.0f, 1.0f);
+       float3 tm3 = make_float3(tm, tm, tm);
 
-       return make_float3(1.0f, 1.0f, 1.0f) - (make_float3(tm, tm, tm) + t*(make_float3(1.0f, 1.0f, 1.0f) - col2))*(make_float3(1.0f, 1.0f, 1.0f) - col1);
+       return one - (tm3 + t*(one - col2))*(one - col1);
 }
 
 __device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
@@ -139,7 +147,7 @@ __device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
 
 __device float3 svm_mix_sub(float t, float3 col1, float3 col2)
 {
-       return lerp(col1, col1 - col2, t);
+       return svm_lerp(col1, col1 - col2, t);
 }
 
 __device float3 svm_mix_div(float t, float3 col1, float3 col2)
@@ -157,7 +165,7 @@ __device float3 svm_mix_div(float t, float3 col1, float3 col2)
 
 __device float3 svm_mix_diff(float t, float3 col1, float3 col2)
 {
-       return lerp(col1, fabs(col1 - col2), t);
+       return svm_lerp(col1, fabs(col1 - col2), t);
 }
 
 __device float3 svm_mix_dark(float t, float3 col1, float3 col2)
@@ -255,7 +263,7 @@ __device float3 svm_mix_hue(float t, float3 col1, float3 col2)
                hsv.x = hsv2.x;
                float3 tmp = hsv_to_rgb(hsv); 
 
-               outcol = lerp(outcol, tmp, t);
+               outcol = svm_lerp(outcol, tmp, t);
        }
 
        return outcol;
@@ -302,7 +310,7 @@ __device float3 svm_mix_color(float t, float3 col1, float3 col2)
                hsv.y = hsv2.y;
                float3 tmp = hsv_to_rgb(hsv); 
 
-               outcol = lerp(outcol, tmp, t);
+               outcol = svm_lerp(outcol, tmp, t);
        }
 
        return outcol;
index dd02cb64cd7e70a3e3e756993f5652167604019c..eaba4d183650f50fd41831ace156a5c2218f9e37 100644 (file)
@@ -49,7 +49,7 @@ __device float sky_angle_between(float thetav, float phiv, float theta, float ph
        return safe_acosf(cospsi);
 }
 
-__device float sky_perez_function(float lam[5], float theta, float gamma)
+__device float sky_perez_function(__constant float *lam, float theta, float gamma)
 {
        float ctheta = cosf(theta);
        float cgamma = cosf(gamma);
index c5ded6d975f961c29e4f8bb0f9973b0c9281b82f..d4765cca384d3316a64315535a0d4dab831f99a9 100644 (file)
@@ -69,7 +69,7 @@ __device void voronoi(float3 p, NodeDistanceMetric distance_metric, float e, flo
                                float3 pd = p - (vp + ip);
                                float d = voronoi_distance(distance_metric, pd, e);
 
-                               vp += make_float3((float)xx, (float)yy, (float)zz);
+                               vp += ip;
 
                                if(d < da[0]) {
                                        da[3] = da[2];
index aa58aba79c921518c5ebb62b3e3d0ad837fa2013..769ccfc9bc0395a95be6117aaf12f3634fcfb7f3 100644 (file)
@@ -35,7 +35,7 @@ CCL_NAMESPACE_BEGIN
  * happens i have no idea, but consecutive values are problematic, maybe it
  * generates an incorrect jump table. */
 
-enum NodeType {
+typedef enum NodeType {
        NODE_END = 0,
        NODE_CLOSURE_BSDF = 100,
        NODE_CLOSURE_EMISSION = 200,
@@ -82,23 +82,23 @@ enum NodeType {
        NODE_ATTR_BUMP_DX = 4400,
        NODE_ATTR_BUMP_DY = 4500,
        NODE_TEX_ENVIRONMENT = 4600
-};
+} NodeType;
 
-enum NodeAttributeType {
+typedef enum NodeAttributeType {
        NODE_ATTR_FLOAT = 0,
        NODE_ATTR_FLOAT3
-};
+} NodeAttributeType;
 
-enum NodeGeometry {
+typedef enum NodeGeometry {
        NODE_GEOM_P = 0,
        NODE_GEOM_N,
        NODE_GEOM_T,
        NODE_GEOM_I,
        NODE_GEOM_Ng,
        NODE_GEOM_uv
-};
+} NodeGeometry;
 
-enum NodeLightPath {
+typedef enum NodeLightPath {
        NODE_LP_camera = 0,
        NODE_LP_shadow,
        NODE_LP_diffuse,
@@ -106,16 +106,16 @@ enum NodeLightPath {
        NODE_LP_reflection,
        NODE_LP_transmission,
        NODE_LP_backfacing
-};
+} NodeLightPath;
 
-enum NodeTexCoord {
+typedef enum NodeTexCoord {
        NODE_TEXCO_OBJECT,
        NODE_TEXCO_CAMERA,
        NODE_TEXCO_WINDOW,
        NODE_TEXCO_REFLECTION
-};
+} NodeTexCoord;
 
-enum NodeMix {
+typedef enum NodeMix {
        NODE_MIX_BLEND = 0,
        NODE_MIX_ADD,
        NODE_MIX_MUL,
@@ -134,9 +134,9 @@ enum NodeMix {
        NODE_MIX_COLOR,
        NODE_MIX_SOFT,
        NODE_MIX_LINEAR
-};
+} NodeMix;
 
-enum NodeMath {
+typedef enum NodeMath {
        NODE_MATH_ADD,
        NODE_MATH_SUBTRACT,
        NODE_MATH_MULTIPLY,
@@ -154,24 +154,24 @@ enum NodeMath {
        NODE_MATH_ROUND,
        NODE_MATH_LESS_THAN,
        NODE_MATH_GREATER_THAN
-};
+} NodeMath;
 
-enum NodeVectorMath {
+typedef enum NodeVectorMath {
        NODE_VECTOR_MATH_ADD,
        NODE_VECTOR_MATH_SUBTRACT,
        NODE_VECTOR_MATH_AVERAGE,
        NODE_VECTOR_MATH_DOT_PRODUCT,
        NODE_VECTOR_MATH_CROSS_PRODUCT,
        NODE_VECTOR_MATH_NORMALIZE
-};
+} NodeVectorMath;
 
-enum NodeConvert {
+typedef enum NodeConvert {
        NODE_CONVERT_FV,
        NODE_CONVERT_CF,
        NODE_CONVERT_VF
-};
+} NodeConvert;
 
-enum NodeDistanceMetric {
+typedef enum NodeDistanceMetric {
        NODE_VORONOI_DISTANCE_SQUARED,
        NODE_VORONOI_ACTUAL_DISTANCE,
        NODE_VORONOI_MANHATTAN,
@@ -179,9 +179,9 @@ enum NodeDistanceMetric {
        NODE_VORONOI_MINKOVSKY_H,
        NODE_VORONOI_MINKOVSKY_4,
        NODE_VORONOI_MINKOVSKY
-};
+} NodeDistanceMetric;
 
-enum NodeNoiseBasis {
+typedef enum NodeNoiseBasis {
        NODE_NOISE_PERLIN,
        NODE_NOISE_VORONOI_F1,
        NODE_NOISE_VORONOI_F2,
@@ -190,30 +190,30 @@ enum NodeNoiseBasis {
        NODE_NOISE_VORONOI_F2_F1,
        NODE_NOISE_VORONOI_CRACKLE,
        NODE_NOISE_CELL_NOISE
-};
+} NodeNoiseBasis;
 
-enum NodeWaveType {
+typedef enum NodeWaveType {
        NODE_WAVE_SINE,
        NODE_WAVE_SAW,
        NODE_WAVE_TRI
-};
+} NodeWaveType;
 
-enum NodeMusgraveType {
+typedef enum NodeMusgraveType {
        NODE_MUSGRAVE_MULTIFRACTAL,
        NODE_MUSGRAVE_FBM,
        NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
        NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
        NODE_MUSGRAVE_HETERO_TERRAIN
-};
+} NodeMusgraveType;
 
-enum NodeWoodType {
+typedef enum NodeWoodType {
        NODE_WOOD_BANDS,
        NODE_WOOD_RINGS,
        NODE_WOOD_BAND_NOISE,
        NODE_WOOD_RING_NOISE
-};
+} NodeWoodType;
 
-enum NodeBlendType {
+typedef enum NodeBlendType {
        NODE_BLEND_LINEAR,
        NODE_BLEND_QUADRATIC,
        NODE_BLEND_EASING,
@@ -221,37 +221,37 @@ enum NodeBlendType {
        NODE_BLEND_RADIAL,
        NODE_BLEND_QUADRATIC_SPHERE,
        NODE_BLEND_SPHERICAL
-};
+} NodeBlendType;
 
-enum NodeBlendAxis {
+typedef enum NodeBlendAxis {
        NODE_BLEND_HORIZONTAL,
        NODE_BLEND_VERTICAL
-};
+} NodeBlendAxis;
 
-enum NodeMarbleType {
+typedef enum NodeMarbleType {
        NODE_MARBLE_SOFT,
        NODE_MARBLE_SHARP,
        NODE_MARBLE_SHARPER
-};
+} NodeMarbleType;
 
-enum NodeStucciType {
+typedef enum NodeStucciType {
        NODE_STUCCI_PLASTIC,
        NODE_STUCCI_WALL_IN,
        NODE_STUCCI_WALL_OUT
-};
+} NodeStucciType;
 
-enum NodeVoronoiColoring {
+typedef enum NodeVoronoiColoring {
        NODE_VORONOI_INTENSITY,
        NODE_VORONOI_POSITION,
        NODE_VORONOI_POSITION_OUTLINE,
        NODE_VORONOI_POSITION_OUTLINE_INTENSITY
-};
+} NodeVoronoiColoring;
 
-enum ShaderType {
+typedef enum ShaderType {
        SHADER_TYPE_SURFACE,
        SHADER_TYPE_VOLUME,
        SHADER_TYPE_DISPLACEMENT
-};
+} ShaderType;
 
 /* Closure */
 
index 60b738bfc51063e15fe5515ce494012144b7eb0e..a11a5c7c2a75bc6ef6f159786b6a310e3810e7c7 100644 (file)
@@ -40,6 +40,8 @@ __device float color_scene_linear_to_srgb(float c)
                return 1.055f * pow(c, 1.0f/2.4f) - 0.055f;
 }
 
+#ifndef __KERNEL_OPENCL__
+
 __device float3 color_srgb_to_scene_linear(float3 c)
 {
        return make_float3(
@@ -56,6 +58,8 @@ __device float3 color_scene_linear_to_srgb(float3 c)
                color_scene_linear_to_srgb(c.z));
 }
 
+#endif
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_COLOR_H__ */
index 3475e309af5d0fbc15588c11b11a562ac5f2e79c..e6dd00fd86be0268f26f7238361fc1ee64e91539 100644 (file)
@@ -506,13 +506,13 @@ __device_inline float3 fabs(float3 a)
        return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
 }
 
-__device_inline float3 as_float3(const float4& a)
+#endif
+
+__device_inline float3 float4_to_float3(const float4 a)
 {
        return make_float3(a.x, a.y, a.z);
 }
 
-#endif
-
 #ifndef __KERNEL_GPU__
 
 __device_inline void print_float3(const char *label, const float3& a)