cl_int ciErr;
map<string, device_vector<uchar>*> const_mem_map;
map<string, device_memory*> mem_map;
+ device_ptr null_mem;
const char *opencl_error_string(cl_int err)
{
ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
opencl_assert(ciErr);
- ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_GPU, 1, &cdDevice, NULL);
+ ciErr = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &cdDevice, NULL);
opencl_assert(ciErr);
- cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL /*clLogMessagesToStdoutAPPLE */, NULL, &ciErr);
+ cxGPUContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
opencl_assert(ciErr);
cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErr);
/* compile kernel */
string source = string_printf("#include \"kernel.cl\" // %lf\n", time_dt());
size_t source_len = source.size();
- string build_options = "-I ../kernel -I ../util -Werror -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END="; //" + path_get("kernel") + " -Werror";
- //printf("path %s\n", path_get("kernel").c_str());
- //clUnloadCompiler();
+ string build_options = "";
+
+ //string csource = "../blender/intern/cycles";
+ //build_options += "-I " + csource + "/kernel -I " + csource + "/util";
+
+ build_options += " -I " + path_get("kernel"); /* todo: escape path */
+
+ build_options += " -Werror";
+ build_options += " -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END=";
cpProgram = clCreateProgramWithSource(cxGPUContext, 1, (const char **)&source, &source_len, &ciErr);
opencl_assert(ciErr);
ckFilmConvertKernel = clCreateKernel(cpProgram, "kernel_ocl_tonemap", &ciErr);
opencl_assert(ciErr);
+
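+	/* dummy 1 byte buffer used in place of textures that have not been
+	 * allocated yet (see set_kernel_arg_mem) */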
+ null_mem = (device_ptr)clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
}
~OpenCLDevice()
{
+
+ clReleaseMemObject(CL_MEM_PTR(null_mem));
+
map<string, device_vector<uchar>*>::iterator mt;
for(mt = const_mem_map.begin(); mt != const_mem_map.end(); mt++) {
mem_free(*(mt->second));
void tex_alloc(const char *name, device_memory& mem, bool interpolation, bool periodic)
{
mem_alloc(mem, MEM_READ_ONLY);
+ mem_copy_to(mem);
mem_map[name] = &mem;
}
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_rng_state), (void*)&d_rng_state);
+
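+/* bind the texture buffers listed in kernel_textures.h, in the same order as
+ * the kernel parameters that are generated from that same header */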
+#define KERNEL_TEX(type, ttype, name) \
+ ciErr |= set_kernel_arg_mem(ckPathTraceKernel, &narg, #name);
+#include "kernel_textures.h"
+
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_x), (void*)&d_x);
ciErr |= clSetKernelArg(ckPathTraceKernel, narg++, sizeof(d_y), (void*)&d_y);
cl_int set_kernel_arg_mem(cl_kernel kernel, int *narg, const char *name)
{
- device_memory *mem = mem_map[name];
- cl_mem ptr = CL_MEM_PTR(mem->device_pointer);
- cl_int size = mem->data_width;
- cl_int err = 0;
+ cl_mem ptr;
+ cl_int size, err = 0;
+
+ if(mem_map.find(name) != mem_map.end()) {
+ device_memory *mem = mem_map[name];
+
+ ptr = CL_MEM_PTR(mem->device_pointer);
+ size = mem->data_width;
+ }
+ else {
+ /* work around NULL not working, even though the spec says otherwise */
+ ptr = CL_MEM_PTR(null_mem);
+ size = 1;
+ }
err |= clSetKernelArg(kernel, (*narg)++, sizeof(ptr), (void*)&ptr);
opencl_assert(err);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_data), (void*)&d_data);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_rgba), (void*)&d_rgba);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_buffer), (void*)&d_buffer);
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_R");
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_G");
- ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, "__response_curve_B");
+
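+/* bind every texture from the shared list instead of only the response curves */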
+#define KERNEL_TEX(type, ttype, name) \
+ ciErr |= set_kernel_arg_mem(ckFilmConvertKernel, &narg, #name);
+#include "kernel_textures.h"
+
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_pass), (void*)&d_pass);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_resolution), (void*)&d_resolution);
ciErr |= clSetKernelArg(ckFilmConvertKernel, narg++, sizeof(d_x), (void*)&d_x);
kernel_qbvh.h
kernel_random.h
kernel_shader.h
+ kernel_textures.h
kernel_triangle.h
- kernel_types.h
+ kernel_types.h)
+
+SET(svm_headers
svm/bsdf.h
svm/bsdf_ashikhmin_velvet.h
svm/bsdf_diffuse.h
ENDIF()
IF(WITH_CYCLES_CUDA)
- SET(cuda_sources kernel.cu ${headers})
+ SET(cuda_sources kernel.cu ${headers} ${svm_headers})
SET(cuda_cubins)
FOREACH(arch ${CYCLES_CUDA_ARCH})
INCLUDE_DIRECTORIES(. ../util osl svm)
-ADD_LIBRARY(cycles_kernel ${sources} ${headers})
+ADD_LIBRARY(cycles_kernel ${sources} ${headers} ${svm_headers})
IF(WITH_CYCLES_CUDA)
ADD_DEPENDENCIES(cycles_kernel cycles_kernel_cuda)
ENDIF()
+# OPENCL kernel
+
+IF(WITH_CYCLES_OPENCL)
+ SET(util_headers
+ ../util/util_color.h
+ ../util/util_math.h
+ ../util/util_transform.h
+ ../util/util_types.h)
+
+ INSTALL(FILES kernel.cl ${headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ INSTALL(FILES ${svm_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel/svm)
+ INSTALL(FILES ${util_headers} DESTINATION ${CYCLES_INSTALL_PATH}/cycles/kernel)
+ENDIF()
+
#include "kernel_types.h"
#include "kernel_globals.h"
-typedef struct KernelGlobals {
- __constant KernelData *data;
-
- __global float *__response_curve_R;
- int __response_curve_R_width;
-
- __global float *__response_curve_G;
- int __response_curve_G_width;
-
- __global float *__response_curve_B;
- int __response_curve_B_width;
-} KernelGlobals;
-
#include "kernel_film.h"
-//#include "kernel_path.h"
+#include "kernel_path.h"
//#include "kernel_displace.h"
-__kernel void kernel_ocl_path_trace(__constant KernelData *data, __global float4 *buffer, __global uint *rng_state, int pass, int sx, int sy, int sw, int sh)
+__kernel void kernel_ocl_path_trace(
+ __constant KernelData *data,
+ __global float4 *buffer,
+ __global uint *rng_state,
+
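+/* each texture in kernel_textures.h becomes a buffer pointer plus width argument */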
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name, \
+ int name##_width,
+#include "kernel_textures.h"
+
+ int pass,
+ int sx, int sy, int sw, int sh)
{
KernelGlobals kglobals, *kg = &kglobals;
+
kg->data = data;
- int x = get_global_id(0);
- int y = get_global_id(1);
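+/* copy the per-texture arguments into KernelGlobals for kernel_tex_fetch/interp */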
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name; \
+ kg->name##_width = name##_width;
+#include "kernel_textures.h"
+
+ int x = sx + get_global_id(0);
+ int y = sy + get_global_id(1);
int w = kernel_data.cam.width;
- if(x < sx + sw && y < sy + sh) {
- if(pass == 0) {
- buffer[x + w*y].x = 0.5f;
- buffer[x + w*y].y = 0.5f;
- buffer[x + w*y].z = 0.5f;
- }
- else {
- buffer[x + w*y].x += 0.5f;
- buffer[x + w*y].y += 0.5f;
- buffer[x + w*y].z += 0.5f;
- }
-
- //= make_float3(1.0f, 0.9f, 0.0f);
- //kernel_path_trace(buffer, rng_state, pass, x, y);
- }
+ if(x < sx + sw && y < sy + sh)
+ kernel_path_trace(kg, buffer, rng_state, pass, x, y);
}
__kernel void kernel_ocl_tonemap(
__constant KernelData *data,
__global uchar4 *rgba,
__global float4 *buffer,
- __global float *__response_curve_R,
- int __response_curve_R_width,
- __global float *__response_curve_G,
- int __response_curve_G_width,
- __global float *__response_curve_B,
- int __response_curve_B_width,
+
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name, \
+ int name##_width,
+#include "kernel_textures.h"
+
int pass, int resolution,
int sx, int sy, int sw, int sh)
{
KernelGlobals kglobals, *kg = &kglobals;
kg->data = data;
- kg->__response_curve_R = __response_curve_R;
- kg->__response_curve_R_width = __response_curve_R_width;
- kg->__response_curve_G = __response_curve_G;
- kg->__response_curve_G_width = __response_curve_G_width;
- kg->__response_curve_B = __response_curve_B;
- kg->__response_curve_B_width = __response_curve_B_width;
+
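+/* same per-texture copy into KernelGlobals as in kernel_ocl_path_trace */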
+#define KERNEL_TEX(type, ttype, name) \
+ kg->name = name; \
+ kg->name##_width = name##_width;
+#include "kernel_textures.h"
int x = sx + get_global_id(0);
int y = sy + get_global_id(1);
kernel_film_tonemap(kg, rgba, buffer, pass, resolution, x, y);
}
-__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
+/*__kernel void kernel_ocl_displace(__global uint4 *input, __global float3 *offset, int sx)
{
int x = sx + get_global_id(0);
kernel_displace(input, offset, x);
-}
+}*/
#define __device_inline __device__ __inline__
#define __global
#define __shared __shared__
-#define __constant __constant__
+#define __constant
/* No assert supported for CUDA */
#define __device
#define __device_inline
+#define kernel_assert(cond)
+
__device float kernel_tex_interp_(__global float *data, int width, float x)
{
x = clamp(x, 0.0f, 1.0f)*width;
return (1.0f - t)*data[index] + t*data[nindex];
}
+#define make_float3(x, y, z) ((float3)(x, y, z)) /* todo 1.1 */
+
+#define __uint_as_float(x) as_float(x)
+#define __float_as_uint(x) as_uint(x)
+#define __int_as_float(x) as_float(x)
+#define __float_as_int(x) as_int(x)
+
#define kernel_data (*kg->data)
#define kernel_tex_interp(t, x) \
- kernel_tex_interp_(kg->t, kg->t##_width, x);
+ kernel_tex_interp_(kg->t, kg->t##_width, x)
+#define kernel_tex_fetch(t, index) \
+ kg->t[index]
+
+#define NULL 0
CCL_NAMESPACE_END
/* Constant Globals */
-#ifdef __KERNEL_CPU__
-
-#ifdef WITH_OSL
-#include "osl_globals.h"
-#endif
-
CCL_NAMESPACE_BEGIN
/* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
the kernel, to access constant data. These are all stored as "textures", but
these are really just standard arrays. We can't actually use globals because
multiple renders may be running inside the same process. */
-typedef struct KernelGlobals {
-#else
+#ifdef __KERNEL_CPU__
-/* On the GPU, constant memory textures must be globals, so we can't put them
- into a struct. As a result we don't actually use this struct and use actual
- globals and simply pass along a NULL pointer everywhere, which we hope gets
- optimized out. */
-#ifdef __KERNEL_CUDA__
-typedef struct KernelGlobals {} KernelGlobals;
+#ifdef WITH_OSL
+//#include "osl_globals.h"
#endif
+typedef struct KernelGlobals {
+
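+/* one member per texture entry, using the host side texture types */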
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
+
+ KernelData __data;
+
+#ifdef WITH_OSL
+ /* On the CPU, we also have the OSL globals here. Most data structures are shared
+	   with SVM; the difference is in the shaders and object/mesh attributes. */
+ //OSLGlobals osl;
#endif
-/* globals */
-__constant KernelData __data;
-
-#ifndef __KERNEL_OPENCL__
-
-/* bvh */
-texture_float4 __bvh_nodes;
-texture_float4 __tri_woop;
-texture_uint __prim_index;
-texture_uint __prim_object;
-texture_uint __object_node;
-
-/* objects */
-texture_float4 __objects;
-
-/* triangles */
-texture_float4 __tri_normal;
-texture_float4 __tri_vnormal;
-texture_float4 __tri_vindex;
-texture_float4 __tri_verts;
-
-/* attributes */
-texture_uint4 __attributes_map;
-texture_float __attributes_float;
-texture_float4 __attributes_float3;
-
-/* lights */
-texture_float4 __light_distribution;
-texture_float4 __light_point;
-
-/* shaders */
-texture_uint4 __svm_nodes;
-
-/* camera/film */
-texture_float __filter_table;
-texture_float __response_curve_R;
-texture_float __response_curve_G;
-texture_float __response_curve_B;
-
-/* sobol */
-texture_uint __sobol_directions;
-
-/* image */
-texture_image_uchar4 __tex_image_000;
-texture_image_uchar4 __tex_image_001;
-texture_image_uchar4 __tex_image_002;
-texture_image_uchar4 __tex_image_003;
-texture_image_uchar4 __tex_image_004;
-texture_image_uchar4 __tex_image_005;
-texture_image_uchar4 __tex_image_006;
-texture_image_uchar4 __tex_image_007;
-texture_image_uchar4 __tex_image_008;
-texture_image_uchar4 __tex_image_009;
-texture_image_uchar4 __tex_image_010;
-texture_image_uchar4 __tex_image_011;
-texture_image_uchar4 __tex_image_012;
-texture_image_uchar4 __tex_image_013;
-texture_image_uchar4 __tex_image_014;
-texture_image_uchar4 __tex_image_015;
-texture_image_uchar4 __tex_image_016;
-texture_image_uchar4 __tex_image_017;
-texture_image_uchar4 __tex_image_018;
-texture_image_uchar4 __tex_image_019;
-texture_image_uchar4 __tex_image_020;
-texture_image_uchar4 __tex_image_021;
-texture_image_uchar4 __tex_image_022;
-texture_image_uchar4 __tex_image_023;
-texture_image_uchar4 __tex_image_024;
-texture_image_uchar4 __tex_image_025;
-texture_image_uchar4 __tex_image_026;
-texture_image_uchar4 __tex_image_027;
-texture_image_uchar4 __tex_image_028;
-texture_image_uchar4 __tex_image_029;
-texture_image_uchar4 __tex_image_030;
-texture_image_uchar4 __tex_image_031;
-texture_image_uchar4 __tex_image_032;
-texture_image_uchar4 __tex_image_033;
-texture_image_uchar4 __tex_image_034;
-texture_image_uchar4 __tex_image_035;
-texture_image_uchar4 __tex_image_036;
-texture_image_uchar4 __tex_image_037;
-texture_image_uchar4 __tex_image_038;
-texture_image_uchar4 __tex_image_039;
-texture_image_uchar4 __tex_image_040;
-texture_image_uchar4 __tex_image_041;
-texture_image_uchar4 __tex_image_042;
-texture_image_uchar4 __tex_image_043;
-texture_image_uchar4 __tex_image_044;
-texture_image_uchar4 __tex_image_045;
-texture_image_uchar4 __tex_image_046;
-texture_image_uchar4 __tex_image_047;
-texture_image_uchar4 __tex_image_048;
-texture_image_uchar4 __tex_image_049;
-texture_image_uchar4 __tex_image_050;
-texture_image_uchar4 __tex_image_051;
-texture_image_uchar4 __tex_image_052;
-texture_image_uchar4 __tex_image_053;
-texture_image_uchar4 __tex_image_054;
-texture_image_uchar4 __tex_image_055;
-texture_image_uchar4 __tex_image_056;
-texture_image_uchar4 __tex_image_057;
-texture_image_uchar4 __tex_image_058;
-texture_image_uchar4 __tex_image_059;
-texture_image_uchar4 __tex_image_060;
-texture_image_uchar4 __tex_image_061;
-texture_image_uchar4 __tex_image_062;
-texture_image_uchar4 __tex_image_063;
-texture_image_uchar4 __tex_image_064;
-texture_image_uchar4 __tex_image_065;
-texture_image_uchar4 __tex_image_066;
-texture_image_uchar4 __tex_image_067;
-texture_image_uchar4 __tex_image_068;
-texture_image_uchar4 __tex_image_069;
-texture_image_uchar4 __tex_image_070;
-texture_image_uchar4 __tex_image_071;
-texture_image_uchar4 __tex_image_072;
-texture_image_uchar4 __tex_image_073;
-texture_image_uchar4 __tex_image_074;
-texture_image_uchar4 __tex_image_075;
-texture_image_uchar4 __tex_image_076;
-texture_image_uchar4 __tex_image_077;
-texture_image_uchar4 __tex_image_078;
-texture_image_uchar4 __tex_image_079;
-texture_image_uchar4 __tex_image_080;
-texture_image_uchar4 __tex_image_081;
-texture_image_uchar4 __tex_image_082;
-texture_image_uchar4 __tex_image_083;
-texture_image_uchar4 __tex_image_084;
-texture_image_uchar4 __tex_image_085;
-texture_image_uchar4 __tex_image_086;
-texture_image_uchar4 __tex_image_087;
-texture_image_uchar4 __tex_image_088;
-texture_image_uchar4 __tex_image_089;
-texture_image_uchar4 __tex_image_090;
-texture_image_uchar4 __tex_image_091;
-texture_image_uchar4 __tex_image_092;
-texture_image_uchar4 __tex_image_093;
-texture_image_uchar4 __tex_image_094;
-texture_image_uchar4 __tex_image_095;
-texture_image_uchar4 __tex_image_096;
-texture_image_uchar4 __tex_image_097;
-texture_image_uchar4 __tex_image_098;
-texture_image_uchar4 __tex_image_099;
+} KernelGlobals;
#endif
-#ifdef __KERNEL_CPU__
+/* For CUDA, constant memory textures must be globals, so we can't put them
+ into a struct. As a result we don't actually use this struct and use actual
+ globals and simply pass along a NULL pointer everywhere, which we hope gets
+ optimized out. */
-#ifdef WITH_OSL
+#ifdef __KERNEL_CUDA__
-/* On the CPU, we also have the OSL globals here. Most data structures are shared
- with SVM, the difference is in the shaders and object/mesh attributes. */
+__constant__ KernelData __data;
+typedef struct KernelGlobals {} KernelGlobals;
-OSLGlobals osl;
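+/* each entry expands to a file scope CUDA texture reference */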
+#define KERNEL_TEX(type, ttype, name) ttype name;
+#define KERNEL_IMAGE_TEX(type, ttype, name) ttype name;
+#include "kernel_textures.h"
#endif
+/* OpenCL */
+
+#ifdef __KERNEL_OPENCL__
+
+typedef struct KernelGlobals {
+ __constant KernelData *data;
+
+#define KERNEL_TEX(type, ttype, name) \
+ __global type *name; \
+ int name##_width;
+#include "kernel_textures.h"
} KernelGlobals;
+
#endif
CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
-struct LightSample {
+typedef struct LightSample {
float3 P;
float3 Ng;
int object;
int prim;
int shader;
float weight;
-};
+} LightSample;
/* Point Light */
--- /dev/null
+
+#ifndef KERNEL_TEX
+#define KERNEL_TEX(type, ttype, name)
+#endif
+
+#ifndef KERNEL_IMAGE_TEX
+#define KERNEL_IMAGE_TEX(type, ttype, name)
+#endif
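+/* both macros default to no-ops so this header can always be included; each
+ * includer redefines them to generate declarations, parameters or bindings */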
+
+
+/* bvh */
+KERNEL_TEX(float4, texture_float4, __bvh_nodes)
+KERNEL_TEX(float4, texture_float4, __tri_woop)
+KERNEL_TEX(uint, texture_uint, __prim_index)
+KERNEL_TEX(uint, texture_uint, __prim_object)
+KERNEL_TEX(uint, texture_uint, __object_node)
+
+/* objects */
+KERNEL_TEX(float4, texture_float4, __objects)
+
+/* triangles */
+KERNEL_TEX(float4, texture_float4, __tri_normal)
+KERNEL_TEX(float4, texture_float4, __tri_vnormal)
+KERNEL_TEX(float4, texture_float4, __tri_vindex)
+KERNEL_TEX(float4, texture_float4, __tri_verts)
+
+/* attributes */
+KERNEL_TEX(uint4, texture_uint4, __attributes_map)
+KERNEL_TEX(float, texture_float, __attributes_float)
+KERNEL_TEX(float4, texture_float4, __attributes_float3)
+
+/* lights */
+KERNEL_TEX(float4, texture_float4, __light_distribution)
+KERNEL_TEX(float4, texture_float4, __light_point)
+
+/* shaders */
+KERNEL_TEX(uint4, texture_uint4, __svm_nodes)
+
+/* camera/film */
+KERNEL_TEX(float, texture_float, __filter_table)
+KERNEL_TEX(float, texture_float, __response_curve_R)
+KERNEL_TEX(float, texture_float, __response_curve_G)
+KERNEL_TEX(float, texture_float, __response_curve_B)
+
+/* sobol */
+KERNEL_TEX(uint, texture_uint, __sobol_directions)
+
+/* image */
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_000)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_001)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_002)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_003)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_004)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_005)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_006)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_007)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_008)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_009)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_010)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_011)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_012)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_013)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_014)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_015)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_016)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_017)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_018)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_019)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_020)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_021)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_022)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_023)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_024)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_025)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_026)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_027)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_028)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_029)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_030)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_031)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_032)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_033)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_034)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_035)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_036)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_037)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_038)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_039)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_040)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_041)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_042)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_043)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_044)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_045)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_046)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_047)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_048)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_049)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_050)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_051)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_052)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_053)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_054)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_055)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_056)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_057)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_058)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_059)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_060)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_061)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_062)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_063)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_064)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_065)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_066)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_067)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_068)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_069)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_070)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_071)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_072)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_073)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_074)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_075)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_076)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_077)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_078)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_079)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_080)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_081)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_082)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_083)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_084)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_085)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_086)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_087)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_088)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_089)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_090)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_091)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_092)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_093)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_094)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_095)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_096)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_097)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_098)
+KERNEL_IMAGE_TEX(uchar4, texture_image_uchar4, __tex_image_099)
+
+#undef KERNEL_TEX
+#undef KERNEL_IMAGE_TEX
+
+
__device_inline float3 triangle_point_MT(KernelGlobals *kg, int tri_index, float u, float v)
{
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute point */
float t = 1.0f - u - v;
{
#if 0
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 v0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 v1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 v2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 v0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 v1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 v2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute normal */
return normalize(cross(v2 - v0, v1 - v0));
__device_inline float3 triangle_smooth_normal(KernelGlobals *kg, int tri_index, float u, float v)
{
/* load triangle vertices */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri_index));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri_index));
- float3 n0 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
- float3 n1 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
- float3 n2 = as_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
+ float3 n0 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.x)));
+ float3 n1 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.y)));
+ float3 n2 = float4_to_float3(kernel_tex_fetch(__tri_vnormal, __float_as_int(tri_vindex.z)));
return normalize((1.0f - u - v)*n2 + u*n0 + v*n1);
}
__device_inline void triangle_dPdudv(KernelGlobals *kg, float3 *dPdu, float3 *dPdv, int tri)
{
/* fetch triangle vertex coordinates */
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, tri));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, tri));
- float3 p0 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
- float3 p1 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
- float3 p2 = as_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
+ float3 p0 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.x)));
+ float3 p1 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.y)));
+ float3 p2 = float4_to_float3(kernel_tex_fetch(__tri_verts, __float_as_int(tri_vindex.z)));
/* compute derivatives of P w.r.t. uv */
*dPdu = (p0 - p2);
return kernel_tex_fetch(__attributes_float, offset + sd->prim);
}
else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
float f0 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.x));
float f1 = kernel_tex_fetch(__attributes_float, offset + __float_as_int(tri_vindex.y));
if(dx) *dx = make_float3(0.0f, 0.0f, 0.0f);
if(dy) *dy = make_float3(0.0f, 0.0f, 0.0f);
- return as_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
+ return float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + sd->prim));
}
else if(elem == ATTR_ELEMENT_VERTEX) {
- float3 tri_vindex = as_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
+ float3 tri_vindex = float4_to_float3(kernel_tex_fetch(__tri_vindex, sd->prim));
- float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
- float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
- float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.x)));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.y)));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, offset + __float_as_int(tri_vindex.z)));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
}
else if(elem == ATTR_ELEMENT_CORNER) {
int tri = offset + sd->prim*3;
- float3 f0 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
- float3 f1 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
- float3 f2 = as_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
+ float3 f0 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 0));
+ float3 f1 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 1));
+ float3 f2 = float4_to_float3(kernel_tex_fetch(__attributes_float3, tri + 2));
#ifdef __RAY_DIFFERENTIALS__
if(dx) *dx = sd->du.dx*f0 + sd->dv.dx*f1 - (sd->du.dx + sd->dv.dx)*f2;
#include "kernel_math.h"
-#ifndef __KERNEL_OPENCL__
-
-#include "svm_types.h"
-
-#endif
+#include "svm/svm_types.h"
CCL_NAMESPACE_BEGIN
/* SVM closure data. we always sample a single closure, to get fixed
* memory usage, svm_closure_data contains closure parameters. */
-#ifndef __KERNEL_OPENCL__
ClosureType svm_closure;
-#endif
float3 svm_closure_weight;
float svm_closure_data[3]; /* CUDA gives compile error if out of bounds */
float shutterclose;
/* differentials */
- float3 dx, dy;
+ float3 dx;
+ float pad1;
+ float3 dy;
+ float pad2;
/* clipping */
float nearclip;
float cliplength;
+ float pad3, pad4;
/* more matrices */
Transform screentoworld;
typedef struct KernelSunSky {
/* sun direction in spherical and cartesian */
- float theta, phi;
+ float theta, phi, pad3, pad4;
float3 dir;
float pad;
/* perez function parameters */
- float zenith_Y, zenith_x, zenith_y;
+ float zenith_Y, zenith_x, zenith_y, pad2;
float perez_Y[5], perez_x[5], perez_y[5];
+ float pad5;
} KernelSunSky;
typedef struct KernelIntegrator {
float blur_caustics;
/* padding */
- int pad;
+ int pad[2];
} KernelIntegrator;
typedef struct KernelBVH {
// TODO: find a better approximation for the retroreflective bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
} else
*pdf = 0.0f;
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
}
else
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= -125;
- *domega_in_dy *= -125;
+ *domega_in_dx *= -125.0f;
+ *domega_in_dy *= -125.0f;
#endif
} else
*pdf = 0;
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// derivatives a bit bigger. In theory this varies with the
// roughness but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// derivatives a bit bigger. In theory this varies with the
// exponent but the exact relationship is complex and
// requires more ops than are practical.
- *domega_in_dx *= 10;
- *domega_in_dy *= 10;
+ *domega_in_dx *= 10.0f;
+ *domega_in_dy *= 10.0f;
#endif
}
}
// TODO: find a better approximation for the diffuse bounce
*domega_in_dx = (2 * dot(m_N, sd->dI.dx)) * m_N - sd->dI.dx;
*domega_in_dy = (2 * dot(m_N, sd->dI.dy)) * m_N - sd->dI.dy;
- *domega_in_dx *= 125;
- *domega_in_dy *= 125;
+ *domega_in_dx *= 125.0f;
+ *domega_in_dy *= 125.0f;
#endif
} else
pdf = 0;
return r*r;
}
else if(type == NODE_BLEND_EASING) {
- float r = min(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
+ float r = fminf(fmaxf((1.0f + x)/2.0f, 0.0f), 1.0f);
float t = r*r;
return (3.0f*t - 2.0f*t*r);
float3 surfgrad = (h_x - h_c)*Rx + (h_y - h_c)*Ry;
surfgrad *= 0.1f; /* todo: remove this factor */
-
- sd->N = normalize(fabsf(det)*sd->N - signf(det)*surfgrad);
+
+ float absdet = fabsf(det);
+ sd->N = normalize(absdet*sd->N - signf(det)*surfgrad);
#endif
}
__device float svm_distorted_noise(float3 p, float size, NodeNoiseBasis basis, NodeNoiseBasis distortion_basis, float distortion)
{
float3 r;
+ float3 offset = make_float3(13.5f, 13.5f, 13.5f);
p /= size;
- r.x = noise_basis(p + make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+ r.x = noise_basis(p + offset, basis) * distortion;
r.y = noise_basis(p, basis) * distortion;
- r.z = noise_basis(p - make_float3(13.5f, 13.5f, 13.5f), basis) * distortion;
+ r.z = noise_basis(p - offset, basis) * distortion;
return noise_basis(p + r, distortion_basis); /* distorted-domain noise */
}
also note that CUDA has a 128 texture limit; we use 100 now, since
we still need some for other storage */
+#ifdef __KERNEL_OPENCL__
+ r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
+#else
switch(id) {
case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
kernel_assert(0);
return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
}
+#endif
return r;
}
float4 f = svm_image_texture(kg, id, co.x, co.y);
float3 r = make_float3(f.x, f.y, f.z);
- if(srgb)
- r = color_srgb_to_scene_linear(r);
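+	/* the float3 version of color_srgb_to_scene_linear is not available for
+	   OpenCL, so convert per channel */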
+ if(srgb) {
+ r.x = color_srgb_to_scene_linear(r.x);
+ r.y = color_srgb_to_scene_linear(r.y);
+ r.z = color_srgb_to_scene_linear(r.z);
+ }
stack_store_float3(stack, out_offset, r);
}
float4 f = svm_image_texture(kg, id, u, v);
float3 r = make_float3(f.x, f.y, f.z);
- if(srgb)
- r = color_srgb_to_scene_linear(r);
+ if(srgb) {
+ r.x = color_srgb_to_scene_linear(r.x);
+ r.y = color_srgb_to_scene_linear(r.y);
+ r.z = color_srgb_to_scene_linear(r.z);
+ }
stack_store_float3(stack, out_offset, r);
}
h = 0.0f;
}
else {
- c = (make_float3(cmax, cmax, cmax) - rgb)/cdelta;
+ float3 cmax3 = make_float3(cmax, cmax, cmax);
+ c = (cmax3 - rgb)/cdelta;
if(rgb.x == cmax) h = c.z - c.y;
else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
return rgb;
}
+__device float3 svm_lerp(const float3 a, const float3 b, float t)
+{
+ return (a * (1.0f - t) + b * t);
+}
+
__device float3 svm_mix_blend(float t, float3 col1, float3 col2)
{
- return lerp(col1, col2, t);
+ return svm_lerp(col1, col2, t);
}
__device float3 svm_mix_add(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 + col2, t);
+ return svm_lerp(col1, col1 + col2, t);
}
__device float3 svm_mix_mul(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 * col2, t);
+ return svm_lerp(col1, col1 * col2, t);
}
__device float3 svm_mix_screen(float t, float3 col1, float3 col2)
{
float tm = 1.0f - t;
+ float3 one = make_float3(1.0f, 1.0f, 1.0f);
+ float3 tm3 = make_float3(tm, tm, tm);
- return make_float3(1.0f, 1.0f, 1.0f) - (make_float3(tm, tm, tm) + t*(make_float3(1.0f, 1.0f, 1.0f) - col2))*(make_float3(1.0f, 1.0f, 1.0f) - col1);
+ return one - (tm3 + t*(one - col2))*(one - col1);
}
__device float3 svm_mix_overlay(float t, float3 col1, float3 col2)
__device float3 svm_mix_sub(float t, float3 col1, float3 col2)
{
- return lerp(col1, col1 - col2, t);
+ return svm_lerp(col1, col1 - col2, t);
}
__device float3 svm_mix_div(float t, float3 col1, float3 col2)
__device float3 svm_mix_diff(float t, float3 col1, float3 col2)
{
- return lerp(col1, fabs(col1 - col2), t);
+ return svm_lerp(col1, fabs(col1 - col2), t);
}
__device float3 svm_mix_dark(float t, float3 col1, float3 col2)
hsv.x = hsv2.x;
float3 tmp = hsv_to_rgb(hsv);
- outcol = lerp(outcol, tmp, t);
+ outcol = svm_lerp(outcol, tmp, t);
}
return outcol;
hsv.y = hsv2.y;
float3 tmp = hsv_to_rgb(hsv);
- outcol = lerp(outcol, tmp, t);
+ outcol = svm_lerp(outcol, tmp, t);
}
return outcol;
return safe_acosf(cospsi);
}
-__device float sky_perez_function(float lam[5], float theta, float gamma)
+__device float sky_perez_function(__constant float *lam, float theta, float gamma)
{
float ctheta = cosf(theta);
float cgamma = cosf(gamma);
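+	/* Perez et al. all-weather model:
+	   (1 + A*exp(B/cos(theta))) * (1 + C*exp(D*gamma) + E*cos^2(gamma)) */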
float3 pd = p - (vp + ip);
float d = voronoi_distance(distance_metric, pd, e);
- vp += make_float3((float)xx, (float)yy, (float)zz);
+ vp += ip;
if(d < da[0]) {
da[3] = da[2];
 * happens I have no idea, but consecutive values are problematic, maybe it
 * generates an incorrect jump table. */
-enum NodeType {
+typedef enum NodeType {
NODE_END = 0,
NODE_CLOSURE_BSDF = 100,
NODE_CLOSURE_EMISSION = 200,
NODE_ATTR_BUMP_DX = 4400,
NODE_ATTR_BUMP_DY = 4500,
NODE_TEX_ENVIRONMENT = 4600
-};
+} NodeType;
-enum NodeAttributeType {
+typedef enum NodeAttributeType {
NODE_ATTR_FLOAT = 0,
NODE_ATTR_FLOAT3
-};
+} NodeAttributeType;
-enum NodeGeometry {
+typedef enum NodeGeometry {
NODE_GEOM_P = 0,
NODE_GEOM_N,
NODE_GEOM_T,
NODE_GEOM_I,
NODE_GEOM_Ng,
NODE_GEOM_uv
-};
+} NodeGeometry;
-enum NodeLightPath {
+typedef enum NodeLightPath {
NODE_LP_camera = 0,
NODE_LP_shadow,
NODE_LP_diffuse,
NODE_LP_reflection,
NODE_LP_transmission,
NODE_LP_backfacing
-};
+} NodeLightPath;
-enum NodeTexCoord {
+typedef enum NodeTexCoord {
NODE_TEXCO_OBJECT,
NODE_TEXCO_CAMERA,
NODE_TEXCO_WINDOW,
NODE_TEXCO_REFLECTION
-};
+} NodeTexCoord;
-enum NodeMix {
+typedef enum NodeMix {
NODE_MIX_BLEND = 0,
NODE_MIX_ADD,
NODE_MIX_MUL,
NODE_MIX_COLOR,
NODE_MIX_SOFT,
NODE_MIX_LINEAR
-};
+} NodeMix;
-enum NodeMath {
+typedef enum NodeMath {
NODE_MATH_ADD,
NODE_MATH_SUBTRACT,
NODE_MATH_MULTIPLY,
NODE_MATH_ROUND,
NODE_MATH_LESS_THAN,
NODE_MATH_GREATER_THAN
-};
+} NodeMath;
-enum NodeVectorMath {
+typedef enum NodeVectorMath {
NODE_VECTOR_MATH_ADD,
NODE_VECTOR_MATH_SUBTRACT,
NODE_VECTOR_MATH_AVERAGE,
NODE_VECTOR_MATH_DOT_PRODUCT,
NODE_VECTOR_MATH_CROSS_PRODUCT,
NODE_VECTOR_MATH_NORMALIZE
-};
+} NodeVectorMath;
-enum NodeConvert {
+typedef enum NodeConvert {
NODE_CONVERT_FV,
NODE_CONVERT_CF,
NODE_CONVERT_VF
-};
+} NodeConvert;
-enum NodeDistanceMetric {
+typedef enum NodeDistanceMetric {
NODE_VORONOI_DISTANCE_SQUARED,
NODE_VORONOI_ACTUAL_DISTANCE,
NODE_VORONOI_MANHATTAN,
NODE_VORONOI_MINKOVSKY_H,
NODE_VORONOI_MINKOVSKY_4,
NODE_VORONOI_MINKOVSKY
-};
+} NodeDistanceMetric;
-enum NodeNoiseBasis {
+typedef enum NodeNoiseBasis {
NODE_NOISE_PERLIN,
NODE_NOISE_VORONOI_F1,
NODE_NOISE_VORONOI_F2,
NODE_NOISE_VORONOI_F2_F1,
NODE_NOISE_VORONOI_CRACKLE,
NODE_NOISE_CELL_NOISE
-};
+} NodeNoiseBasis;
-enum NodeWaveType {
+typedef enum NodeWaveType {
NODE_WAVE_SINE,
NODE_WAVE_SAW,
NODE_WAVE_TRI
-};
+} NodeWaveType;
-enum NodeMusgraveType {
+typedef enum NodeMusgraveType {
NODE_MUSGRAVE_MULTIFRACTAL,
NODE_MUSGRAVE_FBM,
NODE_MUSGRAVE_HYBRID_MULTIFRACTAL,
NODE_MUSGRAVE_RIDGED_MULTIFRACTAL,
NODE_MUSGRAVE_HETERO_TERRAIN
-};
+} NodeMusgraveType;
-enum NodeWoodType {
+typedef enum NodeWoodType {
NODE_WOOD_BANDS,
NODE_WOOD_RINGS,
NODE_WOOD_BAND_NOISE,
NODE_WOOD_RING_NOISE
-};
+} NodeWoodType;
-enum NodeBlendType {
+typedef enum NodeBlendType {
NODE_BLEND_LINEAR,
NODE_BLEND_QUADRATIC,
NODE_BLEND_EASING,
NODE_BLEND_RADIAL,
NODE_BLEND_QUADRATIC_SPHERE,
NODE_BLEND_SPHERICAL
-};
+} NodeBlendType;
-enum NodeBlendAxis {
+typedef enum NodeBlendAxis {
NODE_BLEND_HORIZONTAL,
NODE_BLEND_VERTICAL
-};
+} NodeBlendAxis;
-enum NodeMarbleType {
+typedef enum NodeMarbleType {
NODE_MARBLE_SOFT,
NODE_MARBLE_SHARP,
NODE_MARBLE_SHARPER
-};
+} NodeMarbleType;
-enum NodeStucciType {
+typedef enum NodeStucciType {
NODE_STUCCI_PLASTIC,
NODE_STUCCI_WALL_IN,
NODE_STUCCI_WALL_OUT
-};
+} NodeStucciType;
-enum NodeVoronoiColoring {
+typedef enum NodeVoronoiColoring {
NODE_VORONOI_INTENSITY,
NODE_VORONOI_POSITION,
NODE_VORONOI_POSITION_OUTLINE,
NODE_VORONOI_POSITION_OUTLINE_INTENSITY
-};
+} NodeVoronoiColoring;
-enum ShaderType {
+typedef enum ShaderType {
SHADER_TYPE_SURFACE,
SHADER_TYPE_VOLUME,
SHADER_TYPE_DISPLACEMENT
-};
+} ShaderType;
/* Closure */
return 1.055f * pow(c, 1.0f/2.4f) - 0.055f;
}
+#ifndef __KERNEL_OPENCL__
+
__device float3 color_srgb_to_scene_linear(float3 c)
{
return make_float3(
color_scene_linear_to_srgb(c.z));
}
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_COLOR_H__ */
return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
}
-__device_inline float3 as_float3(const float4& a)
+#endif
+
+__device_inline float3 float4_to_float3(const float4 a)
{
return make_float3(a.x, a.y, a.z);
}
-#endif
-
#ifndef __KERNEL_GPU__
__device_inline void print_float3(const char *label, const float3& a)