Cycles: merge of changes from tomato branch.
[blender-staging.git] / intern / cycles / kernel / svm / svm_image.h
index 62e24166970db8375f2214e5e31f0415ee31df98..662419418e3f10399189bb59eb0658abc3552447 100644 (file)
 
 CCL_NAMESPACE_BEGIN
 
-__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
+#ifdef __KERNEL_OPENCL__
+
+/* For OpenCL all images are packed in a single array, and we do manual lookup
+ * and interpolation. */
+
+/* Fetch the uchar4 texel stored at index `offset` of __tex_image_packed and
+ * return it as a float4 with every channel multiplied by 1/255. */
+__device_inline float4 svm_image_texture_read(KernelGlobals *kg, int offset)
 {
-       float4 r;
+       const float scale = 1.0f/255.0f;
+       uchar4 texel = kernel_tex_fetch(__tex_image_packed, offset);
+
+       return make_float4(texel.x*scale, texel.y*scale, texel.z*scale, texel.w*scale);
+}
 
-       /* not particularly proud of this massive switch, what are the
-          alternatives?
-          - use a single big 1D texture, and do our own lookup/filtering
-          - group by size and use a 3d texture, performance impact
-          - group into larger texture with some padding for correct lerp
+/* Wrap the texel index x into [0, width) so that indices repeat with period
+ * `width` (assumes width > 0). */
+__device_inline int svm_image_texture_wrap_periodic(int x, int width)
+{
+       /* the remainder can be negative when x is; shift those back into range */
+       int wrapped = x % width;
+       return (wrapped < 0)? wrapped + width: wrapped;
+}
 
-          also note that cuda has 128 textures limit, we use 100 now, since
-          we still need some for other storage */
+/* Limit the texel index x to the range [0, width-1] using clamp(). */
+__device_inline int svm_image_texture_wrap_clamp(int x, int width)
+{
+       const int last = width - 1;
+       return clamp(x, 0, last);
+}
 
-#ifdef __KERNEL_OPENCL__
-       r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); /* todo */
+/* Split x into an integer cell index, written to *ix, and the remaining
+ * offset inside that cell, which is returned.
+ *
+ * For x >= 0 the index is x truncated toward zero. For x < 0 it is the
+ * truncated value minus one, so a negative whole number such as -1.0 yields
+ * index -2 and a returned offset of 1.0. */
+__device_inline float svm_image_texture_frac(float x, int *ix)
+{
+       /* the (int) cast truncates toward zero; step negative inputs down by one */
+       int cell = (int)x;
+       if(x < 0.0f)
+               cell -= 1;
+
+       *ix = cell;
+       return x - (float)cell;
+}
+
+/* Bilinearly interpolated lookup into the packed image array.
+ *
+ * id selects the entry of __tex_image_packed_info that holds the image's
+ * width, height, start offset into the packed array and periodic flag.
+ * x and y are scaled by width and height to get texel coordinates. With the
+ * periodic flag set, indices wrap around; otherwise they are clamped to the
+ * image edge. When srgb is non-zero the x, y and z channels of the result are
+ * passed through color_srgb_to_scene_linear(); w is returned unchanged. */
+__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb)
+{
+       uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
+       uint width = info.x;
+       uint height = info.y;
+       uint offset = info.z;
+       uint periodic = info.w;
+
+       /* integer texel indices and the interpolation factors between them */
+       int ix, iy, nix, niy;
+       float tx = svm_image_texture_frac(x*width, &ix);
+       float ty = svm_image_texture_frac(y*height, &iy);
+
+       if(periodic) {
+               ix = svm_image_texture_wrap_periodic(ix, width);
+               iy = svm_image_texture_wrap_periodic(iy, height);
+
+               nix = svm_image_texture_wrap_periodic(ix+1, width);
+               niy = svm_image_texture_wrap_periodic(iy+1, height);
+       }
+       else {
+               /* derive the neighbours from the unclamped indices: clamping
+                * ix/iy first would turn every coordinate below zero into a
+                * blend of texels 0 and 1 instead of repeating the edge texel */
+               nix = svm_image_texture_wrap_clamp(ix+1, width);
+               niy = svm_image_texture_wrap_clamp(iy+1, height);
+
+               ix = svm_image_texture_wrap_clamp(ix, width);
+               iy = svm_image_texture_wrap_clamp(iy, height);
+       }
+
+       /* weighted sum of the four surrounding texels */
+       float4 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + iy*width);
+       r += (1.0f - ty)*tx*svm_image_texture_read(kg, offset + nix + iy*width);
+       r += ty*(1.0f - tx)*svm_image_texture_read(kg, offset + ix + niy*width);
+       r += ty*tx*svm_image_texture_read(kg, offset + nix + niy*width);
+
+       if(srgb) {
+               r.x = color_srgb_to_scene_linear(r.x);
+               r.y = color_srgb_to_scene_linear(r.y);
+               r.z = color_srgb_to_scene_linear(r.z);
+       }
+
+       return r;
+}
+
+#else
+
+__device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb)
+{
+       float4 r;
+
+#ifdef __KERNEL_CPU__
+       r = kernel_tex_image_interp(id, x, y);
 #else
+       /* not particularly proud of this massive switch, what are the
+        * alternatives?
+        * - use a single big 1D texture, and do our own lookup/filtering
+        * - group by size and use a 3d texture, performance impact
+        * - group into larger texture with some padding for correct lerp
+        *
+        * also note that cuda has 128 textures limit, we use 100 now, since
+        * we still need some for other storage */
+
        switch(id) {
-               case 0: r = kernel_tex_image_interp(__tex_image_000, x, y); break;
-               case 1: r = kernel_tex_image_interp(__tex_image_001, x, y); break;
-               case 2: r = kernel_tex_image_interp(__tex_image_002, x, y); break;
-               case 3: r = kernel_tex_image_interp(__tex_image_003, x, y); break;
-               case 4: r = kernel_tex_image_interp(__tex_image_004, x, y); break;
+               case 0: r = kernel_tex_image_interp(__tex_image_float_000, x, y); break;
+               case 1: r = kernel_tex_image_interp(__tex_image_float_001, x, y); break;
+               case 2: r = kernel_tex_image_interp(__tex_image_float_002, x, y); break;
+               case 3: r = kernel_tex_image_interp(__tex_image_float_003, x, y); break;
+               case 4: r = kernel_tex_image_interp(__tex_image_float_004, x, y); break;
                case 5: r = kernel_tex_image_interp(__tex_image_005, x, y); break;
                case 6: r = kernel_tex_image_interp(__tex_image_006, x, y); break;
                case 7: r = kernel_tex_image_interp(__tex_image_007, x, y); break;
@@ -141,9 +216,17 @@ __device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y)
        }
 #endif
 
+       if(srgb) {
+               r.x = color_srgb_to_scene_linear(r.x);
+               r.y = color_srgb_to_scene_linear(r.y);
+               r.z = color_srgb_to_scene_linear(r.z);
+       }
+
        return r;
 }
 
+#endif
+
 __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
        uint id = node.y;
@@ -152,42 +235,124 @@ __device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack
        decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
 
        float3 co = stack_load_float3(stack, co_offset);
-       float4 f = svm_image_texture(kg, id, co.x, co.y);
-       float3 r = make_float3(f.x, f.y, f.z);
+       float4 f = svm_image_texture(kg, id, co.x, co.y, srgb);
 
-       if(srgb) {
-               r.x = color_srgb_to_scene_linear(r.x);
-               r.y = color_srgb_to_scene_linear(r.y);
-               r.z = color_srgb_to_scene_linear(r.z);
+       if(stack_valid(out_offset))
+               stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
+       if(stack_valid(alpha_offset))
+               stack_store_float(stack, alpha_offset, f.w);
+}
+
+/* Box projection image texture node.
+ *
+ * Samples the image node.y up to three times, using the (y,z), (x,z) and
+ * (y,x) components of the coordinate loaded from the stack, and mixes the
+ * samples with weights derived from the absolute object space normal.
+ * node.w holds the blend factor as float bits; node.z packs the coordinate,
+ * color and alpha stack offsets together with the srgb flag. Color and alpha
+ * are only stored when their stack offsets are valid. */
+__device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+{
+       /* get object space normal */
+       float3 N = sd->N;
+
+       if(sd->object != ~0)
+               object_inverse_normal_transform(kg, sd, &N);
+
+       /* project from direction vector to barycentric coordinates in triangles */
+       N.x = fabsf(N.x);
+       N.y = fabsf(N.y);
+       N.z = fabsf(N.z);
+
+       N /= (N.x + N.y + N.z);
+
+       /* basic idea is to think of this as a triangle, each corner representing
+        * one of the 3 faces of the cube. in the corners we have single textures,
+        * in between we blend between two textures, and in the middle we blend
+        * between three textures.
+        *
+        * the Nxyz values are the barycentric coordinates in an equilateral
+        * triangle, which in case of blending in the middle has a smaller
+        * equilateral triangle where 3 textures blend. this divides things into
+        * 7 zones, with an if() test for each zone */
+
+       float3 weight = make_float3(0.0f, 0.0f, 0.0f);
+       float blend = __int_as_float(node.w);
+       float limit = 0.5f*(1.0f + blend);
+
+       /* first test for corners with single texture */
+       if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) {
+               weight.x = 1.0f;
+       }
+       else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) {
+               weight.y = 1.0f;
+       }
+       else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) {
+               weight.z = 1.0f;
+       }
+       else if(blend > 0.0f) {
+               /* in case of blending, test for mixes between two textures */
+               if(N.z < (1.0f - limit)*(N.y + N.x)) {
+                       weight.x = N.x/(N.x + N.y);
+                       weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+                       weight.y = 1.0f - weight.x;
+               }
+               else if(N.x < (1.0f - limit)*(N.y + N.z)) {
+                       weight.y = N.y/(N.y + N.z);
+                       weight.y = clamp((weight.y - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+                       weight.z = 1.0f - weight.y;
+               }
+               else if(N.y < (1.0f - limit)*(N.x + N.z)) {
+                       weight.x = N.x/(N.x + N.z);
+                       weight.x = clamp((weight.x - 0.5f*(1.0f - blend))/blend, 0.0f, 1.0f);
+                       weight.z = 1.0f - weight.x;
+               }
+               else {
+                       /* last case, we have a mix between three */
+                       weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f);
+                       weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f);
+                       weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f);
+               }
        }
+       else {
+               /* reached when blend <= 0 and none of the strict corner tests
+                * passed, e.g. the two largest components are exactly equal.
+                * without a fallback all weights stay zero and the node outputs
+                * black with zero alpha, so pick one side */
+               weight.x = 1.0f;
+       }
 
+       /* now fetch textures */
+       uint co_offset, out_offset, alpha_offset, srgb;
+       decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
+
+       float3 co = stack_load_float3(stack, co_offset);
+       uint id = node.y;
+
+       float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
+
+       /* only sample the projections that contribute to the result */
+       if(weight.x > 0.0f)
+               f += weight.x*svm_image_texture(kg, id, co.y, co.z, srgb);
+       if(weight.y > 0.0f)
+               f += weight.y*svm_image_texture(kg, id, co.x, co.z, srgb);
+       if(weight.z > 0.0f)
+               f += weight.z*svm_image_texture(kg, id, co.y, co.x, srgb);
+
        if(stack_valid(out_offset))
-               stack_store_float3(stack, out_offset, r);
+               stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
        if(stack_valid(alpha_offset))
                stack_store_float(stack, alpha_offset, f.w);
 }
 
+
+/* Environment texture node: normalizes the direction loaded from the stack,
+ * maps it to image coordinates with the projection selected by node.w
+ * (0 uses direction_to_equirectangular, any other value uses
+ * direction_to_mirrorball), samples image node.y and stores color and alpha
+ * in the output slots whose stack offsets are valid. */
 __device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
        uint id = node.y;
        uint co_offset, out_offset, alpha_offset, srgb;
+       uint projection = node.w;
 
        decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
 
        float3 co = stack_load_float3(stack, co_offset);
-       float u = (atan2f(co.y, co.x) + M_PI_F)/(2*M_PI_F);
-       float v = atan2f(co.z, hypotf(co.x, co.y))/M_PI_F + 0.5f;
-       float4 f = svm_image_texture(kg, id, u, v);
-       float3 r = make_float3(f.x, f.y, f.z);
+       float3 dir = normalize(co);
+       float2 uv;
 
-       if(srgb) {
-               r.x = color_srgb_to_scene_linear(r.x);
-               r.y = color_srgb_to_scene_linear(r.y);
-               r.z = color_srgb_to_scene_linear(r.z);
-       }
+       /* pick the direction to image coordinate mapping */
+       if(projection != 0)
+               uv = direction_to_mirrorball(dir);
+       else
+               uv = direction_to_equirectangular(dir);
+
+       /* srgb conversion is handled inside svm_image_texture */
+       float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb);
 
        if(stack_valid(out_offset))
-               stack_store_float3(stack, out_offset, r);
+               stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
        if(stack_valid(alpha_offset))
                stack_store_float(stack, alpha_offset, f.w);
 }