2 * Copyright 2011-2013 Blender Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 /* Float4 textures on various devices.
 *
 * TEX_NUM_FLOAT4_IMAGES is defined as one of the per-device
 * TEX_NUM_FLOAT4_* constants. The lookup code in this file treats image ids
 * at or above this value as byte images.
 * NOTE(review): the else/endif lines of this conditional are not visible in
 * this view. */
20 #if defined(__KERNEL_CPU__)
21 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CPU
22 #elif defined(__KERNEL_CUDA__)
23 # if __CUDA_ARCH__ < 300
24 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA
26 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_CUDA_KEPLER
29 # define TEX_NUM_FLOAT4_IMAGES TEX_NUM_FLOAT4_OPENCL
32 #ifdef __KERNEL_OPENCL__
34 /* For OpenCL all images are packed in a single array, and we do manual lookup
35 * and interpolation. */
/* Fetch a single texel of image `id` at flat index `offset` from the packed
 * image arrays and return it as float4. */
37 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
/* Ids at or above TEX_NUM_FLOAT4_IMAGES are read from the packed byte4 array
 * and each channel is scaled by 1/255. */
39 if(id >= TEX_NUM_FLOAT4_IMAGES) {
40 uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
41 float f = 1.0f/255.0f;
42 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
/* Remaining ids are read from the packed float4 array unchanged. */
45 return kernel_tex_fetch(__tex_image_float4_packed, offset);
/* NOTE(review): only the signature is visible in this view. The name and the
 * call sites (texel index plus image width/height) suggest it maps x to a
 * valid index with repeat wrapping - confirm against the body. */
49 ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
/* Clamp texel index x to the range [0, width-1]. */
57 ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
59 return clamp(x, 0, width-1);
/* Split x into an integer texel index and a remainder.
 * NOTE(review): the end of the body is not visible in this view; callers use
 * *ix as the texel index and the return value as the interpolation weight,
 * so presumably i is stored to *ix and x - i is returned - confirm. */
62 ccl_device_inline float svm_image_texture_frac(float x, int *ix)
/* float_to_int(x), lowered by one when x is negative. */
64 int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
/* Sample packed image `id` at (x, y) with manual wrapping and filtering
 * (OpenCL path).
 *
 * x and y are multiplied by the image width and height to obtain texel
 * coordinates. Closest interpolation reads one texel; every other mode does
 * a bilinear blend of four texels. When use_alpha is set and alpha is neither
 * 0 nor 1, 1/alpha is computed for the color channels, and byte images
 * (id >= TEX_NUM_FLOAT4_IMAGES) have their color limited to 1.0. The color
 * channels can finally be converted with color_srgb_to_scene_linear().
 *
 * NOTE(review): the declarations of width/height/offset/r/ix/iy/nix/niy and
 * several branch lines (periodic test, else, srgb test, return) are not
 * visible in this view. */
69 ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
/* Per-image record; info.w packs the periodic flag in bit 0 and the
 * interpolation mode in the bits above it. */
71 uint4 info = kernel_tex_fetch(__tex_image_packed_info, id);
75 uint periodic = (info.w & 0x1);
76 uint interpolation = info.w >> 1;
80 if(interpolation == INTERPOLATION_CLOSEST) {
/* Closest: only the integer texel index is needed, the remainder is dropped. */
81 svm_image_texture_frac(x*width, &ix);
82 svm_image_texture_frac(y*height, &iy);
85 ix = svm_image_texture_wrap_periodic(ix, width);
86 iy = svm_image_texture_wrap_periodic(iy, height);
89 ix = svm_image_texture_wrap_clamp(ix, width);
90 iy = svm_image_texture_wrap_clamp(iy, height);
93 r = svm_image_texture_read(kg, id, offset + ix + iy*width);
95 else { /* We default to linear interpolation if it is not closest */
/* Shift by half a texel so that tx/ty are the weights of the next texel. */
96 float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
97 float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
100 ix = svm_image_texture_wrap_periodic(ix, width);
101 iy = svm_image_texture_wrap_periodic(iy, height);
103 nix = svm_image_texture_wrap_periodic(ix+1, width);
104 niy = svm_image_texture_wrap_periodic(iy+1, height);
/* Clamp mode: derive the neighbour texel from the unclamped index first,
 * then clamp both. Clamping ix/iy before adding one made indices below zero
 * blend texel 0 with texel 1 instead of extending the edge texel. */
107 nix = svm_image_texture_wrap_clamp(ix+1, width);
108 niy = svm_image_texture_wrap_clamp(iy+1, height);
110 ix = svm_image_texture_wrap_clamp(ix, width);
111 iy = svm_image_texture_wrap_clamp(iy, height);
/* Bilinear blend of the four surrounding texels. */
115 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
116 r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
117 r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
118 r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
/* Alpha handling is skipped for alpha == 0 (no division by zero) and
 * alpha == 1 (nothing to do).
 * NOTE(review): the lines applying invw to r.x/r.y/r.z are not visible here. */
121 if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
122 float invw = 1.0f/r.w;
/* Byte images: keep the color channels at or below 1.0. */
127 if(id >= TEX_NUM_FLOAT4_IMAGES) {
128 r.x = min(r.x, 1.0f);
129 r.y = min(r.y, 1.0f);
130 r.z = min(r.z, 1.0f);
/* Color space conversion of the color channels; alpha is left untouched.
 * NOTE(review): the guarding condition is not visible, presumably `srgb`. */
135 r.x = color_srgb_to_scene_linear(r.x);
136 r.y = color_srgb_to_scene_linear(r.y);
137 r.z = color_srgb_to_scene_linear(r.z);
/* Sample image `id` at (x, y) using the device's texture lookup (CPU and
 * CUDA path).
 *
 * CPU builds call kernel_tex_image_interp(id, x, y). CUDA builds with
 * __CUDA_ARCH__ < 300 dispatch on the id to one named texture per slot;
 * the remaining CUDA path fetches a texture object from __bindless_mapping.
 * Afterwards the same alpha and color space post-processing as in the
 * manual-lookup variant is applied, with an SSE2 flavour operating on
 * r_ssef.
 *
 * NOTE(review): several short lines (braces, else/endif, the switch header,
 * the declarations of r_ssef/alpha/r, the return) are not visible in this
 * view. */
145 ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint srgb, uint use_alpha)
147 #ifdef __KERNEL_CPU__
148 # ifdef __KERNEL_SSE2__
/* With SSE2, r is a float4 view of r_ssef so both names refer to one value. */
150 float4 &r = (float4 &)r_ssef;
151 r = kernel_tex_image_interp(id, x, y);
153 float4 r = kernel_tex_image_interp(id, x, y);
158 # if __CUDA_ARCH__ < 300
159 /* not particularly proud of this massive switch, what are the
161 * - use a single big 1D texture, and do our own lookup/filtering
162 * - group by size and use a 3d texture, performance impact
163 * - group into larger texture with some padding for correct lerp
165 * also note that cuda has a textures limit (128 for Fermi, 256 for Kepler),
166 * and we cannot use all since we still need some for other storage */
/* Slots 0-4 are float4 textures, slots 5-92 are byte4 textures. */
169 case 0: r = kernel_tex_image_interp(__tex_image_float4_000, x, y); break;
170 case 1: r = kernel_tex_image_interp(__tex_image_float4_001, x, y); break;
171 case 2: r = kernel_tex_image_interp(__tex_image_float4_002, x, y); break;
172 case 3: r = kernel_tex_image_interp(__tex_image_float4_003, x, y); break;
173 case 4: r = kernel_tex_image_interp(__tex_image_float4_004, x, y); break;
174 case 5: r = kernel_tex_image_interp(__tex_image_byte4_005, x, y); break;
175 case 6: r = kernel_tex_image_interp(__tex_image_byte4_006, x, y); break;
176 case 7: r = kernel_tex_image_interp(__tex_image_byte4_007, x, y); break;
177 case 8: r = kernel_tex_image_interp(__tex_image_byte4_008, x, y); break;
178 case 9: r = kernel_tex_image_interp(__tex_image_byte4_009, x, y); break;
179 case 10: r = kernel_tex_image_interp(__tex_image_byte4_010, x, y); break;
180 case 11: r = kernel_tex_image_interp(__tex_image_byte4_011, x, y); break;
181 case 12: r = kernel_tex_image_interp(__tex_image_byte4_012, x, y); break;
182 case 13: r = kernel_tex_image_interp(__tex_image_byte4_013, x, y); break;
183 case 14: r = kernel_tex_image_interp(__tex_image_byte4_014, x, y); break;
184 case 15: r = kernel_tex_image_interp(__tex_image_byte4_015, x, y); break;
185 case 16: r = kernel_tex_image_interp(__tex_image_byte4_016, x, y); break;
186 case 17: r = kernel_tex_image_interp(__tex_image_byte4_017, x, y); break;
187 case 18: r = kernel_tex_image_interp(__tex_image_byte4_018, x, y); break;
188 case 19: r = kernel_tex_image_interp(__tex_image_byte4_019, x, y); break;
189 case 20: r = kernel_tex_image_interp(__tex_image_byte4_020, x, y); break;
190 case 21: r = kernel_tex_image_interp(__tex_image_byte4_021, x, y); break;
191 case 22: r = kernel_tex_image_interp(__tex_image_byte4_022, x, y); break;
192 case 23: r = kernel_tex_image_interp(__tex_image_byte4_023, x, y); break;
193 case 24: r = kernel_tex_image_interp(__tex_image_byte4_024, x, y); break;
194 case 25: r = kernel_tex_image_interp(__tex_image_byte4_025, x, y); break;
195 case 26: r = kernel_tex_image_interp(__tex_image_byte4_026, x, y); break;
196 case 27: r = kernel_tex_image_interp(__tex_image_byte4_027, x, y); break;
197 case 28: r = kernel_tex_image_interp(__tex_image_byte4_028, x, y); break;
198 case 29: r = kernel_tex_image_interp(__tex_image_byte4_029, x, y); break;
199 case 30: r = kernel_tex_image_interp(__tex_image_byte4_030, x, y); break;
200 case 31: r = kernel_tex_image_interp(__tex_image_byte4_031, x, y); break;
201 case 32: r = kernel_tex_image_interp(__tex_image_byte4_032, x, y); break;
202 case 33: r = kernel_tex_image_interp(__tex_image_byte4_033, x, y); break;
203 case 34: r = kernel_tex_image_interp(__tex_image_byte4_034, x, y); break;
204 case 35: r = kernel_tex_image_interp(__tex_image_byte4_035, x, y); break;
205 case 36: r = kernel_tex_image_interp(__tex_image_byte4_036, x, y); break;
206 case 37: r = kernel_tex_image_interp(__tex_image_byte4_037, x, y); break;
207 case 38: r = kernel_tex_image_interp(__tex_image_byte4_038, x, y); break;
208 case 39: r = kernel_tex_image_interp(__tex_image_byte4_039, x, y); break;
209 case 40: r = kernel_tex_image_interp(__tex_image_byte4_040, x, y); break;
210 case 41: r = kernel_tex_image_interp(__tex_image_byte4_041, x, y); break;
211 case 42: r = kernel_tex_image_interp(__tex_image_byte4_042, x, y); break;
212 case 43: r = kernel_tex_image_interp(__tex_image_byte4_043, x, y); break;
213 case 44: r = kernel_tex_image_interp(__tex_image_byte4_044, x, y); break;
214 case 45: r = kernel_tex_image_interp(__tex_image_byte4_045, x, y); break;
215 case 46: r = kernel_tex_image_interp(__tex_image_byte4_046, x, y); break;
216 case 47: r = kernel_tex_image_interp(__tex_image_byte4_047, x, y); break;
217 case 48: r = kernel_tex_image_interp(__tex_image_byte4_048, x, y); break;
218 case 49: r = kernel_tex_image_interp(__tex_image_byte4_049, x, y); break;
219 case 50: r = kernel_tex_image_interp(__tex_image_byte4_050, x, y); break;
220 case 51: r = kernel_tex_image_interp(__tex_image_byte4_051, x, y); break;
221 case 52: r = kernel_tex_image_interp(__tex_image_byte4_052, x, y); break;
222 case 53: r = kernel_tex_image_interp(__tex_image_byte4_053, x, y); break;
223 case 54: r = kernel_tex_image_interp(__tex_image_byte4_054, x, y); break;
224 case 55: r = kernel_tex_image_interp(__tex_image_byte4_055, x, y); break;
225 case 56: r = kernel_tex_image_interp(__tex_image_byte4_056, x, y); break;
226 case 57: r = kernel_tex_image_interp(__tex_image_byte4_057, x, y); break;
227 case 58: r = kernel_tex_image_interp(__tex_image_byte4_058, x, y); break;
228 case 59: r = kernel_tex_image_interp(__tex_image_byte4_059, x, y); break;
229 case 60: r = kernel_tex_image_interp(__tex_image_byte4_060, x, y); break;
230 case 61: r = kernel_tex_image_interp(__tex_image_byte4_061, x, y); break;
231 case 62: r = kernel_tex_image_interp(__tex_image_byte4_062, x, y); break;
232 case 63: r = kernel_tex_image_interp(__tex_image_byte4_063, x, y); break;
233 case 64: r = kernel_tex_image_interp(__tex_image_byte4_064, x, y); break;
234 case 65: r = kernel_tex_image_interp(__tex_image_byte4_065, x, y); break;
235 case 66: r = kernel_tex_image_interp(__tex_image_byte4_066, x, y); break;
236 case 67: r = kernel_tex_image_interp(__tex_image_byte4_067, x, y); break;
237 case 68: r = kernel_tex_image_interp(__tex_image_byte4_068, x, y); break;
238 case 69: r = kernel_tex_image_interp(__tex_image_byte4_069, x, y); break;
239 case 70: r = kernel_tex_image_interp(__tex_image_byte4_070, x, y); break;
240 case 71: r = kernel_tex_image_interp(__tex_image_byte4_071, x, y); break;
241 case 72: r = kernel_tex_image_interp(__tex_image_byte4_072, x, y); break;
242 case 73: r = kernel_tex_image_interp(__tex_image_byte4_073, x, y); break;
243 case 74: r = kernel_tex_image_interp(__tex_image_byte4_074, x, y); break;
244 case 75: r = kernel_tex_image_interp(__tex_image_byte4_075, x, y); break;
245 case 76: r = kernel_tex_image_interp(__tex_image_byte4_076, x, y); break;
246 case 77: r = kernel_tex_image_interp(__tex_image_byte4_077, x, y); break;
247 case 78: r = kernel_tex_image_interp(__tex_image_byte4_078, x, y); break;
248 case 79: r = kernel_tex_image_interp(__tex_image_byte4_079, x, y); break;
249 case 80: r = kernel_tex_image_interp(__tex_image_byte4_080, x, y); break;
250 case 81: r = kernel_tex_image_interp(__tex_image_byte4_081, x, y); break;
251 case 82: r = kernel_tex_image_interp(__tex_image_byte4_082, x, y); break;
252 case 83: r = kernel_tex_image_interp(__tex_image_byte4_083, x, y); break;
253 case 84: r = kernel_tex_image_interp(__tex_image_byte4_084, x, y); break;
254 case 85: r = kernel_tex_image_interp(__tex_image_byte4_085, x, y); break;
255 case 86: r = kernel_tex_image_interp(__tex_image_byte4_086, x, y); break;
256 case 87: r = kernel_tex_image_interp(__tex_image_byte4_087, x, y); break;
257 case 88: r = kernel_tex_image_interp(__tex_image_byte4_088, x, y); break;
258 case 89: r = kernel_tex_image_interp(__tex_image_byte4_089, x, y); break;
259 case 90: r = kernel_tex_image_interp(__tex_image_byte4_090, x, y); break;
260 case 91: r = kernel_tex_image_interp(__tex_image_byte4_091, x, y); break;
261 case 92: r = kernel_tex_image_interp(__tex_image_byte4_092, x, y); break;
/* NOTE(review): presumably the fallback for ids without a texture slot; the
 * enclosing case label is not visible here. */
264 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
/* Texture object path: ids below 2048 are fetched as float4, all others as
 * a single float that is replicated to the color channels with alpha 1. */
267 CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
268 if(id < 2048) /* TODO(dingto): Make this a variable */
269 r = kernel_tex_image_interp_float4(tex, x, y);
271 float f = kernel_tex_image_interp_float(tex, x, y);
/* Float literal, consistent with the rest of the file (was a double 1.0). */
272 r = make_float4(f, f, f, 1.0f);
277 #ifdef __KERNEL_SSE2__
/* SSE2 flavour: divide the whole vector by alpha, skipping alpha == 0 and
 * alpha == 1; byte images are limited to 1.0 afterwards.
 * NOTE(review): the declaration of alpha is not visible in this view. */
280 if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
281 r_ssef = r_ssef / ssef(alpha);
282 if(id >= TEX_NUM_FLOAT4_IMAGES)
283 r_ssef = min(r_ssef, ssef(1.0f));
288 r_ssef = color_srgb_to_scene_linear(r_ssef);
/* Scalar flavour of the same post-processing.
 * NOTE(review): the lines applying invw to r.x/r.y/r.z are not visible here. */
292 if(use_alpha && r.w != 1.0f && r.w != 0.0f) {
293 float invw = 1.0f/r.w;
298 if(id >= TEX_NUM_FLOAT4_IMAGES) {
299 r.x = min(r.x, 1.0f);
300 r.y = min(r.y, 1.0f);
301 r.z = min(r.z, 1.0f);
/* Color space conversion of the color channels; alpha is left untouched. */
306 r.x = color_srgb_to_scene_linear(r.x);
307 r.y = color_srgb_to_scene_linear(r.y);
308 r.z = color_srgb_to_scene_linear(r.z);
317 /* Remap coordinate from 0..1 box to -1..1 */
318 ccl_device_inline float3 texco_remap_square(float3 co)
/* Subtract the box center (0.5 per axis), then double the result. */
320 return (co - make_float3(0.5f, 0.5f, 0.5f)) * 2.0f;
/* SVM node: sample an image texture at a coordinate loaded from the stack
 * and store color and alpha back to the stack.
 * NOTE(review): the declarations of id and tex_co are not visible in this
 * view. */
323 ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
326 uint co_offset, out_offset, alpha_offset, srgb;
/* node.z packs the three stack offsets and the srgb flag. */
328 decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
330 float3 co = stack_load_float3(stack, co_offset);
/* Alpha handling is requested only when the alpha output has a stack slot. */
332 uint use_alpha = stack_valid(alpha_offset);
/* node.w selects the projection: sphere and tube first remap co to -1..1,
 * any other value uses co.x/co.y directly. */
333 if(node.w == NODE_IMAGE_PROJ_SPHERE) {
334 co = texco_remap_square(co);
335 tex_co = map_to_sphere(co);
337 else if(node.w == NODE_IMAGE_PROJ_TUBE) {
338 co = texco_remap_square(co);
339 tex_co = map_to_tube(co);
342 tex_co = make_float2(co.x, co.y);
344 float4 f = svm_image_texture(kg, id, tex_co.x, tex_co.y, srgb, use_alpha);
/* Write only the outputs that have a valid stack slot. */
346 if(stack_valid(out_offset))
347 stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
348 if(stack_valid(alpha_offset))
349 stack_store_float(stack, alpha_offset, f.w);
/* SVM node: box projection of an image texture. Per-axis weights are derived
 * from the normal, then up to three lookups of the image (one per axis pair)
 * are accumulated with those weights and stored on the stack.
 * NOTE(review): several short lines (the absolute value of N, the weight
 * assignments in the single-texture cases, the id declaration, the guards
 * around the lookups) are not visible in this view. */
352 ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
354 /* get object space normal */
355 float3 N = ccl_fetch(sd, N);
/* NOTE(review): N is assigned the same ccl_fetch(sd, N) value a second time
 * here; this looks redundant unless the line in between, not visible in this
 * view, changes N - confirm. */
357 N = ccl_fetch(sd, N);
358 if(ccl_fetch(sd, object) != OBJECT_NONE)
359 object_inverse_normal_transform(kg, sd, &N);
361 /* project from direction vector to barycentric coordinates in triangles */
/* Normalize so that the three components sum to one. */
366 N /= (N.x + N.y + N.z);
368 /* basic idea is to think of this as a triangle, each corner representing
369 * one of the 3 faces of the cube. in the corners we have single textures,
370 * in between we blend between two textures, and in the middle we blend
371 * between three textures.
373 * the Nxyz values are the barycentric coordinates in an equilateral
374 * triangle, which in case of blending, in the middle has a smaller
375 * equilateral triangle where 3 textures blend. this divides things into
376 * 7 zones, with an if() test for each zone */
378 float3 weight = make_float3(0.0f, 0.0f, 0.0f);
/* node.w carries the blend amount, read back through __int_as_float. */
379 float blend = __int_as_float(node.w);
380 float limit = 0.5f*(1.0f + blend);
382 /* first test for corners with single texture */
383 if(N.x > limit*(N.x + N.y) && N.x > limit*(N.x + N.z)) {
386 else if(N.y > limit*(N.x + N.y) && N.y > limit*(N.y + N.z)) {
389 else if(N.z > limit*(N.x + N.z) && N.z > limit*(N.y + N.z)) {
392 else if(blend > 0.0f) {
393 /* in case of blending, test for mixes between two textures */
394 if(N.z < (1.0f - limit)*(N.y + N.x)) {
395 weight.x = N.x/(N.x + N.y);
396 weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
397 weight.y = 1.0f - weight.x;
399 else if(N.x < (1.0f - limit)*(N.y + N.z)) {
400 weight.y = N.y/(N.y + N.z);
401 weight.y = saturate((weight.y - 0.5f*(1.0f - blend))/blend);
402 weight.z = 1.0f - weight.y;
404 else if(N.y < (1.0f - limit)*(N.x + N.z)) {
405 weight.x = N.x/(N.x + N.z);
406 weight.x = saturate((weight.x - 0.5f*(1.0f - blend))/blend);
407 weight.z = 1.0f - weight.x;
410 /* last case, we have a mix between three */
411 weight.x = ((2.0f - limit)*N.x + (limit - 1.0f))/(2.0f*limit - 1.0f);
412 weight.y = ((2.0f - limit)*N.y + (limit - 1.0f))/(2.0f*limit - 1.0f);
413 weight.z = ((2.0f - limit)*N.z + (limit - 1.0f))/(2.0f*limit - 1.0f);
417 /* Desperate mode, no valid choice anyway, fallback to one side.*/
421 /* now fetch textures */
422 uint co_offset, out_offset, alpha_offset, srgb;
/* node.z packs the three stack offsets and the srgb flag. */
423 decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
425 float3 co = stack_load_float3(stack, co_offset);
428 float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
429 uint use_alpha = stack_valid(alpha_offset);
/* One lookup per axis: x uses (co.y, co.z), y uses (co.x, co.z) and z uses
 * (co.y, co.x) as image coordinates. */
432 f += weight.x*svm_image_texture(kg, id, co.y, co.z, srgb, use_alpha);
434 f += weight.y*svm_image_texture(kg, id, co.x, co.z, srgb, use_alpha);
436 f += weight.z*svm_image_texture(kg, id, co.y, co.x, srgb, use_alpha);
/* Write only the outputs that have a valid stack slot. */
438 if(stack_valid(out_offset))
439 stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
440 if(stack_valid(alpha_offset))
441 stack_store_float(stack, alpha_offset, f.w);
/* SVM node: sample an environment image using a direction loaded from the
 * stack and store color and alpha back to the stack.
 * NOTE(review): the declarations of id and uv and the test that picks
 * between the two mappings are not visible in this view; presumably
 * `projection` selects equirectangular vs. mirror ball - confirm. */
444 ccl_device void svm_node_tex_environment(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
447 uint co_offset, out_offset, alpha_offset, srgb;
448 uint projection = node.w;
/* node.z packs the three stack offsets and the srgb flag. */
450 decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &srgb);
452 float3 co = stack_load_float3(stack, co_offset);
/* Convert the direction to 2D image coordinates. */
458 uv = direction_to_equirectangular(co);
460 uv = direction_to_mirrorball(co);
/* Alpha handling is requested only when the alpha output has a stack slot. */
462 uint use_alpha = stack_valid(alpha_offset);
463 float4 f = svm_image_texture(kg, id, uv.x, uv.y, srgb, use_alpha);
/* Write only the outputs that have a valid stack slot. */
465 if(stack_valid(out_offset))
466 stack_store_float3(stack, out_offset, make_float3(f.x, f.y, f.z));
467 if(stack_valid(alpha_offset))
468 stack_store_float(stack, alpha_offset, f.w);