Windows: Add support for building with clang.
[blender.git] / intern / cycles / kernel / kernels / cpu / kernel_cpu_image.h
/*
 * Copyright 2011-2016 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #ifndef __KERNEL_CPU_IMAGE_H__
18 #define __KERNEL_CPU_IMAGE_H__
19
20 CCL_NAMESPACE_BEGIN
21
22 template<typename T> struct TextureInterpolator  {
23 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
24         { \
25                 u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
26                 u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
27                 u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
28                 u[3] = (1.0f / 6.0f) * t * t * t; \
29         } (void)0
30
31         static ccl_always_inline float4 read(float4 r)
32         {
33                 return r;
34         }
35
36         static ccl_always_inline float4 read(uchar4 r)
37         {
38                 float f = 1.0f/255.0f;
39                 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
40         }
41
42         static ccl_always_inline float4 read(uchar r)
43         {
44                 float f = r*(1.0f/255.0f);
45                 return make_float4(f, f, f, 1.0f);
46         }
47
48         static ccl_always_inline float4 read(float r)
49         {
50                 /* TODO(dingto): Optimize this, so interpolation
51                  * happens on float instead of float4 */
52                 return make_float4(r, r, r, 1.0f);
53         }
54
55         static ccl_always_inline float4 read(half4 r)
56         {
57                 return half4_to_float4(r);
58         }
59
60         static ccl_always_inline float4 read(half r)
61         {
62                 float f = half_to_float(r);
63                 return make_float4(f, f, f, 1.0f);
64         }
65
66         static ccl_always_inline float4 read(const T *data,
67                                              int x, int y,
68                                              int width, int height)
69         {
70                 if(x < 0 || y < 0 || x >= width || y >= height) {
71                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
72                 }
73                 return read(data[y * width + x]);
74         }
75
76         static ccl_always_inline int wrap_periodic(int x, int width)
77         {
78                 x %= width;
79                 if(x < 0)
80                         x += width;
81                 return x;
82         }
83
84         static ccl_always_inline int wrap_clamp(int x, int width)
85         {
86                 return clamp(x, 0, width-1);
87         }
88
89         static ccl_always_inline float frac(float x, int *ix)
90         {
91                 int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
92                 *ix = i;
93                 return x - (float)i;
94         }
95
96         /* ********  2D interpolation ******** */
97
98         static ccl_always_inline float4 interp_closest(const TextureInfo& info,
99                                                        float x, float y)
100         {
101                 const T *data = (const T*)info.data;
102                 const int width = info.width;
103                 const int height = info.height;
104                 int ix, iy;
105                 frac(x*(float)width, &ix);
106                 frac(y*(float)height, &iy);
107                 switch(info.extension) {
108                         case EXTENSION_REPEAT:
109                                 ix = wrap_periodic(ix, width);
110                                 iy = wrap_periodic(iy, height);
111                                 break;
112                         case EXTENSION_CLIP:
113                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
114                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
115                                 }
116                                 ATTR_FALLTHROUGH;
117                         case EXTENSION_EXTEND:
118                                 ix = wrap_clamp(ix, width);
119                                 iy = wrap_clamp(iy, height);
120                                 break;
121                         default:
122                                 kernel_assert(0);
123                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
124                 }
125                 return read(data[ix + iy*width]);
126         }
127
128         static ccl_always_inline float4 interp_linear(const TextureInfo& info,
129                                                       float x, float y)
130         {
131                 const T *data = (const T*)info.data;
132                 const int width = info.width;
133                 const int height = info.height;
134                 int ix, iy, nix, niy;
135                 const float tx = frac(x*(float)width - 0.5f, &ix);
136                 const float ty = frac(y*(float)height - 0.5f, &iy);
137                 switch(info.extension) {
138                         case EXTENSION_REPEAT:
139                                 ix = wrap_periodic(ix, width);
140                                 iy = wrap_periodic(iy, height);
141                                 nix = wrap_periodic(ix+1, width);
142                                 niy = wrap_periodic(iy+1, height);
143                                 break;
144                         case EXTENSION_CLIP:
145                                 nix = ix + 1;
146                                 niy = iy + 1;
147                                 break;
148                         case EXTENSION_EXTEND:
149                                 nix = wrap_clamp(ix+1, width);
150                                 niy = wrap_clamp(iy+1, height);
151                                 ix = wrap_clamp(ix, width);
152                                 iy = wrap_clamp(iy, height);
153                                 break;
154                         default:
155                                 kernel_assert(0);
156                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
157                 }
158                 return (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, width, height) +
159                        (1.0f - ty) * tx * read(data, nix, iy, width, height) +
160                        ty * (1.0f - tx) * read(data, ix, niy, width, height) +
161                        ty * tx * read(data, nix, niy, width, height);
162         }
163
164         static ccl_always_inline float4 interp_cubic(const TextureInfo& info,
165                                                      float x, float y)
166         {
167                 const T *data = (const T*)info.data;
168                 const int width = info.width;
169                 const int height = info.height;
170                 int ix, iy, nix, niy;
171                 const float tx = frac(x*(float)width - 0.5f, &ix);
172                 const float ty = frac(y*(float)height - 0.5f, &iy);
173                 int pix, piy, nnix, nniy;
174                 switch(info.extension) {
175                         case EXTENSION_REPEAT:
176                                 ix = wrap_periodic(ix, width);
177                                 iy = wrap_periodic(iy, height);
178                                 pix = wrap_periodic(ix-1, width);
179                                 piy = wrap_periodic(iy-1, height);
180                                 nix = wrap_periodic(ix+1, width);
181                                 niy = wrap_periodic(iy+1, height);
182                                 nnix = wrap_periodic(ix+2, width);
183                                 nniy = wrap_periodic(iy+2, height);
184                                 break;
185                         case EXTENSION_CLIP:
186                                 pix = ix - 1;
187                                 piy = iy - 1;
188                                 nix = ix + 1;
189                                 niy = iy + 1;
190                                 nnix = ix + 2;
191                                 nniy = iy + 2;
192                                 break;
193                         case EXTENSION_EXTEND:
194                                 pix = wrap_clamp(ix-1, width);
195                                 piy = wrap_clamp(iy-1, height);
196                                 nix = wrap_clamp(ix+1, width);
197                                 niy = wrap_clamp(iy+1, height);
198                                 nnix = wrap_clamp(ix+2, width);
199                                 nniy = wrap_clamp(iy+2, height);
200                                 ix = wrap_clamp(ix, width);
201                                 iy = wrap_clamp(iy, height);
202                                 break;
203                         default:
204                                 kernel_assert(0);
205                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
206                 }
207                 const int xc[4] = {pix, ix, nix, nnix};
208                 const int yc[4] = {piy, iy, niy, nniy};
209                 float u[4], v[4];
210                 /* Some helper macro to keep code reasonable size,
211                  * let compiler to inline all the matrix multiplications.
212                  */
213 #define DATA(x, y) (read(data, xc[x], yc[y], width, height))
214 #define TERM(col) \
215                 (v[col] * (u[0] * DATA(0, col) + \
216                            u[1] * DATA(1, col) + \
217                            u[2] * DATA(2, col) + \
218                            u[3] * DATA(3, col)))
219
220                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
221                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
222
223                 /* Actual interpolation. */
224                 return TERM(0) + TERM(1) + TERM(2) + TERM(3);
225 #undef TERM
226 #undef DATA
227         }
228
229         static ccl_always_inline float4 interp(const TextureInfo& info,
230                                                float x, float y)
231         {
232                 if(UNLIKELY(!info.data)) {
233                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
234                 }
235                 switch(info.interpolation) {
236                         case INTERPOLATION_CLOSEST:
237                                 return interp_closest(info, x, y);
238                         case INTERPOLATION_LINEAR:
239                                 return interp_linear(info, x, y);
240                         default:
241                                 return interp_cubic(info, x, y);
242                 }
243         }
244
245         /* ********  3D interpolation ******** */
246
247         static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info,
248                                                           float x, float y, float z)
249         {
250                 int width = info.width;
251                 int height = info.height;
252                 int depth = info.depth;
253                 int ix, iy, iz;
254
255                 frac(x*(float)width, &ix);
256                 frac(y*(float)height, &iy);
257                 frac(z*(float)depth, &iz);
258
259                 switch(info.extension) {
260                         case EXTENSION_REPEAT:
261                                 ix = wrap_periodic(ix, width);
262                                 iy = wrap_periodic(iy, height);
263                                 iz = wrap_periodic(iz, depth);
264                                 break;
265                         case EXTENSION_CLIP:
266                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
267                                    x > 1.0f || y > 1.0f || z > 1.0f)
268                                 {
269                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
270                                 }
271                                 ATTR_FALLTHROUGH;
272                         case EXTENSION_EXTEND:
273                                 ix = wrap_clamp(ix, width);
274                                 iy = wrap_clamp(iy, height);
275                                 iz = wrap_clamp(iz, depth);
276                                 break;
277                         default:
278                                 kernel_assert(0);
279                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
280                 }
281
282                 const T *data = (const T*)info.data;
283                 return read(data[ix + iy*width + iz*width*height]);
284         }
285
286         static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
287                                                          float x, float y, float z)
288         {
289                 int width = info.width;
290                 int height = info.height;
291                 int depth = info.depth;
292                 int ix, iy, iz;
293                 int nix, niy, niz;
294
295                 float tx = frac(x*(float)width - 0.5f, &ix);
296                 float ty = frac(y*(float)height - 0.5f, &iy);
297                 float tz = frac(z*(float)depth - 0.5f, &iz);
298
299                 switch(info.extension) {
300                         case EXTENSION_REPEAT:
301                                 ix = wrap_periodic(ix, width);
302                                 iy = wrap_periodic(iy, height);
303                                 iz = wrap_periodic(iz, depth);
304
305                                 nix = wrap_periodic(ix+1, width);
306                                 niy = wrap_periodic(iy+1, height);
307                                 niz = wrap_periodic(iz+1, depth);
308                                 break;
309                         case EXTENSION_CLIP:
310                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
311                                    x > 1.0f || y > 1.0f || z > 1.0f)
312                                 {
313                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
314                                 }
315                                 ATTR_FALLTHROUGH;
316                         case EXTENSION_EXTEND:
317                                 nix = wrap_clamp(ix+1, width);
318                                 niy = wrap_clamp(iy+1, height);
319                                 niz = wrap_clamp(iz+1, depth);
320
321                                 ix = wrap_clamp(ix, width);
322                                 iy = wrap_clamp(iy, height);
323                                 iz = wrap_clamp(iz, depth);
324                                 break;
325                         default:
326                                 kernel_assert(0);
327                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
328                 }
329
330                 const T *data = (const T*)info.data;
331                 float4 r;
332
333                 r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
334                 r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
335                 r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
336                 r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
337
338                 r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
339                 r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
340                 r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
341                 r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
342
343                 return r;
344         }
345
346         /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
347          * causing stack overflow issue in this function unless it is inlined.
348          *
349          * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
350          * enabled.
351          */
352 #if defined(__GNUC__) || defined(__clang__)
353         static ccl_always_inline
354 #else
355         static ccl_never_inline
356 #endif
357         float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
358         {
359                 int width = info.width;
360                 int height = info.height;
361                 int depth = info.depth;
362                 int ix, iy, iz;
363                 int nix, niy, niz;
364                 /* Tricubic b-spline interpolation. */
365                 const float tx = frac(x*(float)width - 0.5f, &ix);
366                 const float ty = frac(y*(float)height - 0.5f, &iy);
367                 const float tz = frac(z*(float)depth - 0.5f, &iz);
368                 int pix, piy, piz, nnix, nniy, nniz;
369
370                 switch(info.extension) {
371                         case EXTENSION_REPEAT:
372                                 ix = wrap_periodic(ix, width);
373                                 iy = wrap_periodic(iy, height);
374                                 iz = wrap_periodic(iz, depth);
375
376                                 pix = wrap_periodic(ix-1, width);
377                                 piy = wrap_periodic(iy-1, height);
378                                 piz = wrap_periodic(iz-1, depth);
379
380                                 nix = wrap_periodic(ix+1, width);
381                                 niy = wrap_periodic(iy+1, height);
382                                 niz = wrap_periodic(iz+1, depth);
383
384                                 nnix = wrap_periodic(ix+2, width);
385                                 nniy = wrap_periodic(iy+2, height);
386                                 nniz = wrap_periodic(iz+2, depth);
387                                 break;
388                         case EXTENSION_CLIP:
389                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
390                                    x > 1.0f || y > 1.0f || z > 1.0f)
391                                 {
392                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
393                                 }
394                                 ATTR_FALLTHROUGH;
395                         case EXTENSION_EXTEND:
396                                 pix = wrap_clamp(ix-1, width);
397                                 piy = wrap_clamp(iy-1, height);
398                                 piz = wrap_clamp(iz-1, depth);
399
400                                 nix = wrap_clamp(ix+1, width);
401                                 niy = wrap_clamp(iy+1, height);
402                                 niz = wrap_clamp(iz+1, depth);
403
404                                 nnix = wrap_clamp(ix+2, width);
405                                 nniy = wrap_clamp(iy+2, height);
406                                 nniz = wrap_clamp(iz+2, depth);
407
408                                 ix = wrap_clamp(ix, width);
409                                 iy = wrap_clamp(iy, height);
410                                 iz = wrap_clamp(iz, depth);
411                                 break;
412                         default:
413                                 kernel_assert(0);
414                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
415                 }
416
417                 const int xc[4] = {pix, ix, nix, nnix};
418                 const int yc[4] = {width * piy,
419                                    width * iy,
420                                    width * niy,
421                                    width * nniy};
422                 const int zc[4] = {width * height * piz,
423                                    width * height * iz,
424                                    width * height * niz,
425                                    width * height * nniz};
426                 float u[4], v[4], w[4];
427
428                 /* Some helper macro to keep code reasonable size,
429                  * let compiler to inline all the matrix multiplications.
430                  */
431 #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
432 #define COL_TERM(col, row) \
433                 (v[col] * (u[0] * DATA(0, col, row) + \
434                            u[1] * DATA(1, col, row) + \
435                            u[2] * DATA(2, col, row) + \
436                            u[3] * DATA(3, col, row)))
437 #define ROW_TERM(row) \
438                 (w[row] * (COL_TERM(0, row) + \
439                            COL_TERM(1, row) + \
440                            COL_TERM(2, row) + \
441                            COL_TERM(3, row)))
442
443                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
444                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
445                 SET_CUBIC_SPLINE_WEIGHTS(w, tz);
446
447                 /* Actual interpolation. */
448                 const T *data = (const T*)info.data;
449                 return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
450
451 #undef COL_TERM
452 #undef ROW_TERM
453 #undef DATA
454         }
455
456         static ccl_always_inline float4 interp_3d(const TextureInfo& info,
457                                                   float x, float y, float z,
458                                                   InterpolationType interp)
459         {
460                 if(UNLIKELY(!info.data))
461                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
462
463                 switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
464                         case INTERPOLATION_CLOSEST:
465                                 return interp_3d_closest(info, x, y, z);
466                         case INTERPOLATION_LINEAR:
467                                 return interp_3d_linear(info, x, y, z);
468                         default:
469                                 return interp_3d_tricubic(info, x, y, z);
470                 }
471         }
472 #undef SET_CUBIC_SPLINE_WEIGHTS
473 };
474
475 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
476 {
477         const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
478
479         switch(kernel_tex_type(id)) {
480                 case IMAGE_DATA_TYPE_HALF:
481                         return TextureInterpolator<half>::interp(info, x, y);
482                 case IMAGE_DATA_TYPE_BYTE:
483                         return TextureInterpolator<uchar>::interp(info, x, y);
484                 case IMAGE_DATA_TYPE_FLOAT:
485                         return TextureInterpolator<float>::interp(info, x, y);
486                 case IMAGE_DATA_TYPE_HALF4:
487                         return TextureInterpolator<half4>::interp(info, x, y);
488                 case IMAGE_DATA_TYPE_BYTE4:
489                         return TextureInterpolator<uchar4>::interp(info, x, y);
490                 case IMAGE_DATA_TYPE_FLOAT4:
491                 default:
492                         return TextureInterpolator<float4>::interp(info, x, y);
493         }
494 }
495
496 ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
497 {
498         const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
499
500         switch(kernel_tex_type(id)) {
501                 case IMAGE_DATA_TYPE_HALF:
502                         return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
503                 case IMAGE_DATA_TYPE_BYTE:
504                         return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
505                 case IMAGE_DATA_TYPE_FLOAT:
506                         return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
507                 case IMAGE_DATA_TYPE_HALF4:
508                         return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
509                 case IMAGE_DATA_TYPE_BYTE4:
510                         return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
511                 case IMAGE_DATA_TYPE_FLOAT4:
512                 default:
513                         return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
514         }
515 }
516
517 CCL_NAMESPACE_END
518
519 #endif // __KERNEL_CPU_IMAGE_H__