Cycles: Cleanup, split 2D interpolation function
[blender.git] / intern / cycles / kernel / kernels / cpu / kernel_cpu_image.h
1 /*
2  * Copyright 2011-2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __KERNEL_CPU_IMAGE_H__
18 #define __KERNEL_CPU_IMAGE_H__
19
20 CCL_NAMESPACE_BEGIN
21
22 template<typename T> struct TextureInterpolator  {
23 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
24         { \
25                 u[0] = (((-1.0f/6.0f)* t + 0.5f) * t - 0.5f) * t + (1.0f/6.0f); \
26                 u[1] =  ((      0.5f * t - 1.0f) * t       ) * t + (2.0f/3.0f); \
27                 u[2] =  ((     -0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f/6.0f); \
28                 u[3] = (1.0f / 6.0f) * t * t * t; \
29         } (void)0
30
31         static ccl_always_inline float4 read(float4 r)
32         {
33                 return r;
34         }
35
36         static ccl_always_inline float4 read(uchar4 r)
37         {
38                 float f = 1.0f/255.0f;
39                 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
40         }
41
42         static ccl_always_inline float4 read(uchar r)
43         {
44                 float f = r*(1.0f/255.0f);
45                 return make_float4(f, f, f, 1.0f);
46         }
47
48         static ccl_always_inline float4 read(float r)
49         {
50                 /* TODO(dingto): Optimize this, so interpolation
51                  * happens on float instead of float4 */
52                 return make_float4(r, r, r, 1.0f);
53         }
54
55         static ccl_always_inline float4 read(half4 r)
56         {
57                 return half4_to_float4(r);
58         }
59
60         static ccl_always_inline float4 read(half r)
61         {
62                 float f = half_to_float(r);
63                 return make_float4(f, f, f, 1.0f);
64         }
65
66         static ccl_always_inline int wrap_periodic(int x, int width)
67         {
68                 x %= width;
69                 if(x < 0)
70                         x += width;
71                 return x;
72         }
73
74         static ccl_always_inline int wrap_clamp(int x, int width)
75         {
76                 return clamp(x, 0, width-1);
77         }
78
79         static ccl_always_inline float frac(float x, int *ix)
80         {
81                 int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
82                 *ix = i;
83                 return x - (float)i;
84         }
85
86         /* ********  2D interpolation ******** */
87
88         static ccl_always_inline float4 interp_closest(const TextureInfo& info,
89                                                        float x, float y)
90         {
91                 const T *data = (const T*)info.data;
92                 const int width = info.width;
93                 const int height = info.height;
94                 int ix, iy;
95                 frac(x*(float)width, &ix);
96                 frac(y*(float)height, &iy);
97                 switch(info.extension) {
98                         case EXTENSION_REPEAT:
99                                 ix = wrap_periodic(ix, width);
100                                 iy = wrap_periodic(iy, height);
101                                 break;
102                         case EXTENSION_CLIP:
103                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
104                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
105                                 }
106                                 ATTR_FALLTHROUGH;
107                         case EXTENSION_EXTEND:
108                                 ix = wrap_clamp(ix, width);
109                                 iy = wrap_clamp(iy, height);
110                                 break;
111                         default:
112                                 kernel_assert(0);
113                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
114                 }
115                 return read(data[ix + iy*width]);
116         }
117
118         static ccl_always_inline float4 interp_linear(const TextureInfo& info,
119                                                       float x, float y)
120         {
121                 const T *data = (const T*)info.data;
122                 const int width = info.width;
123                 const int height = info.height;
124                 int ix, iy, nix, niy;
125                 const float tx = frac(x*(float)width - 0.5f, &ix);
126                 const float ty = frac(y*(float)height - 0.5f, &iy);
127                 switch(info.extension) {
128                         case EXTENSION_REPEAT:
129                                 ix = wrap_periodic(ix, width);
130                                 iy = wrap_periodic(iy, height);
131                                 nix = wrap_periodic(ix+1, width);
132                                 niy = wrap_periodic(iy+1, height);
133                                 break;
134                         case EXTENSION_CLIP:
135                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
136                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
137                                 }
138                                 ATTR_FALLTHROUGH;
139                         case EXTENSION_EXTEND:
140                                 nix = wrap_clamp(ix+1, width);
141                                 niy = wrap_clamp(iy+1, height);
142                                 ix = wrap_clamp(ix, width);
143                                 iy = wrap_clamp(iy, height);
144                                 break;
145                         default:
146                                 kernel_assert(0);
147                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
148                 }
149                 float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
150                 r += (1.0f - ty)*tx*read(data[nix + iy*width]);
151                 r += ty*(1.0f - tx)*read(data[ix + niy*width]);
152                 r += ty*tx*read(data[nix + niy*width]);
153                 return r;
154         }
155
156         static ccl_always_inline float4 interp_cubic(const TextureInfo& info,
157                                                      float x, float y)
158         {
159                 const T *data = (const T*)info.data;
160                 const int width = info.width;
161                 const int height = info.height;
162                 int ix, iy, nix, niy;
163                 const float tx = frac(x*(float)width - 0.5f, &ix);
164                 const float ty = frac(y*(float)height - 0.5f, &iy);
165                 int pix, piy, nnix, nniy;
166                 switch(info.extension) {
167                         case EXTENSION_REPEAT:
168                                 ix = wrap_periodic(ix, width);
169                                 iy = wrap_periodic(iy, height);
170                                 pix = wrap_periodic(ix-1, width);
171                                 piy = wrap_periodic(iy-1, height);
172                                 nix = wrap_periodic(ix+1, width);
173                                 niy = wrap_periodic(iy+1, height);
174                                 nnix = wrap_periodic(ix+2, width);
175                                 nniy = wrap_periodic(iy+2, height);
176                                 break;
177                         case EXTENSION_CLIP:
178                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
179                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
180                                 }
181                                 ATTR_FALLTHROUGH;
182                         case EXTENSION_EXTEND:
183                                 pix = wrap_clamp(ix-1, width);
184                                 piy = wrap_clamp(iy-1, height);
185                                 nix = wrap_clamp(ix+1, width);
186                                 niy = wrap_clamp(iy+1, height);
187                                 nnix = wrap_clamp(ix+2, width);
188                                 nniy = wrap_clamp(iy+2, height);
189                                 ix = wrap_clamp(ix, width);
190                                 iy = wrap_clamp(iy, height);
191                                 break;
192                         default:
193                                 kernel_assert(0);
194                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
195                 }
196                 const int xc[4] = {pix, ix, nix, nnix};
197                 const int yc[4] = {width * piy,
198                                    width * iy,
199                                    width * niy,
200                                    width * nniy};
201                 float u[4], v[4];
202                 /* Some helper macro to keep code reasonable size,
203                  * let compiler to inline all the matrix multiplications.
204                  */
205 #define DATA(x, y) (read(data[xc[x] + yc[y]]))
206 #define TERM(col) \
207                 (v[col] * (u[0] * DATA(0, col) + \
208                            u[1] * DATA(1, col) + \
209                            u[2] * DATA(2, col) + \
210                            u[3] * DATA(3, col)))
211
212                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
213                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
214
215                 /* Actual interpolation. */
216                 return TERM(0) + TERM(1) + TERM(2) + TERM(3);
217 #undef TERM
218 #undef DATA
219         }
220
221         static ccl_always_inline float4 interp(const TextureInfo& info, float x, float y)
222         {
223                 if(UNLIKELY(!info.data)) {
224                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
225                 }
226                 switch(info.interpolation) {
227                         case INTERPOLATION_CLOSEST:
228                                 return interp_closest(info, x, y);
229                         case INTERPOLATION_LINEAR:
230                                 return interp_linear(info, x, y);
231                         default:
232                                 return interp_cubic(info, x, y);
233                 }
234         }
235
236         /* ********  3D interpolation ******** */
237
238         static ccl_always_inline float4 interp_3d_closest(const TextureInfo& info,
239                                                           float x, float y, float z)
240         {
241                 int width = info.width;
242                 int height = info.height;
243                 int depth = info.depth;
244                 int ix, iy, iz;
245
246                 frac(x*(float)width, &ix);
247                 frac(y*(float)height, &iy);
248                 frac(z*(float)depth, &iz);
249
250                 switch(info.extension) {
251                         case EXTENSION_REPEAT:
252                                 ix = wrap_periodic(ix, width);
253                                 iy = wrap_periodic(iy, height);
254                                 iz = wrap_periodic(iz, depth);
255                                 break;
256                         case EXTENSION_CLIP:
257                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
258                                    x > 1.0f || y > 1.0f || z > 1.0f)
259                                 {
260                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
261                                 }
262                                 ATTR_FALLTHROUGH;
263                         case EXTENSION_EXTEND:
264                                 ix = wrap_clamp(ix, width);
265                                 iy = wrap_clamp(iy, height);
266                                 iz = wrap_clamp(iz, depth);
267                                 break;
268                         default:
269                                 kernel_assert(0);
270                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
271                 }
272
273                 const T *data = (const T*)info.data;
274                 return read(data[ix + iy*width + iz*width*height]);
275         }
276
277         static ccl_always_inline float4 interp_3d_linear(const TextureInfo& info,
278                                                          float x, float y, float z)
279         {
280                 int width = info.width;
281                 int height = info.height;
282                 int depth = info.depth;
283                 int ix, iy, iz;
284                 int nix, niy, niz;
285
286                 float tx = frac(x*(float)width - 0.5f, &ix);
287                 float ty = frac(y*(float)height - 0.5f, &iy);
288                 float tz = frac(z*(float)depth - 0.5f, &iz);
289
290                 switch(info.extension) {
291                         case EXTENSION_REPEAT:
292                                 ix = wrap_periodic(ix, width);
293                                 iy = wrap_periodic(iy, height);
294                                 iz = wrap_periodic(iz, depth);
295
296                                 nix = wrap_periodic(ix+1, width);
297                                 niy = wrap_periodic(iy+1, height);
298                                 niz = wrap_periodic(iz+1, depth);
299                                 break;
300                         case EXTENSION_CLIP:
301                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
302                                    x > 1.0f || y > 1.0f || z > 1.0f)
303                                 {
304                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
305                                 }
306                                 ATTR_FALLTHROUGH;
307                         case EXTENSION_EXTEND:
308                                 nix = wrap_clamp(ix+1, width);
309                                 niy = wrap_clamp(iy+1, height);
310                                 niz = wrap_clamp(iz+1, depth);
311
312                                 ix = wrap_clamp(ix, width);
313                                 iy = wrap_clamp(iy, height);
314                                 iz = wrap_clamp(iz, depth);
315                                 break;
316                         default:
317                                 kernel_assert(0);
318                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
319                 }
320
321                 const T *data = (const T*)info.data;
322                 float4 r;
323
324                 r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + iz*width*height]);
325                 r += (1.0f - tz)*(1.0f - ty)*tx*read(data[nix + iy*width + iz*width*height]);
326                 r += (1.0f - tz)*ty*(1.0f - tx)*read(data[ix + niy*width + iz*width*height]);
327                 r += (1.0f - tz)*ty*tx*read(data[nix + niy*width + iz*width*height]);
328
329                 r += tz*(1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width + niz*width*height]);
330                 r += tz*(1.0f - ty)*tx*read(data[nix + iy*width + niz*width*height]);
331                 r += tz*ty*(1.0f - tx)*read(data[ix + niy*width + niz*width*height]);
332                 r += tz*ty*tx*read(data[nix + niy*width + niz*width*height]);
333
334                 return r;
335         }
336
337         /* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
338          * causing stack overflow issue in this function unless it is inlined.
339          *
340          * Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
341          * enabled.
342          */
343 #ifdef __GNUC__
344         static ccl_always_inline
345 #else
346         static ccl_never_inline
347 #endif
348         float4 interp_3d_tricubic(const TextureInfo& info, float x, float y, float z)
349         {
350                 int width = info.width;
351                 int height = info.height;
352                 int depth = info.depth;
353                 int ix, iy, iz;
354                 int nix, niy, niz;
355                 /* Tricubic b-spline interpolation. */
356                 const float tx = frac(x*(float)width - 0.5f, &ix);
357                 const float ty = frac(y*(float)height - 0.5f, &iy);
358                 const float tz = frac(z*(float)depth - 0.5f, &iz);
359                 int pix, piy, piz, nnix, nniy, nniz;
360
361                 switch(info.extension) {
362                         case EXTENSION_REPEAT:
363                                 ix = wrap_periodic(ix, width);
364                                 iy = wrap_periodic(iy, height);
365                                 iz = wrap_periodic(iz, depth);
366
367                                 pix = wrap_periodic(ix-1, width);
368                                 piy = wrap_periodic(iy-1, height);
369                                 piz = wrap_periodic(iz-1, depth);
370
371                                 nix = wrap_periodic(ix+1, width);
372                                 niy = wrap_periodic(iy+1, height);
373                                 niz = wrap_periodic(iz+1, depth);
374
375                                 nnix = wrap_periodic(ix+2, width);
376                                 nniy = wrap_periodic(iy+2, height);
377                                 nniz = wrap_periodic(iz+2, depth);
378                                 break;
379                         case EXTENSION_CLIP:
380                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
381                                    x > 1.0f || y > 1.0f || z > 1.0f)
382                                 {
383                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
384                                 }
385                                 ATTR_FALLTHROUGH;
386                         case EXTENSION_EXTEND:
387                                 pix = wrap_clamp(ix-1, width);
388                                 piy = wrap_clamp(iy-1, height);
389                                 piz = wrap_clamp(iz-1, depth);
390
391                                 nix = wrap_clamp(ix+1, width);
392                                 niy = wrap_clamp(iy+1, height);
393                                 niz = wrap_clamp(iz+1, depth);
394
395                                 nnix = wrap_clamp(ix+2, width);
396                                 nniy = wrap_clamp(iy+2, height);
397                                 nniz = wrap_clamp(iz+2, depth);
398
399                                 ix = wrap_clamp(ix, width);
400                                 iy = wrap_clamp(iy, height);
401                                 iz = wrap_clamp(iz, depth);
402                                 break;
403                         default:
404                                 kernel_assert(0);
405                                 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
406                 }
407
408                 const int xc[4] = {pix, ix, nix, nnix};
409                 const int yc[4] = {width * piy,
410                                    width * iy,
411                                    width * niy,
412                                    width * nniy};
413                 const int zc[4] = {width * height * piz,
414                                    width * height * iz,
415                                    width * height * niz,
416                                    width * height * nniz};
417                 float u[4], v[4], w[4];
418
419                 /* Some helper macro to keep code reasonable size,
420                  * let compiler to inline all the matrix multiplications.
421                  */
422 #define DATA(x, y, z) (read(data[xc[x] + yc[y] + zc[z]]))
423 #define COL_TERM(col, row) \
424                 (v[col] * (u[0] * DATA(0, col, row) + \
425                            u[1] * DATA(1, col, row) + \
426                            u[2] * DATA(2, col, row) + \
427                            u[3] * DATA(3, col, row)))
428 #define ROW_TERM(row) \
429                 (w[row] * (COL_TERM(0, row) + \
430                            COL_TERM(1, row) + \
431                            COL_TERM(2, row) + \
432                            COL_TERM(3, row)))
433
434                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
435                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
436                 SET_CUBIC_SPLINE_WEIGHTS(w, tz);
437
438                 /* Actual interpolation. */
439                 const T *data = (const T*)info.data;
440                 return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
441
442 #undef COL_TERM
443 #undef ROW_TERM
444 #undef DATA
445         }
446
447         static ccl_always_inline float4 interp_3d(const TextureInfo& info,
448                                                   float x, float y, float z,
449                                                   InterpolationType interp)
450         {
451                 if(UNLIKELY(!info.data))
452                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
453
454                 switch((interp == INTERPOLATION_NONE)? info.interpolation: interp) {
455                         case INTERPOLATION_CLOSEST:
456                                 return interp_3d_closest(info, x, y, z);
457                         case INTERPOLATION_LINEAR:
458                                 return interp_3d_linear(info, x, y, z);
459                         default:
460                                 return interp_3d_tricubic(info, x, y, z);
461                 }
462         }
463 #undef SET_CUBIC_SPLINE_WEIGHTS
464 };
465
466 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
467 {
468         const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
469
470         switch(kernel_tex_type(id)) {
471                 case IMAGE_DATA_TYPE_HALF:
472                         return TextureInterpolator<half>::interp(info, x, y);
473                 case IMAGE_DATA_TYPE_BYTE:
474                         return TextureInterpolator<uchar>::interp(info, x, y);
475                 case IMAGE_DATA_TYPE_FLOAT:
476                         return TextureInterpolator<float>::interp(info, x, y);
477                 case IMAGE_DATA_TYPE_HALF4:
478                         return TextureInterpolator<half4>::interp(info, x, y);
479                 case IMAGE_DATA_TYPE_BYTE4:
480                         return TextureInterpolator<uchar4>::interp(info, x, y);
481                 case IMAGE_DATA_TYPE_FLOAT4:
482                 default:
483                         return TextureInterpolator<float4>::interp(info, x, y);
484         }
485 }
486
487 ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, InterpolationType interp)
488 {
489         const TextureInfo& info = kernel_tex_fetch(__texture_info, id);
490
491         switch(kernel_tex_type(id)) {
492                 case IMAGE_DATA_TYPE_HALF:
493                         return TextureInterpolator<half>::interp_3d(info, x, y, z, interp);
494                 case IMAGE_DATA_TYPE_BYTE:
495                         return TextureInterpolator<uchar>::interp_3d(info, x, y, z, interp);
496                 case IMAGE_DATA_TYPE_FLOAT:
497                         return TextureInterpolator<float>::interp_3d(info, x, y, z, interp);
498                 case IMAGE_DATA_TYPE_HALF4:
499                         return TextureInterpolator<half4>::interp_3d(info, x, y, z, interp);
500                 case IMAGE_DATA_TYPE_BYTE4:
501                         return TextureInterpolator<uchar4>::interp_3d(info, x, y, z, interp);
502                 case IMAGE_DATA_TYPE_FLOAT4:
503                 default:
504                         return TextureInterpolator<float4>::interp_3d(info, x, y, z, interp);
505         }
506 }
507
508 CCL_NAMESPACE_END
509
510 #endif // __KERNEL_CPU_IMAGE_H__