Cycles: OpenCL bicubic and tricubic texture interpolation support.
[blender-staging.git] / intern / cycles / kernel / kernels / opencl / kernel_opencl_image.h
1 /*
2  * Copyright 2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* For OpenCL we do manual lookup and interpolation. */
18
19 ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
20         const uint tex_offset = id
21 #define KERNEL_TEX(type, name) + 1
22 #include "kernel/kernel_textures.h"
23         ;
24
25         return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
26 }
27
/* Read element `index`, interpreted as `type`, from the buffer that backs the
 * texture described by `info`.
 *
 * The buffer is selected with info->cl_buffer and info->data is added to its
 * base before the cast to `type`. Expects a `kg` variable to be visible at the
 * point of use.
 * NOTE(review): since the addition happens before the cast, info->data is an
 * offset in units of the kg->buffers[] element type - presumably bytes; confirm.
 */
#define tex_fetch(type, info, index) \
        ((ccl_global type*)(kg->buffers[(info)->cl_buffer] + (info)->data))[(index)]
29
30 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
31 {
32         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
33         const int texture_type = kernel_tex_type(id);
34
35         /* Float4 */
36         if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
37                 return tex_fetch(float4, info, offset);
38         }
39         /* Byte4 */
40         else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
41                 uchar4 r = tex_fetch(uchar4, info, offset);
42                 float f = 1.0f/255.0f;
43                 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
44         }
45         /* Float */
46         else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
47                 float f = tex_fetch(float, info, offset);
48                 return make_float4(f, f, f, 1.0f);
49         }
50         /* Byte */
51         else {
52                 uchar r = tex_fetch(uchar, info, offset);
53                 float f = r * (1.0f/255.0f);
54                 return make_float4(f, f, f, 1.0f);
55         }
56 }
57
/* Wrap texel index `x` into [0, width) for repeating textures.
 *
 * The remainder of a negative `x` is negative (or zero), so it is shifted up
 * by one period to land in range.
 */
ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
{
        const int remainder = x % width;
        return (remainder < 0)? remainder + width: remainder;
}
65
66 ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
67 {
68         return clamp(x, 0, width-1);
69 }
70
71 ccl_device_inline float svm_image_texture_frac(float x, int *ix)
72 {
73         int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
74         *ix = i;
75         return x - (float)i;
76 }
77
/* Fill u[0..3] with the uniform cubic B-spline basis weights at parameter t:
 *
 *   u[0] = (1 - t)^3 / 6
 *   u[1] = (3t^3 - 6t^2 + 4) / 6
 *   u[2] = (-3t^3 + 3t^2 + 3t + 1) / 6
 *   u[3] = t^3 / 6
 *
 * The four weights always sum to 1. They are evaluated in Horner form.
 *
 * `t` is evaluated exactly once and both arguments may be arbitrary
 * expressions. The do/while(0) wrapper makes the macro behave as a single
 * statement, so it is also safe in an unbraced if/else.
 */
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
        do { \
                const float cubic_t_ = (t); \
                (u)[0] = (((-1.0f/6.0f)* cubic_t_ + 0.5f) * cubic_t_ - 0.5f) * cubic_t_ + (1.0f/6.0f); \
                (u)[1] =  ((      0.5f * cubic_t_ - 1.0f) * cubic_t_       ) * cubic_t_ + (2.0f/3.0f); \
                (u)[2] =  ((     -0.5f * cubic_t_ + 0.5f) * cubic_t_ + 0.5f) * cubic_t_ + (1.0f/6.0f); \
                (u)[3] = (1.0f / 6.0f) * cubic_t_ * cubic_t_ * cubic_t_; \
        } while(0)
85
86 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
87 {
88         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
89
90         uint width = info->width;
91         uint height = info->height;
92         uint interpolation = info->interpolation;
93         uint extension = info->extension;
94
95         /* Actual sampling. */
96         if(interpolation == INTERPOLATION_CLOSEST) {
97                 int ix, iy;
98                 svm_image_texture_frac(x*width, &ix);
99                 svm_image_texture_frac(y*height, &iy);
100
101                 if(extension == EXTENSION_REPEAT) {
102                         ix = svm_image_texture_wrap_periodic(ix, width);
103                         iy = svm_image_texture_wrap_periodic(iy, height);
104                 }
105                 else {
106                         if(extension == EXTENSION_CLIP) {
107                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
108                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
109                                 }
110                         }
111                         /* Fall through. */
112                         /* EXTENSION_EXTEND */
113                         ix = svm_image_texture_wrap_clamp(ix, width);
114                         iy = svm_image_texture_wrap_clamp(iy, height);
115                 }
116
117                 return svm_image_texture_read(kg, id, ix + iy*width);
118         }
119         else {
120                 /* Bilinear or bicubic interpolation. */
121                 int ix, iy, nix, niy;
122                 float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
123                 float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
124
125                 if(extension == EXTENSION_REPEAT) {
126                         ix = svm_image_texture_wrap_periodic(ix, width);
127                         iy = svm_image_texture_wrap_periodic(iy, height);
128                         nix = svm_image_texture_wrap_periodic(ix+1, width);
129                         niy = svm_image_texture_wrap_periodic(iy+1, height);
130                 }
131                 else {
132                         if(extension == EXTENSION_CLIP) {
133                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
134                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
135                                 }
136                         }
137                         ix = svm_image_texture_wrap_clamp(ix, width);
138                         iy = svm_image_texture_wrap_clamp(iy, height);
139                         nix = svm_image_texture_wrap_clamp(ix+1, width);
140                         niy = svm_image_texture_wrap_clamp(iy+1, height);
141                 }
142
143                 if(interpolation == INTERPOLATION_LINEAR) {
144                         /* Bilinear interpolation. */
145                         float4 r;
146                         r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width);
147                         r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width);
148                         r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width);
149                         r += ty*tx*svm_image_texture_read(kg, id, nix + niy*width);
150                         return r;
151                 }
152
153                 /* Bicubic interpolation. */
154                 int pix, piy, nnix, nniy;
155                 if(extension == EXTENSION_REPEAT) {
156                         pix = svm_image_texture_wrap_periodic(ix-1, width);
157                         piy = svm_image_texture_wrap_periodic(iy-1, height);
158                         nnix = svm_image_texture_wrap_periodic(ix+2, width);
159                         nniy = svm_image_texture_wrap_periodic(iy+2, height);
160                 }
161                 else {
162                         pix = svm_image_texture_wrap_clamp(ix-1, width);
163                         piy = svm_image_texture_wrap_clamp(iy-1, height);
164                         nnix = svm_image_texture_wrap_clamp(ix+2, width);
165                         nniy = svm_image_texture_wrap_clamp(iy+2, height);
166                 }
167
168                 const int xc[4] = {pix, ix, nix, nnix};
169                 const int yc[4] = {width * piy,
170                                    width * iy,
171                                    width * niy,
172                                    width * nniy};
173                 float u[4], v[4];
174                 /* Some helper macro to keep code reasonable size,
175                  * let compiler to inline all the matrix multiplications.
176                  */
177 #define DATA(x, y) (svm_image_texture_read(kg, id, xc[x] + yc[y]))
178 #define TERM(col) \
179                 (v[col] * (u[0] * DATA(0, col) + \
180                            u[1] * DATA(1, col) + \
181                            u[2] * DATA(2, col) + \
182                            u[3] * DATA(3, col)))
183
184                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
185                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
186
187                 /* Actual interpolation. */
188                 return TERM(0) + TERM(1) + TERM(2) + TERM(3);
189 #undef TERM
190 #undef DATA
191         }
192 }
193
194
195 ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
196 {
197         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
198
199         uint width = info->width;
200         uint height = info->height;
201         uint depth = info->depth;
202         uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
203         uint extension = info->extension;
204
205         /* Actual sampling. */
206         if(interpolation == INTERPOLATION_CLOSEST) {
207                 int ix, iy, iz;
208                 svm_image_texture_frac(x*width, &ix);
209                 svm_image_texture_frac(y*height, &iy);
210                 svm_image_texture_frac(z*depth, &iz);
211
212                 if(extension == EXTENSION_REPEAT) {
213                         ix = svm_image_texture_wrap_periodic(ix, width);
214                         iy = svm_image_texture_wrap_periodic(iy, height);
215                         iz = svm_image_texture_wrap_periodic(iz, depth);
216                 }
217                 else {
218                         if(extension == EXTENSION_CLIP) {
219                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
220                                    x > 1.0f || y > 1.0f || z > 1.0f)
221                                 {
222                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
223                                 }
224                         }
225                         /* Fall through. */
226                         /* EXTENSION_EXTEND */
227                         ix = svm_image_texture_wrap_clamp(ix, width);
228                         iy = svm_image_texture_wrap_clamp(iy, height);
229                         iz = svm_image_texture_wrap_clamp(iz, depth);
230                 }
231                 return svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
232         }
233         else {
234                 /* Bilinear or bicubic interpolation. */
235                 int ix, iy, iz, nix, niy, niz;
236                 float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
237                 float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
238                 float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
239
240                 if(extension == EXTENSION_REPEAT) {
241                         ix = svm_image_texture_wrap_periodic(ix, width);
242                         iy = svm_image_texture_wrap_periodic(iy, height);
243                         iz = svm_image_texture_wrap_periodic(iz, depth);
244
245                         nix = svm_image_texture_wrap_periodic(ix+1, width);
246                         niy = svm_image_texture_wrap_periodic(iy+1, height);
247                         niz = svm_image_texture_wrap_periodic(iz+1, depth);
248                 }
249                 else {
250                         if(extension == EXTENSION_CLIP) {
251                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
252                                    x > 1.0f || y > 1.0f || z > 1.0f)
253                                 {
254                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
255                                 }
256                         }
257                         /* Fall through. */
258                         /*  EXTENSION_EXTEND */
259                         nix = svm_image_texture_wrap_clamp(ix+1, width);
260                         niy = svm_image_texture_wrap_clamp(iy+1, height);
261                         niz = svm_image_texture_wrap_clamp(iz+1, depth);
262
263                         ix = svm_image_texture_wrap_clamp(ix, width);
264                         iy = svm_image_texture_wrap_clamp(iy, height);
265                         iz = svm_image_texture_wrap_clamp(iz, depth);
266                 }
267
268                 if(interpolation == INTERPOLATION_LINEAR) {
269                         /* Bilinear interpolation. */
270                         float4 r;
271                         r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + iz*width*height);
272                         r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + iz*width*height);
273                         r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + iz*width*height);
274                         r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + iz*width*height);
275
276                         r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, ix + iy*width + niz*width*height);
277                         r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, nix + iy*width + niz*width*height);
278                         r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, ix + niy*width + niz*width*height);
279                         r += tz*ty*tx*svm_image_texture_read(kg, id, nix + niy*width + niz*width*height);
280                         return r;
281                 }
282
283                 /* Bicubic interpolation. */
284                 int pix, piy, piz, nnix, nniy, nniz;
285                 if(extension == EXTENSION_REPEAT) {
286                         pix = svm_image_texture_wrap_periodic(ix-1, width);
287                         piy = svm_image_texture_wrap_periodic(iy-1, height);
288                         piz = svm_image_texture_wrap_periodic(iz-1, depth);
289                         nnix = svm_image_texture_wrap_periodic(ix+2, width);
290                         nniy = svm_image_texture_wrap_periodic(iy+2, height);
291                         nniz = svm_image_texture_wrap_periodic(iz+2, depth);
292                 }
293                 else {
294                         pix = svm_image_texture_wrap_clamp(ix-1, width);
295                         piy = svm_image_texture_wrap_clamp(iy-1, height);
296                         piz = svm_image_texture_wrap_clamp(iz-1, depth);
297                         nnix = svm_image_texture_wrap_clamp(ix+2, width);
298                         nniy = svm_image_texture_wrap_clamp(iy+2, height);
299                         nniz = svm_image_texture_wrap_clamp(iz+2, depth);
300                 }
301
302                 const int xc[4] = {pix, ix, nix, nnix};
303                 const int yc[4] = {width * piy,
304                                    width * iy,
305                                    width * niy,
306                                    width * nniy};
307                 const int zc[4] = {width * height * piz,
308                                    width * height * iz,
309                                    width * height * niz,
310                                    width * height * nniz};
311                 float u[4], v[4], w[4];
312
313                 /* Some helper macro to keep code reasonable size,
314                  * let compiler to inline all the matrix multiplications.
315                  */
316 #define DATA(x, y, z) (svm_image_texture_read(kg, id, xc[x] + yc[y] + zc[z]))
317 #define COL_TERM(col, row) \
318                 (v[col] * (u[0] * DATA(0, col, row) + \
319                            u[1] * DATA(1, col, row) + \
320                            u[2] * DATA(2, col, row) + \
321                            u[3] * DATA(3, col, row)))
322 #define ROW_TERM(row) \
323                 (w[row] * (COL_TERM(0, row) + \
324                            COL_TERM(1, row) + \
325                            COL_TERM(2, row) + \
326                            COL_TERM(3, row)))
327
328                 SET_CUBIC_SPLINE_WEIGHTS(u, tx);
329                 SET_CUBIC_SPLINE_WEIGHTS(v, ty);
330                 SET_CUBIC_SPLINE_WEIGHTS(w, tz);
331
332                 /* Actual interpolation. */
333                 return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
334
335 #undef COL_TERM
336 #undef ROW_TERM
337 #undef DATA
338         }
339 }
340
341 #undef SET_CUBIC_SPLINE_WEIGHTS