Cycles: CUDA bicubic and tricubic texture interpolation support.
[blender.git] / intern / cycles / kernel / kernels / opencl / kernel_opencl_image.h
1 /*
2  * Copyright 2016 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /* For OpenCL we do manual lookup and interpolation. */
18
19 ccl_device_inline ccl_global TextureInfo* kernel_tex_info(KernelGlobals *kg, uint id) {
20         const uint tex_offset = id
21 #define KERNEL_TEX(type, name) + 1
22 #include "kernel/kernel_textures.h"
23         ;
24
25         return &((ccl_global TextureInfo*)kg->buffers[0])[tex_offset];
26 }
27
/* Fetch element `index` of type `type` from the texture described by `info`.
 *
 * `info->data` is added to the buffer pointer kg->buffers[info->cl_buffer]
 * before the result is cast to `ccl_global type*`. The macro relies on a
 * variable named `kg` being in scope at the expansion site. `info` and
 * `index` are parenthesized so arbitrary expressions can be passed safely. */
#define tex_fetch(type, info, index) ((ccl_global type*)(kg->buffers[(info)->cl_buffer] + (info)->data))[(index)]
29
30 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
31 {
32         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
33         const int texture_type = kernel_tex_type(id);
34
35         /* Float4 */
36         if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
37                 return tex_fetch(float4, info, offset);
38         }
39         /* Byte4 */
40         else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
41                 uchar4 r = tex_fetch(uchar4, info, offset);
42                 float f = 1.0f/255.0f;
43                 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
44         }
45         /* Float */
46         else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
47                 float f = tex_fetch(float, info, offset);
48                 return make_float4(f, f, f, 1.0f);
49         }
50         /* Byte */
51         else {
52                 uchar r = tex_fetch(uchar, info, offset);
53                 float f = r * (1.0f/255.0f);
54                 return make_float4(f, f, f, 1.0f);
55         }
56 }
57
58 ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width)
59 {
60         x %= width;
61         if(x < 0)
62                 x += width;
63         return x;
64 }
65
66 ccl_device_inline int svm_image_texture_wrap_clamp(int x, int width)
67 {
68         return clamp(x, 0, width-1);
69 }
70
71 ccl_device_inline float svm_image_texture_frac(float x, int *ix)
72 {
73         int i = float_to_int(x) - ((x < 0.0f)? 1: 0);
74         *ix = i;
75         return x - (float)i;
76 }
77
78 ccl_device float4 kernel_tex_image_interp(KernelGlobals *kg, int id, float x, float y)
79 {
80         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
81
82         uint width = info->width;
83         uint height = info->height;
84         uint offset = 0;
85         uint interpolation = info->interpolation;
86         uint extension = info->extension;
87
88         /* Actual sampling. */
89         float4 r;
90         int ix, iy, nix, niy;
91         if(interpolation == INTERPOLATION_CLOSEST) {
92                 svm_image_texture_frac(x*width, &ix);
93                 svm_image_texture_frac(y*height, &iy);
94
95                 if(extension == EXTENSION_REPEAT) {
96                         ix = svm_image_texture_wrap_periodic(ix, width);
97                         iy = svm_image_texture_wrap_periodic(iy, height);
98                 }
99                 else {
100                         if(extension == EXTENSION_CLIP) {
101                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
102                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
103                                 }
104                         }
105                         /* Fall through. */
106                         /* EXTENSION_EXTEND */
107                         ix = svm_image_texture_wrap_clamp(ix, width);
108                         iy = svm_image_texture_wrap_clamp(iy, height);
109                 }
110
111                 r = svm_image_texture_read(kg, id, offset + ix + iy*width);
112         }
113         else { /* INTERPOLATION_LINEAR */
114                 float tx = svm_image_texture_frac(x*width - 0.5f, &ix);
115                 float ty = svm_image_texture_frac(y*height - 0.5f, &iy);
116
117                 if(extension == EXTENSION_REPEAT) {
118                         ix = svm_image_texture_wrap_periodic(ix, width);
119                         iy = svm_image_texture_wrap_periodic(iy, height);
120
121                         nix = svm_image_texture_wrap_periodic(ix+1, width);
122                         niy = svm_image_texture_wrap_periodic(iy+1, height);
123                 }
124                 else {
125                         if(extension == EXTENSION_CLIP) {
126                                 if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) {
127                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
128                                 }
129                         }
130                         nix = svm_image_texture_wrap_clamp(ix+1, width);
131                         niy = svm_image_texture_wrap_clamp(iy+1, height);
132                         ix = svm_image_texture_wrap_clamp(ix, width);
133                         iy = svm_image_texture_wrap_clamp(iy, height);
134                 }
135
136                 r = (1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width);
137                 r += (1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width);
138                 r += ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width);
139                 r += ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width);
140         }
141         return r;
142 }
143
144
145 ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals *kg, int id, float x, float y, float z, int interp)
146 {
147         const ccl_global TextureInfo *info = kernel_tex_info(kg, id);
148
149         uint width = info->width;
150         uint height = info->height;
151         uint offset = 0;
152         uint depth = info->depth;
153         uint interpolation = (interp == INTERPOLATION_NONE)? info->interpolation: interp;
154         uint extension = info->extension;
155
156         /* Actual sampling. */
157         float4 r;
158         int ix, iy, iz, nix, niy, niz;
159         if(interpolation == INTERPOLATION_CLOSEST) {
160                 svm_image_texture_frac(x*width, &ix);
161                 svm_image_texture_frac(y*height, &iy);
162                 svm_image_texture_frac(z*depth, &iz);
163
164                 if(extension == EXTENSION_REPEAT) {
165                         ix = svm_image_texture_wrap_periodic(ix, width);
166                         iy = svm_image_texture_wrap_periodic(iy, height);
167                         iz = svm_image_texture_wrap_periodic(iz, depth);
168                 }
169                 else {
170                         if(extension == EXTENSION_CLIP) {
171                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
172                                    x > 1.0f || y > 1.0f || z > 1.0f)
173                                 {
174                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
175                                 }
176                         }
177                         /* Fall through. */
178                         /* EXTENSION_EXTEND */
179                         ix = svm_image_texture_wrap_clamp(ix, width);
180                         iy = svm_image_texture_wrap_clamp(iy, height);
181                         iz = svm_image_texture_wrap_clamp(iz, depth);
182                 }
183                 r = svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
184         }
185         else { /* INTERPOLATION_LINEAR */
186                 float tx = svm_image_texture_frac(x*(float)width - 0.5f, &ix);
187                 float ty = svm_image_texture_frac(y*(float)height - 0.5f, &iy);
188                 float tz = svm_image_texture_frac(z*(float)depth - 0.5f, &iz);
189
190                 if(extension == EXTENSION_REPEAT) {
191                         ix = svm_image_texture_wrap_periodic(ix, width);
192                         iy = svm_image_texture_wrap_periodic(iy, height);
193                         iz = svm_image_texture_wrap_periodic(iz, depth);
194
195                         nix = svm_image_texture_wrap_periodic(ix+1, width);
196                         niy = svm_image_texture_wrap_periodic(iy+1, height);
197                         niz = svm_image_texture_wrap_periodic(iz+1, depth);
198                 }
199                 else {
200                         if(extension == EXTENSION_CLIP) {
201                                 if(x < 0.0f || y < 0.0f || z < 0.0f ||
202                                    x > 1.0f || y > 1.0f || z > 1.0f)
203                                 {
204                                         return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
205                                 }
206                         }
207                         /* Fall through. */
208                         /*  EXTENSION_EXTEND */
209                         nix = svm_image_texture_wrap_clamp(ix+1, width);
210                         niy = svm_image_texture_wrap_clamp(iy+1, height);
211                         niz = svm_image_texture_wrap_clamp(iz+1, depth);
212
213                         ix = svm_image_texture_wrap_clamp(ix, width);
214                         iy = svm_image_texture_wrap_clamp(iy, height);
215                         iz = svm_image_texture_wrap_clamp(iz, depth);
216                 }
217
218                 r  = (1.0f - tz)*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + iz*width*height);
219                 r += (1.0f - tz)*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + iz*width*height);
220                 r += (1.0f - tz)*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + iz*width*height);
221                 r += (1.0f - tz)*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + iz*width*height);
222
223                 r += tz*(1.0f - ty)*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + iy*width + niz*width*height);
224                 r += tz*(1.0f - ty)*tx*svm_image_texture_read(kg, id, offset + nix + iy*width + niz*width*height);
225                 r += tz*ty*(1.0f - tx)*svm_image_texture_read(kg, id, offset + ix + niy*width + niz*width*height);
226                 r += tz*ty*tx*svm_image_texture_read(kg, id, offset + nix + niy*width + niz*width*height);
227         }
228         return r;
229 }