2 * Copyright 2011, Blender Foundation.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software Foundation,
16 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 #ifndef __KERNEL_COMPAT_CPU_H__
20 #define __KERNEL_COMPAT_CPU_H__
22 #define __KERNEL_CPU__
24 #include "util_debug.h"
25 #include "util_math.h"
26 #include "util_types.h"
30 /* Assertions inside the kernel only work for the CPU device, so we wrap it in
31 a macro which is empty for other devices */
/* CPU definition: kernel_assert(cond) forwards cond unchanged to assert().
 * NOTE(review): assert itself is not declared in the lines visible here;
 * presumably it is the standard macro made available through one of the
 * includes above (in which case it is disabled by NDEBUG) - confirm. */
33 #define kernel_assert(cond) assert(cond)
35 /* Texture types to be compatible with CUDA textures. These are really just
36 simple arrays, and once fetch is inlined they hopefully reduce to a simple pointer lookup. */
/* texture<T>: array-backed texture type with elements of type T, addressed
 * by a single integer index or by one float coordinate.
 *
 * NOTE(review): several lines of this struct are not visible in this view
 * (member function signatures, braces, the definition of t, and the
 * declarations of data and width). The notes below describe only the
 * statements that are visible.
 *
 * Indexed access:
 *   kernel_assert requires 0 <= index < width before an element is read.
 *
 * fetch_m128 / fetch_m128i:
 *   Reinterpret data as an array of __m128 / __m128i and return element
 *   index. A block-comment opener sits directly in front of fetch_m128 and
 *   its terminator is not visible here, so these two members are presumably
 *   commented out - confirm before relying on them.
 *
 * Interpolated access:
 *   x is clamped to [0, 1] and multiplied by width. index is the truncated
 *   coordinate limited to width-1, and nindex is the next element, also
 *   limited to width-1, so the last element is blended with itself rather
 *   than reading past the end. The result is
 *   (1 - t)*data[index] + t*data[nindex]. t is defined on a line that is
 *   not visible; presumably it is the fractional offset of x from index -
 *   confirm.
 */
39 template<typename T> struct texture {
42 kernel_assert(index >= 0 && index < width);
46 /*__m128 fetch_m128(int index)
48 kernel_assert(index >= 0 && index < width);
49 return ((__m128*)data)[index];
52 __m128i fetch_m128i(int index)
54 kernel_assert(index >= 0 && index < width);
55 return ((__m128i*)data)[index];
60 x = clamp(x, 0.0f, 1.0f)*width;
62 int index = min((int)x, width-1);
63 int nindex = min(index+1, width-1);
66 return (1.0f - t)*data[index] + t*data[nindex];
/* texture_image<T>: 2D image of texels of type T, sampled with a weighted
 * blend of four neighbouring texels.
 *
 * NOTE(review): several lines of this struct are not visible in this view
 * (some signatures and bodies, the if/else structure inside interp, its
 * return statement, and the member declarations). Comments below describe
 * only the visible statements and mark assumptions explicitly. */
73 template<typename T> struct texture_image {
/* Scales the four components of r by 1/255 and returns them as a float4.
 * Presumably the body of a read() overload for 8-bit texels - the signature
 * is not visible; confirm. */
81 float f = 1.0f/255.0f;
82 return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
/* Body not visible in this view; presumably maps x into [0, width) by
 * wrapping around - confirm. */
85 int wrap_periodic(int x, int width)
/* Limits x to the valid index range [0, width-1]. */
93 int wrap_clamp(int x, int width)
95 return clamp(x, 0, width-1);
/* Splits x into an integer part and a remainder. The visible statement
 * truncates x toward zero and subtracts one when x is negative, which is
 * floor(x) for non-integer negatives (an exactly integral negative x gives
 * x - 1). Presumably i is stored through ix and x - i is returned - those
 * lines are not visible; confirm. */
98 float frac(float x, int *ix)
100 int i = (int)x - ((x < 0.0f)? 1: 0);
/* Samples the image at (x, y). The coordinates are multiplied by width and
 * height before use; periodic defaults to true. */
105 float4 interp(float x, float y, bool periodic = true)
/* Early exit with an all-zero float4; the condition guarding it is not
 * visible here. */
108 return make_float4(0.0f, 0.0f, 0.0f, 0.0f);
110 int ix, iy, nix, niy;
/* frac() receives the scaled coordinate and a pointer to the integer texel
 * index; its float result is used as the blend weight along that axis. */
111 float tx = frac(x*width, &ix);
112 float ty = frac(y*height, &iy);
/* Wrap-around addressing for the texel and its +1 neighbour. Presumably the
 * branch taken when periodic is true - the if/else lines are not visible. */
115 ix = wrap_periodic(ix, width);
116 iy = wrap_periodic(iy, height);
118 nix = wrap_periodic(ix+1, width);
119 niy = wrap_periodic(iy+1, height);
/* Clamped addressing: indices outside the image are limited to the border
 * texel, so the neighbour of the last texel is the last texel itself. */
122 ix = wrap_clamp(ix, width);
123 iy = wrap_clamp(iy, height);
125 nix = wrap_clamp(ix+1, width);
126 niy = wrap_clamp(iy+1, height);
/* Weighted sum of the four texels (ix,iy), (nix,iy), (ix,niy), (nix,niy)
 * with weights (1-ty)(1-tx), (1-ty)tx, ty(1-tx), ty*tx. Texels are addressed
 * as column + row*width, and each one is passed through read() first. */
129 float4 r = (1.0f - ty)*(1.0f - tx)*read(data[ix + iy*width]);
130 r += (1.0f - ty)*tx*read(data[nix + iy*width]);
131 r += ty*(1.0f - tx)*read(data[ix + niy*width]);
132 r += ty*tx*read(data[nix + niy*width]);
/* Short names for the template instantiations used by the kernel: texture<T>
 * for float4, float, uint, int and uint4 elements, and texture_image<T> for
 * float4 and uchar4 texels. */
141 typedef texture<float4> texture_float4;
142 typedef texture<float> texture_float;
143 typedef texture<uint> texture_uint;
144 typedef texture<int> texture_int;
145 typedef texture<uint4> texture_uint4;
146 typedef texture_image<float4> texture_image_float4;
147 typedef texture_image<uchar4> texture_image_uchar4;
149 /* Macros to handle different memory storage on different devices */
/* On the CPU every texture access is a member call on kg->tex, where tex is
 * the macro argument substituted as a member name. kg is not a macro
 * parameter: a variable named kg must be in scope wherever these macros are
 * expanded.
 *
 *   kernel_tex_fetch(tex, index)        -> kg->tex.fetch(index)
 *   kernel_tex_fetch_m128(tex, index)   -> kg->tex.fetch_m128(index)
 *   kernel_tex_fetch_m128i(tex, index)  -> kg->tex.fetch_m128i(index)
 *   kernel_tex_interp(tex, t)           -> kg->tex.interp(t)
 *   kernel_tex_image_interp(tex, x, y)  -> kg->tex.interp(x, y)
 *
 * The two-argument interp call leaves the periodic parameter of
 * interp(float x, float y, bool periodic = true) at its default of true.
 *
 * NOTE(review): where fetch_m128 appears earlier in this file it is preceded
 * by a block-comment opener, so fetch_m128 / fetch_m128i look commented out;
 * confirm those members exist before using the two m128 macros. */
151 #define kernel_tex_fetch(tex, index) (kg->tex.fetch(index))
152 #define kernel_tex_fetch_m128(tex, index) (kg->tex.fetch_m128(index))
153 #define kernel_tex_fetch_m128i(tex, index) (kg->tex.fetch_m128i(index))
154 #define kernel_tex_interp(tex, t) (kg->tex.interp(t))
155 #define kernel_tex_image_interp(tex, x, y) (kg->tex.interp(x, y))
/* Expands to the __data member reached through kg; like the texture macros,
 * it relies on a variable named kg being in scope at the point of use. */
157 #define kernel_data (kg->__data)
161 #endif /* __KERNEL_COMPAT_CPU_H__ */