c73beab98dc70cf6f7baf5f6378c2dc206b72be9
[blender.git] / intern / cycles / util / util_color.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __UTIL_COLOR_H__
18 #define __UTIL_COLOR_H__
19
20 #include "util/util_math.h"
21 #include "util/util_types.h"
22
23 #ifdef __KERNEL_SSE2__
24 #include "util/util_simd.h"
25 #endif
26
27 CCL_NAMESPACE_BEGIN
28
29 ccl_device uchar float_to_byte(float val)
30 {
31         return ((val <= 0.0f) ? 0 : ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f)));
32 }
33
34 ccl_device uchar4 color_float_to_byte(float3 c)
35 {
36         uchar r, g, b;
37
38         r = float_to_byte(c.x);
39         g = float_to_byte(c.y);
40         b = float_to_byte(c.z);
41
42         return make_uchar4(r, g, b, 0);
43 }
44
45 ccl_device_inline float3 color_byte_to_float(uchar4 c)
46 {
47         return make_float3(c.x*(1.0f/255.0f), c.y*(1.0f/255.0f), c.z*(1.0f/255.0f));
48 }
49
50 ccl_device float color_srgb_to_scene_linear(float c)
51 {
52         if(c < 0.04045f)
53                 return (c < 0.0f)? 0.0f: c * (1.0f/12.92f);
54         else
55                 return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f);
56 }
57
58 ccl_device float color_scene_linear_to_srgb(float c)
59 {
60         if(c < 0.0031308f)
61                 return (c < 0.0f)? 0.0f: c * 12.92f;
62         else
63                 return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
64 }
65
66 ccl_device float3 rgb_to_hsv(float3 rgb)
67 {
68         float cmax, cmin, h, s, v, cdelta;
69         float3 c;
70
71         cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z));
72         cmin = min(rgb.x, min(rgb.y, rgb.z));
73         cdelta = cmax - cmin;
74
75         v = cmax;
76
77         if(cmax != 0.0f) {
78                 s = cdelta/cmax;
79         }
80         else {
81                 s = 0.0f;
82                 h = 0.0f;
83         }
84
85         if(s != 0.0f) {
86                 float3 cmax3 = make_float3(cmax, cmax, cmax);
87                 c = (cmax3 - rgb)/cdelta;
88
89                 if     (rgb.x == cmax) h =        c.z - c.y;
90                 else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
91                 else                   h = 4.0f + c.y - c.x;
92
93                 h /= 6.0f;
94
95                 if(h < 0.0f)
96                         h += 1.0f;
97         }
98         else {
99                 h = 0.0f;
100         }
101
102         return make_float3(h, s, v);
103 }
104
105 ccl_device float3 hsv_to_rgb(float3 hsv)
106 {
107         float i, f, p, q, t, h, s, v;
108         float3 rgb;
109
110         h = hsv.x;
111         s = hsv.y;
112         v = hsv.z;
113
114         if(s != 0.0f) {
115                 if(h == 1.0f)
116                         h = 0.0f;
117
118                 h *= 6.0f;
119                 i = floorf(h);
120                 f = h - i;
121                 rgb = make_float3(f, f, f);
122                 p = v*(1.0f-s);
123                 q = v*(1.0f-(s*f));
124                 t = v*(1.0f-(s*(1.0f-f)));
125
126                 if     (i == 0.0f) rgb = make_float3(v, t, p);
127                 else if(i == 1.0f) rgb = make_float3(q, v, p);
128                 else if(i == 2.0f) rgb = make_float3(p, v, t);
129                 else if(i == 3.0f) rgb = make_float3(p, q, v);
130                 else if(i == 4.0f) rgb = make_float3(t, p, v);
131                 else               rgb = make_float3(v, p, q);
132         }
133         else {
134                 rgb = make_float3(v, v, v);
135         }
136
137         return rgb;
138 }
139
140 ccl_device float3 xyY_to_xyz(float x, float y, float Y)
141 {
142         float X, Z;
143
144         if(y != 0.0f) X = (x / y) * Y;
145         else X = 0.0f;
146
147         if(y != 0.0f && Y != 0.0f) Z = (1.0f - x - y) / y * Y;
148         else Z = 0.0f;
149
150         return make_float3(X, Y, Z);
151 }
152
153 ccl_device float3 xyz_to_rgb(float x, float y, float z)
154 {
155         return make_float3(3.240479f * x + -1.537150f * y + -0.498535f * z,
156                           -0.969256f * x +  1.875991f * y +  0.041556f * z,
157                            0.055648f * x + -0.204043f * y +  1.057311f * z);
158 }
159
160 #ifdef __KERNEL_SSE2__
161 /*
162  * Calculate initial guess for arg^exp based on float representation
163  * This method gives a constant bias, which can be easily compensated by multiplication with bias_coeff.
164  * Gives better results for exponents near 1 (e. g. 4/5).
165  * exp = exponent, encoded as uint32_t
166  * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
167  */
168 template<unsigned exp, unsigned e2coeff>
169 ccl_device_inline ssef fastpow(const ssef &arg)
170 {
171         ssef ret;
172         ret = arg * cast(ssei(e2coeff));
173         ret = ssef(cast(ret));
174         ret = ret * cast(ssei(exp));
175         ret = cast(ssei(ret));
176         return ret;
177 }
178
179 /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
180 ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x)
181 {
182         ssef approx2 = old_result * old_result;
183         ssef approx4 = approx2 * approx2;
184         ssef t = x / approx4;
185         ssef summ = madd(ssef(4.0f), old_result, t);
186         return summ * ssef(1.0f/5.0f);
187 }
188
189 /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
190 ccl_device_inline ssef fastpow24(const ssef &arg)
191 {
192         /* max, avg and |avg| errors were calculated in gcc without FMA instructions
193          * The final precision should be better than powf in glibc */
194
195         /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
196         /* 0x3F4CCCCD = 4/5 */
197         /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
198         ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17      avg = 0.0018    |avg| = 0.05
199         ssef arg2 = arg * arg;
200         ssef arg4 = arg2 * arg2;
201         x = improve_5throot_solution(x, arg4); /* error max = 0.018             avg = 0.0031    |avg| = 0.0031  */
202         x = improve_5throot_solution(x, arg4); /* error max = 0.00021   avg = 1.6e-05   |avg| = 1.6e-05 */
203         x = improve_5throot_solution(x, arg4); /* error max = 6.1e-07   avg = 5.2e-08   |avg| = 1.1e-07 */
204         return x * (x * x);
205 }
206
207 ccl_device ssef color_srgb_to_scene_linear(const ssef &c)
208 {
209         sseb cmp = c < ssef(0.04045f);
210         ssef lt = max(c * ssef(1.0f/12.92f), ssef(0.0f));
211         ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f/1.055f); /* fma */
212         ssef gte = fastpow24(gtebase);
213         return select(cmp, lt, gte);
214 }
215 #endif  /* __KERNEL_SSE2__ */
216
217 ccl_device float3 color_srgb_to_scene_linear_v3(float3 c)
218 {
219         return make_float3(color_srgb_to_scene_linear(c.x),
220                            color_srgb_to_scene_linear(c.y),
221                            color_srgb_to_scene_linear(c.z));
222 }
223
224 ccl_device float3 color_scene_linear_to_srgb_v3(float3 c)
225 {
226         return make_float3(color_scene_linear_to_srgb(c.x),
227                            color_scene_linear_to_srgb(c.y),
228                            color_scene_linear_to_srgb(c.z));
229 }
230
231 ccl_device float4 color_srgb_to_scene_linear_v4(float4 c)
232 {
233 #ifdef __KERNEL_SSE2__
234         ssef r_ssef;
235         float4 &r = (float4 &)r_ssef;
236         r = c;
237         r_ssef = color_srgb_to_scene_linear(r_ssef);
238         r.w = c.w;
239         return r;
240 #else
241         return make_float4(color_srgb_to_scene_linear(c.x),
242                            color_srgb_to_scene_linear(c.y),
243                            color_srgb_to_scene_linear(c.z),
244                            c.w);
245 #endif
246 }
247
248 ccl_device float linear_rgb_to_gray(float3 c)
249 {
250         return c.x*0.2126f + c.y*0.7152f + c.z*0.0722f;
251 }
252
253 CCL_NAMESPACE_END
254
255 #endif /* __UTIL_COLOR_H__ */
256