Cycles: Query XYZ to/from Scene Linear conversion from OCIO instead of assuming sRGB
[blender.git] / intern / cycles / util / util_color.h
1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #ifndef __UTIL_COLOR_H__
18 #define __UTIL_COLOR_H__
19
20 #include "util/util_math.h"
21 #include "util/util_types.h"
22
23 #ifdef __KERNEL_SSE2__
24 #include "util/util_simd.h"
25 #endif
26
27 CCL_NAMESPACE_BEGIN
28
29 ccl_device uchar float_to_byte(float val)
30 {
31         return ((val <= 0.0f) ? 0 : ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f)));
32 }
33
34 ccl_device uchar4 color_float_to_byte(float3 c)
35 {
36         uchar r, g, b;
37
38         r = float_to_byte(c.x);
39         g = float_to_byte(c.y);
40         b = float_to_byte(c.z);
41
42         return make_uchar4(r, g, b, 0);
43 }
44
45 ccl_device_inline float3 color_byte_to_float(uchar4 c)
46 {
47         return make_float3(c.x*(1.0f/255.0f), c.y*(1.0f/255.0f), c.z*(1.0f/255.0f));
48 }
49
50 ccl_device float color_srgb_to_linear(float c)
51 {
52         if(c < 0.04045f)
53                 return (c < 0.0f)? 0.0f: c * (1.0f/12.92f);
54         else
55                 return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f);
56 }
57
58 ccl_device float color_linear_to_srgb(float c)
59 {
60         if(c < 0.0031308f)
61                 return (c < 0.0f)? 0.0f: c * 12.92f;
62         else
63                 return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f;
64 }
65
66 ccl_device float3 rgb_to_hsv(float3 rgb)
67 {
68         float cmax, cmin, h, s, v, cdelta;
69         float3 c;
70
71         cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z));
72         cmin = min(rgb.x, min(rgb.y, rgb.z));
73         cdelta = cmax - cmin;
74
75         v = cmax;
76
77         if(cmax != 0.0f) {
78                 s = cdelta/cmax;
79         }
80         else {
81                 s = 0.0f;
82                 h = 0.0f;
83         }
84
85         if(s != 0.0f) {
86                 float3 cmax3 = make_float3(cmax, cmax, cmax);
87                 c = (cmax3 - rgb)/cdelta;
88
89                 if     (rgb.x == cmax) h =        c.z - c.y;
90                 else if(rgb.y == cmax) h = 2.0f + c.x - c.z;
91                 else                   h = 4.0f + c.y - c.x;
92
93                 h /= 6.0f;
94
95                 if(h < 0.0f)
96                         h += 1.0f;
97         }
98         else {
99                 h = 0.0f;
100         }
101
102         return make_float3(h, s, v);
103 }
104
105 ccl_device float3 hsv_to_rgb(float3 hsv)
106 {
107         float i, f, p, q, t, h, s, v;
108         float3 rgb;
109
110         h = hsv.x;
111         s = hsv.y;
112         v = hsv.z;
113
114         if(s != 0.0f) {
115                 if(h == 1.0f)
116                         h = 0.0f;
117
118                 h *= 6.0f;
119                 i = floorf(h);
120                 f = h - i;
121                 rgb = make_float3(f, f, f);
122                 p = v*(1.0f-s);
123                 q = v*(1.0f-(s*f));
124                 t = v*(1.0f-(s*(1.0f-f)));
125
126                 if     (i == 0.0f) rgb = make_float3(v, t, p);
127                 else if(i == 1.0f) rgb = make_float3(q, v, p);
128                 else if(i == 2.0f) rgb = make_float3(p, v, t);
129                 else if(i == 3.0f) rgb = make_float3(p, q, v);
130                 else if(i == 4.0f) rgb = make_float3(t, p, v);
131                 else               rgb = make_float3(v, p, q);
132         }
133         else {
134                 rgb = make_float3(v, v, v);
135         }
136
137         return rgb;
138 }
139
140 ccl_device float3 xyY_to_xyz(float x, float y, float Y)
141 {
142         float X, Z;
143
144         if(y != 0.0f) X = (x / y) * Y;
145         else X = 0.0f;
146
147         if(y != 0.0f && Y != 0.0f) Z = (1.0f - x - y) / y * Y;
148         else Z = 0.0f;
149
150         return make_float3(X, Y, Z);
151 }
152
153 #ifdef __KERNEL_SSE2__
154 /*
155  * Calculate initial guess for arg^exp based on float representation
156  * This method gives a constant bias, which can be easily compensated by multiplication with bias_coeff.
157  * Gives better results for exponents near 1 (e. g. 4/5).
158  * exp = exponent, encoded as uint32_t
159  * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t
160  */
161 template<unsigned exp, unsigned e2coeff>
162 ccl_device_inline ssef fastpow(const ssef &arg)
163 {
164         ssef ret;
165         ret = arg * cast(ssei(e2coeff));
166         ret = ssef(cast(ret));
167         ret = ret * cast(ssei(exp));
168         ret = cast(ssei(ret));
169         return ret;
170 }
171
172 /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */
173 ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x)
174 {
175         ssef approx2 = old_result * old_result;
176         ssef approx4 = approx2 * approx2;
177         ssef t = x / approx4;
178         ssef summ = madd(ssef(4.0f), old_result, t);
179         return summ * ssef(1.0f/5.0f);
180 }
181
182 /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */
183 ccl_device_inline ssef fastpow24(const ssef &arg)
184 {
185         /* max, avg and |avg| errors were calculated in gcc without FMA instructions
186          * The final precision should be better than powf in glibc */
187
188         /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */
189         /* 0x3F4CCCCD = 4/5 */
190         /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */
191         ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17      avg = 0.0018    |avg| = 0.05
192         ssef arg2 = arg * arg;
193         ssef arg4 = arg2 * arg2;
194         x = improve_5throot_solution(x, arg4); /* error max = 0.018             avg = 0.0031    |avg| = 0.0031  */
195         x = improve_5throot_solution(x, arg4); /* error max = 0.00021   avg = 1.6e-05   |avg| = 1.6e-05 */
196         x = improve_5throot_solution(x, arg4); /* error max = 6.1e-07   avg = 5.2e-08   |avg| = 1.1e-07 */
197         return x * (x * x);
198 }
199
200 ccl_device ssef color_srgb_to_linear(const ssef &c)
201 {
202         sseb cmp = c < ssef(0.04045f);
203         ssef lt = max(c * ssef(1.0f/12.92f), ssef(0.0f));
204         ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f/1.055f); /* fma */
205         ssef gte = fastpow24(gtebase);
206         return select(cmp, lt, gte);
207 }
208 #endif  /* __KERNEL_SSE2__ */
209
210 ccl_device float3 color_srgb_to_linear_v3(float3 c)
211 {
212         return make_float3(color_srgb_to_linear(c.x),
213                            color_srgb_to_linear(c.y),
214                            color_srgb_to_linear(c.z));
215 }
216
217 ccl_device float3 color_linear_to_srgb_v3(float3 c)
218 {
219         return make_float3(color_linear_to_srgb(c.x),
220                            color_linear_to_srgb(c.y),
221                            color_linear_to_srgb(c.z));
222 }
223
224 ccl_device float4 color_srgb_to_linear_v4(float4 c)
225 {
226 #ifdef __KERNEL_SSE2__
227         ssef r_ssef;
228         float4 &r = (float4 &)r_ssef;
229         r = c;
230         r_ssef = color_srgb_to_linear(r_ssef);
231         r.w = c.w;
232         return r;
233 #else
234         return make_float4(color_srgb_to_linear(c.x),
235                            color_srgb_to_linear(c.y),
236                            color_srgb_to_linear(c.z),
237                            c.w);
238 #endif
239 }
240
241 CCL_NAMESPACE_END
242
243 #endif /* __UTIL_COLOR_H__ */
244