Merge branch 'blender2.7'
[blender.git] / intern / utfconv / utfconv.c
1 /*
2  * This program is free software; you can redistribute it and/or
3  * modify it under the terms of the GNU General Public License
4  * as published by the Free Software Foundation; either version 2
5  * of the License, or (at your option) any later version. 
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software Foundation,
14  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
15  *
16  * The Original Code is Copyright (C) 2012 Blender Foundation.
17  * All rights reserved.
18  * 
19  */
20
21 #include "utfconv.h"
22
/**
 * Count the bytes needed to store \a string16 as UTF-8.
 *
 * Each UTF-16 unit below 0x80 counts as 1 byte, below 0x800 as 2 bytes, any
 * other non-surrogate unit as 3 bytes and a high/low surrogate pair as 4 bytes.
 *
 * Malformed input contributes nothing to the total:
 * - a lone low surrogate is skipped,
 * - a high surrogate that is not followed by a low surrogate is skipped
 *   together with the unit that follows it,
 * - a high surrogate directly before the terminator ends the scan.
 *
 * \param string16: NUL-terminated UTF-16 string, may be NULL.
 * \return Number of bytes including the terminating NUL,
 * or 0 when \a string16 is NULL.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
        /* Unsigned index: a signed `int` would overflow on very long strings. */
        size_t i;
        size_t count = 0;
        wchar_t u = 0;

        if (!string16) {
                return 0;
        }

        for (i = 0; (u = string16[i]); i++) {
                if (u < 0x0080) {
                        count += 1;
                }
                else if (u < 0x0800) {
                        count += 2;
                }
                else if (u < 0xD800 || u >= 0xE000) {
                        count += 3;
                }
                else if (u < 0xDC00) {
                        /* High surrogate: the next unit is consumed with it. */
                        i++;
                        if ((u = string16[i]) == 0) {
                                break;
                        }
                        if (u >= 0xDC00 && u < 0xE000) {
                                count += 4;
                        }
                }
                /* else: lone low surrogate, illegal, nothing to count. */
        }

        /* One more for the terminating NUL. */
        return count + 1;
}
69
70
/**
 * Count the UTF-16 units needed to store \a string8 converted from UTF-8.
 *
 * Decoded code points in the BMP (excluding 0 and the surrogate range) count
 * as one unit, code points from 0x10000 up to 0x10FFFF as two units.
 * Anything else is not counted: illegal lead bytes, sequences interrupted by
 * a non-continuation byte (that byte is dropped too), sequences cut off by
 * the terminator and out-of-range decoded values.
 *
 * \param string8: NUL-terminated UTF-8 string, may be NULL.
 * \return Number of UTF-16 units including the terminating NUL,
 * or 0 when \a string8 is NULL.
 */
size_t count_utf_16_from_8(const char *string8)
{
        const unsigned char *cursor = (const unsigned char *)string8;
        size_t units = 0;
        int pending = 0;          /* Continuation bytes still expected. */
        unsigned int cp = 0;      /* Code point being assembled. */

        if (string8 == NULL) {
                return 0;
        }

        while (*cursor != 0) {
                const unsigned int byte = *cursor++;

                if (pending == 0) {
                        /* Expecting the first byte of a sequence. */
                        if ((byte & 0x80) == 0x00) {
                                units++;
                                cp = 0;
                        }
                        else if ((byte & 0xE0) == 0xC0) {
                                pending = 1;
                                cp = byte & 0x1F;
                        }
                        else if ((byte & 0xF0) == 0xE0) {
                                pending = 2;
                                cp = byte & 0x0F;
                        }
                        else if ((byte & 0xF8) == 0xF0) {
                                pending = 3;
                                cp = byte & 0x07;
                        }
                        /* Any other lead byte is ignored. */
                        continue;
                }

                if ((byte & 0xC0) != 0x80) {
                        /* Sequence interrupted: discard it and this byte. */
                        pending = 0;
                        cp = 0;
                        continue;
                }

                cp = (cp << 6) | (byte & 0x3F);
                pending--;
                if (pending != 0) {
                        continue;
                }

                /* Sequence complete: decide how many units it occupies. */
                if ((0 < cp && cp < 0xD800) || (0xE000 <= cp && cp < 0x10000)) {
                        units += 1;
                }
                else if (0x10000 <= cp && cp < 0x110000) {
                        units += 2;
                }
                cp = 0;
        }

        /* Room for the terminating NUL. */
        return units + 1;
}
108
109
110 int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
111 {
112         char *out8end = out8 + size8;
113         wchar_t u = 0;
114         int err = 0;
115         if (!size8 || !in16 || !out8) return UTF_ERROR_NULL_IN;
116         out8end--;
117
118         for (; out8 < out8end && (u = *in16); in16++, out8++) {
119                 if (u < 0x0080) {
120                         *out8 = u;
121                 }
122                 else if (u < 0x0800) {
123                         if (out8 + 1 >= out8end) break;
124                         *out8++ = (0x3 << 6) | (0x1F & (u >> 6));
125                         *out8  = (0x1 << 7) | (0x3F & (u));
126                 }
127                 else if (u < 0xD800 || u >= 0xE000) {
128                         if (out8 + 2 >= out8end) break;
129                         *out8++ = (0x7 << 5) | (0xF & (u >> 12));
130                         *out8++ = (0x1 << 7) | (0x3F & (u >> 6));
131                         *out8  = (0x1 << 7) | (0x3F & (u));
132                 }
133                 else if (u < 0xDC00) {
134                         wchar_t u2 = *++in16;
135
136                         if (!u2) break;
137                         if (u2 >= 0xDC00 && u2 < 0xE000) {
138                                 if (out8 + 3 >= out8end) break; else {
139                                         unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);
140
141                                         *out8++ = (0xF << 4) | (0x7 & (uc >> 18));
142                                         *out8++ = (0x1 << 7) | (0x3F & (uc >> 12));
143                                         *out8++ = (0x1 << 7) | (0x3F & (uc >> 6));
144                                         *out8  = (0x1 << 7) | (0x3F & (uc));
145                                 }
146                         }
147                         else {
148                                 out8--; err |= UTF_ERROR_ILLCHAR;
149                         }
150                 }
151                 else if (u < 0xE000) {
152                         out8--; err |= UTF_ERROR_ILLCHAR;
153                 }
154         }
155
156         *out8 = *out8end = 0;
157
158         if (*in16) err |= UTF_ERROR_SMALL;
159
160         return err;
161 }
162
163
164 int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
165 {
166         char u;
167         char type = 0;
168         unsigned int u32 = 0;
169         wchar_t *out16end = out16 + size16;
170         int err = 0;
171         if (!size16 || !in8 || !out16) return UTF_ERROR_NULL_IN;
172         out16end--;
173
174         for (; out16 < out16end && (u = *in8); in8++) {
175                 if (type == 0) {
176                         if ((u & 0x01 << 7) == 0)     { *out16 = u; out16++; u32 = 0; continue; } //1 utf-8 char
177                         if ((u & 0x07 << 5) == 0xC0)  { type = 1; u32 = u & 0x1F; continue; }     //2 utf-8 char
178                         if ((u & 0x0F << 4) == 0xE0)  { type = 2; u32 = u & 0x0F; continue; }     //3 utf-8 char
179                         if ((u & 0x1F << 3) == 0xF0)  { type = 3; u32 = u & 0x07; continue; }     //4 utf-8 char
180                         err |= UTF_ERROR_ILLCHAR;
181                         continue;
182                 }
183                 else {
184                         if ((u & 0xC0) == 0x80) {
185                                 u32 = (u32 << 6) | (u & 0x3F);
186                                 type--;
187                         }
188                         else {
189                                 u32 = 0; type = 0; err |= UTF_ERROR_ILLSEQ;
190                         }
191                 }
192                 if (type == 0) {
193                         if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
194                                 *out16 = u32;
195                                 out16++;
196                         }
197                         else if (0x10000 <= u32 && u32 < 0x110000) {
198                                 if (out16 + 1 >= out16end) break;
199                                 u32 -= 0x10000;
200                                 *out16 = 0xD800 + (u32 >> 10);
201                                 out16++;
202                                 *out16 = 0xDC00 + (u32 & 0x3FF);
203                                 out16++;
204                         }
205                         u32 = 0;
206                 }
207
208         }
209
210         *out16 = *out16end = 0;
211
212         if (*in8) err |= UTF_ERROR_SMALL;
213
214         return err;
215 }
216
217 /* UNUSED FUNCTIONS */
218 #if 0
/**
 * Check whether a string consists of 7-bit characters only.
 *
 * \param in8: NUL-terminated string, must not be NULL.
 * \return 1 when no byte has its high bit (0x80) set, otherwise 0.
 */
static int is_ascii(const char *in8)
{
        while (*in8 != '\0') {
                if ((*in8 & 0x80) != 0) {
                        return 0;
                }
                in8++;
        }

        return 1;
}
226
/**
 * Unfinished stub: reads the byte at \a maxcutpoint and discards it.
 * The string is never modified.
 *
 * \param inout8: String to inspect; NULL is accepted and ignored.
 * \param maxcutpoint: Byte offset into \a inout8 that is read.
 */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
        char *cur;
        char cc;

        /* Check for NULL before forming a pointer into the string. */
        if (!inout8) return;

        cur = inout8 + maxcutpoint;
        cc = *cur;

        /* NOTE(review): nothing is done with the byte yet. */
        (void)cc;
}
235 #endif
236
237
/**
 * Allocate a buffer with malloc() and fill it with the UTF-8 conversion
 * of \a in16.
 *
 * \param in16: NUL-terminated UTF-16 string, may be NULL.
 * \param add: Extra bytes to allocate after the converted string;
 * they are left uninitialized.
 * \return The new NUL-terminated UTF-8 string, to be released with free(),
 * or NULL when \a in16 is NULL, the requested size overflows or the
 * allocation fails.
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
        const size_t bsize = count_utf_8_from_16(in16);
        size_t total;
        char *out8;

        if (!bsize) return NULL;

        /* Refuse sizes that wrap around: the conversion below writes `bsize`
         * bytes and must never get a smaller buffer. */
        total = bsize + add;
        if (total < bsize) return NULL;

        out8 = malloc(total);
        if (!out8) return NULL;

        conv_utf_16_to_8(in16, out8, bsize);
        return out8;
}
247
/**
 * Allocate a buffer with malloc() and fill it with the UTF-16 conversion
 * of \a in8.
 *
 * \param in8: NUL-terminated UTF-8 string, may be NULL.
 * \param add: Extra wchar_t units to allocate after the converted string;
 * they are left uninitialized.
 * \return The new NUL-terminated UTF-16 string, to be released with free(),
 * or NULL when \a in8 is NULL, the requested size overflows or the
 * allocation fails.
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
        const size_t bsize = count_utf_16_from_8(in8);
        size_t total;
        wchar_t *out16;

        if (!bsize) return NULL;

        /* Refuse sizes that wrap around, both in units and in bytes: the
         * conversion below writes `bsize` units and must never get a smaller
         * buffer. */
        total = bsize + add;
        if (total < bsize) return NULL;
        if (total > (size_t)-1 / sizeof(*out16)) return NULL;

        out16 = malloc(sizeof(*out16) * total);
        if (!out16) return NULL;

        conv_utf_8_to_16(in8, out16, bsize);
        return out16;
}