Warning Fixes - const correctness in unicode encoding, unused variables in blenlib...
[blender.git] / intern / utfconv / utfconv.c
1 /*
2  * ***** BEGIN GPL LICENSE BLOCK *****
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version. 
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software Foundation,
16  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  *
18  * The Original Code is Copyright (C) 2009 Blender Foundation.
19  * All rights reserved.
20  * 
21  * Contributor(s): Alexandr Kuznetsov, Andrea Weikert
22  *
23  * ***** END GPL LICENSE BLOCK *****
24  */
25
26 #include "utfconv.h"
27
/*
 * Count the bytes needed to store a NUL-terminated UTF-16 string as UTF-8.
 *
 * string16  source string, may be NULL
 *
 * Returns the byte count including the terminating NUL, or 0 when
 * string16 is NULL.
 *
 * Units below 0x80 count 1 byte, below 0x800 count 2 bytes, any other unit
 * outside the surrogate range counts 3 bytes, and a high surrogate followed
 * by a low surrogate counts 4 bytes for the pair.
 * A lone low surrogate counts nothing.  A high surrogate that is not followed
 * by a low surrogate counts nothing either and the unit after it is skipped
 * too; a high surrogate at the very end of the string stops the scan.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
        size_t count = 0;
        wchar_t u;

        if (!string16) {
                return 0;
        }

        /* Walk by pointer: no signed index that could overflow on long input. */
        for (; (u = *string16) != 0; string16++) {
                if (u < 0x0080) {
                        count += 1;
                }
                else if (u < 0x0800) {
                        count += 2;
                }
                else if (u < 0xD800 || u >= 0xE000) {
                        count += 3;
                }
                else if (u < 0xDC00) {
                        /* High surrogate: the next unit decides whether this is a pair. */
                        u = *++string16;
                        if (u == 0) {
                                break;
                        }
                        if (u >= 0xDC00 && u < 0xE000) {
                                count += 4;
                        }
                }
                /* else: lone low surrogate, illegal, contributes nothing. */
        }

        /* One more for the terminating NUL. */
        return count + 1;
}
51
52
/*
 * Count the wchar_t units needed to store a NUL-terminated UTF-8 string as
 * UTF-16.
 *
 * string8  source string, may be NULL
 *
 * Returns the unit count including the terminating NUL, or 0 when string8
 * is NULL.
 *
 * Bytes that cannot start a sequence are ignored.  A sequence interrupted by
 * a non-continuation byte is dropped together with that byte.  Decoded values
 * equal to 0, inside 0xD800..0xDFFF, or at/above 0x110000 are not counted.
 */
size_t count_utf_16_from_8(const char *string8)
{
        size_t units = 0;
        unsigned int codepoint = 0;
        int pending = 0; /* continuation bytes still expected */
        const char *p;

        if (string8 == NULL) {
                return 0;
        }

        for (p = string8; *p != '\0'; p++) {
                const unsigned int byte = (unsigned char)*p;

                if (pending == 0) {
                        /* Expecting a single byte or the lead byte of a sequence. */
                        if (byte < 0x80) {
                                units++;
                        }
                        else if ((byte & 0xE0) == 0xC0) {
                                pending = 1;
                                codepoint = byte & 0x1F;
                        }
                        else if ((byte & 0xF0) == 0xE0) {
                                pending = 2;
                                codepoint = byte & 0x0F;
                        }
                        else if ((byte & 0xF8) == 0xF0) {
                                pending = 3;
                                codepoint = byte & 0x07;
                        }
                        continue;
                }

                if ((byte & 0xC0) != 0x80) {
                        /* Broken sequence: forget it, the offending byte is skipped. */
                        pending = 0;
                        codepoint = 0;
                        continue;
                }

                codepoint = (codepoint << 6) | (byte & 0x3F);
                pending--;
                if (pending != 0) {
                        continue;
                }

                /* Sequence complete: one unit inside the BMP, two above it. */
                if ((codepoint > 0 && codepoint < 0xD800) ||
                    (codepoint >= 0xE000 && codepoint < 0x10000))
                {
                        units += 1;
                }
                else if (codepoint >= 0x10000 && codepoint < 0x110000) {
                        units += 2;
                }
                codepoint = 0;
        }

        /* One more for the terminating NUL. */
        return units + 1;
}
87
88
89
90
91 int conv_utf_16_to_8(const wchar_t * in16, char * out8, size_t size8)
92 {
93         char * out8end = out8+size8;
94         wchar_t u = 0;
95         int err = 0;
96         if(!size8 || !in16 || !out8) return UTF_ERROR_NULL_IN;
97         out8end--;
98
99         for(; out8 < out8end && (u=*in16); in16++, out8++)
100         {
101                 if(u < 0x0080) *out8 = u; else
102                 if(u < 0x0800) {
103                         if(out8 + 1 >= out8end) break;
104                         *out8++=(0x3<<6) | (0x1F & (u>>6));
105                         *out8  =(0x1<<7) | (0x3F & (u));                
106                 }else
107                 if(u < 0xD800 || u >= 0xE000) {
108                         if(out8 + 2 >= out8end) break;
109                         *out8++=(0x7<<5) | (0xF & (u>>12));
110                         *out8++=(0x1<<7) | (0x3F & (u>>6));;
111                         *out8  =(0x1<<7) | (0x3F & (u));
112                 }else
113                 if(u < 0xDC00) {
114                         wchar_t u2 = *++in16;
115
116                         if(!u2) break;
117                         if(u2 >= 0xDC00 && u2 < 0xE000)
118                         {
119                                 if(out8 + 3 >= out8end) break; else { 
120                                 unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800)<<10);
121
122                                 *out8++=(0xF<<4) | (0x7 & (uc>>18));
123                                 *out8++=(0x1<<7) | (0x3F & (uc>>12));
124                                 *out8++=(0x1<<7) | (0x3F & (uc>>6));
125                                 *out8  =(0x1<<7) | (0x3F & (uc));
126                                 }
127                         } else {out8--; err|=UTF_ERROR_ILLCHAR;};
128                 } else
129                 if(u < 0xE000) {out8--; err|=UTF_ERROR_ILLCHAR;}
130
131
132         }
133
134         *out8=*out8end=0;
135
136         if(*in16) err|=UTF_ERROR_SMALL;
137
138         return err;
139 }
140
141
142 int conv_utf_8_to_16(const char * in8, wchar_t * out16, size_t size16)
143 {
144         char u;
145         char type = 0;
146         wchar_t u32 = 0;
147         wchar_t * out16end = out16+size16;
148         int err = 0;
149         if(!size16 || !in8 || !out16) return UTF_ERROR_NULL_IN;
150         out16end--;
151
152         for(;out16<out16end && (u = *in8);in8++)
153         {
154                 if(type==0)
155                 {
156                         if((u&0x01<<7) == 0)     {*out16=u; out16++; u32 = 0; continue;}                //1 utf-8 char
157                         if((u&0x07<<5) == 0xC0)  {type=1; u32 = u & 0x1F; continue;}    //2 utf-8 char
158                         if((u&0x0F<<4) == 0xE0)  {type=2; u32 = u & 0x0F; continue;}    //3 utf-8 char
159                         if((u&0x1F<<3) == 0xF0)  {type=3; u32 = u & 0x07; continue;}    //4 utf-8 char
160                         err|=UTF_ERROR_ILLCHAR; continue;
161                 } else 
162                 {
163                         if((u & 0xC0) == 0x80) {u32=(u32<<6) | (u&0x3F); type--;} else
164                         {u32 = 0; type = 0; err|=UTF_ERROR_ILLSEQ;};
165                 }
166                 if(type==0)
167                 {
168                         if((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {*out16=u32; out16++;}else
169                         if(0x10000 <= u32 && u32 < 0x110000) {
170                                 if(out16 + 1 >= out16end) break;
171                                 u32-=0x10000;
172                                 *out16 = 0xD800 + (u32 >> 10);
173                                 out16++;
174                                 *out16 = 0xDC00 + (u32 & 0x3FF);
175                                 out16++;
176                         };
177                         u32 = 0;
178                 }
179
180         }
181
182         *out16=*out16end=0;
183
184         if(*in8) err|=UTF_ERROR_SMALL;
185
186         return err;
187 }
188
/*
 * Tell whether a NUL-terminated string consists of 7-bit characters only.
 *
 * in8  string to scan; it is dereferenced without a NULL check
 *
 * Returns 1 when no byte before the terminator has its top bit (0x80) set,
 * 0 as soon as such a byte is found.
 */
int is_ascii(const char *in8)
{
        while (*in8 != '\0') {
                if ((*in8 & 0x80) != 0) {
                        return 0;
                }
                in8++;
        }

        return 1;
}
196
/*
 * Placeholder: this function currently does nothing and leaves the buffer
 * untouched.
 *
 * inout8       string buffer (unused for now)
 * maxcutpoint  byte offset (unused for now)
 *
 * NOTE(review): judging by the name this is presumably meant to shorten the
 * string at or before maxcutpoint - confirm the intended contract before
 * implementing it.
 */
void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
        /* The earlier body only read inout8[maxcutpoint] into an unused local.
         * That read could land outside the buffer, so it has been removed. */
        (void)inout8;
        (void)maxcutpoint;
}
210
211
212
/*
 * Allocate a buffer with malloc() and fill it with the UTF-8 conversion of a
 * UTF-16 string.
 *
 * in16  NUL-terminated source string, may be NULL
 * add   number of extra bytes to allocate beyond the converted string; this
 *       function does not write to them
 *
 * Returns the new buffer, or NULL when in16 is NULL, when the requested size
 * overflows size_t, or when the allocation fails.  The buffer comes from
 * malloc(), so it is released with free().
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
        const size_t bsize = count_utf_8_from_16(in16);
        size_t total;
        char *out8;

        if (!bsize) {
                return NULL;
        }

        /* Unsigned wrap-around would yield a buffer smaller than the bsize bytes
         * the converter is allowed to write. */
        total = bsize + add;
        if (total < bsize) {
                return NULL;
        }

        out8 = malloc(total);
        if (!out8) {
                return NULL;
        }

        conv_utf_16_to_8(in16, out8, bsize);
        return out8;
}
222
/*
 * Allocate a buffer with malloc() and fill it with the UTF-16 conversion of a
 * UTF-8 string.
 *
 * in8  NUL-terminated source string, may be NULL
 * add  number of extra wchar_t units to allocate beyond the converted string;
 *      this function does not write to them
 *
 * Returns the new buffer, or NULL when in8 is NULL, when the requested size
 * overflows size_t, or when the allocation fails.  The buffer comes from
 * malloc(), so it is released with free().
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
        const size_t bsize = count_utf_16_from_8(in8);
        size_t total;
        wchar_t *out16;

        if (!bsize) {
                return NULL;
        }

        /* Reject unit counts that wrap around or whose byte size does not fit in
         * size_t; either would yield a buffer smaller than the converter needs. */
        total = bsize + add;
        if (total < bsize || total > (size_t)-1 / sizeof(wchar_t)) {
                return NULL;
        }

        out16 = malloc(sizeof(wchar_t) * total);
        if (!out16) {
                return NULL;
        }

        conv_utf_8_to_16(in8, out16, bsize);
        return out16;
}
232