/* "$Id: $"
 *
 * Author: Jean-Marc Lienher ( http://oksid.ch )
 * Copyright 2000-2003 by O'ksi'D.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA.
 *
 * Please report all bugs and problems on the following page:
 *
 * http://www.fltk.org/str.php
 */
25
26/*
27 * Unicode to UTF-8 conversion functions.
28 */
29
30#if !defined(WIN32) && !defined(__APPLE__)
31
32#include "../../FL/Xutf8.h"
33
/*** NOTE: all functions are LIMITED to 24-bit Unicode values!!! ***/
35
/*
 * Converts the first character of a UTF-8 string to a Unicode value.
 *
 *   buf  UTF-8 bytes to decode; only read when len > 0.
 *   len  number of bytes available in buf.
 *   ucs  receives the decoded value, or '?' when decoding fails.
 *
 * Returns the byte length (1 to 5) of the converted UTF-8 character.
 * Returns -1 if the sequence is not valid: len < 1, a stray continuation
 * byte, a truncated sequence, a malformed continuation byte, an overlong
 * encoding, a 5-byte sequence whose value is 0x01000000 or more, or a
 * lead byte that announces 6 or more bytes.
 */
int
XConvertUtf8ToUcs(const unsigned char *buf,
                  int                 len,
                  unsigned int        *ucs) {
  unsigned int lead;
  unsigned int value;

  /* Check the length first so that an empty input never reads buf[0]. */
  if (len < 1) {
    *ucs = (unsigned int) '?';
    return -1;
  }

  lead = buf[0];

  if ((lead & 0x80) == 0) {
    /* 0x00000000 - 0x0000007F */
    *ucs = lead;
    return 1;
  }

  if ((lead & 0xE0) == 0xC0) {
    /* 0x00000080 - 0x000007FF */
    if (len > 1
        && (buf[1] & 0xC0) == 0x80) {
      value = ((lead & 0x1F) << 6) |
              (buf[1] & 0x3Fu);
      if (value > 0x0000007F) {         /* reject overlong encodings */
        *ucs = value;
        return 2;
      }
    }
  } else if ((lead & 0xF0) == 0xE0) {
    /* 0x00000800 - 0x0000FFFF */
    if (len > 2
        && (buf[1] & 0xC0) == 0x80
        && (buf[2] & 0xC0) == 0x80) {
      value = ((lead & 0x0F) << 12) |
              ((buf[1] & 0x3Fu) << 6) |
              (buf[2] & 0x3Fu);
      if (value > 0x000007FF) {         /* reject overlong encodings */
        *ucs = value;
        return 3;
      }
    }
  } else if ((lead & 0xF8) == 0xF0) {
    /* 0x00010000 - 0x001FFFFF */
    if (len > 3
        && (buf[1] & 0xC0) == 0x80
        && (buf[2] & 0xC0) == 0x80
        && (buf[3] & 0xC0) == 0x80) {
      value = ((lead & 0x07) << 18) |
              ((buf[1] & 0x3Fu) << 12) |
              ((buf[2] & 0x3Fu) << 6) |
              (buf[3] & 0x3Fu);
      if (value > 0x0000FFFF) {         /* reject overlong encodings */
        *ucs = value;
        return 4;
      }
    }
  } else if ((lead & 0xFC) == 0xF8) {
    /* 0x00200000 - 0x03FFFFFF, of which only values below 2^24 are kept */
    if (len > 4
        && (buf[1] & 0xC0) == 0x80
        && (buf[2] & 0xC0) == 0x80
        && (buf[3] & 0xC0) == 0x80
        && (buf[4] & 0xC0) == 0x80) {
      value = ((lead & 0x03) << 24) |
              ((buf[1] & 0x3Fu) << 18) |
              ((buf[2] & 0x3Fu) << 12) |
              ((buf[3] & 0x3Fu) << 6) |
              (buf[4] & 0x3Fu);
      if (value > 0x001FFFFF && value < 0x01000000) {
        *ucs = value;
        return 5;
      }
    }
  }
  /*
   * Anything else is a stray continuation byte (10xxxxxx) or a lead byte
   * for 0x04000000 and above (1111110x / 1111111x), neither of which is
   * supported.
   */

  *ucs = (unsigned int) '?';    /* bad utf-8 string */
  return -1;
}
106
/*
 * Converts a Unicode value to a UTF-8 byte sequence.
 *
 *   ucs  value to encode; only values below 0x01000000 are supported.
 *   buf  output buffer; no terminating NUL is written.
 *
 * NOTE : the buffer (buf) must be at least 5 bytes long !!!
 *
 * Returns the number of bytes written (1 to 5). Values from 0x00200000
 * up to 0x00FFFFFF are written as a 5-byte sequence. For larger values
 * a single '?' is stored in buf[0] and -1 is returned.
 */
int
XConvertUcsToUtf8(unsigned int ucs,
                  char         *buf) {
  /*
   * Write through an unsigned view of the buffer: bytes of 0x80 and above
   * do not fit in a signed char, and narrowing them implicitly would be
   * implementation-defined.
   */
  unsigned char *out = (unsigned char *) buf;

  if (ucs < 0x000080) {
    out[0] = (unsigned char) ucs;
    return 1;
  }
  if (ucs < 0x000800) {
    out[0] = (unsigned char) (0xC0 | (ucs >> 6));
    out[1] = (unsigned char) (0x80 | (ucs & 0x3F));
    return 2;
  }
  if (ucs < 0x010000) {
    out[0] = (unsigned char) (0xE0 | (ucs >> 12));
    out[1] = (unsigned char) (0x80 | ((ucs >> 6) & 0x3F));
    out[2] = (unsigned char) (0x80 | (ucs & 0x3F));
    return 3;
  }
  if (ucs < 0x00200000) {
    out[0] = (unsigned char) (0xF0 | (ucs >> 18));
    out[1] = (unsigned char) (0x80 | ((ucs >> 12) & 0x3F));
    out[2] = (unsigned char) (0x80 | ((ucs >> 6) & 0x3F));
    out[3] = (unsigned char) (0x80 | (ucs & 0x3F));
    return 4;
  }
  if (ucs < 0x01000000) {
    out[0] = (unsigned char) (0xF8 | (ucs >> 24));
    out[1] = (unsigned char) (0x80 | ((ucs >> 18) & 0x3F));
    out[2] = (unsigned char) (0x80 | ((ucs >> 12) & 0x3F));
    out[3] = (unsigned char) (0x80 | ((ucs >> 6) & 0x3F));
    out[4] = (unsigned char) (0x80 | (ucs & 0x3F));
    return 5;
  }

  /* value needs more than 24 bits: not supported */
  out[0] = '?';
  return -1;
}
144
/*
 * Returns the byte length of the first UTF-8 character in buf, looking
 * at no more than len bytes.
 * (returns -1 if that character is not valid)
 */
int
XUtf8CharByteLen(const unsigned char *buf,
                 int                 len) {
  unsigned int decoded;         /* required by the decoder, not used here */
  int          nbytes;

  nbytes = XConvertUtf8ToUcs(buf, len, &decoded);
  return nbytes;
}
155
/*
 * Returns the number of Unicode characters in the first len bytes of the
 * UTF-8 string buf. A byte that does not start a valid character is
 * counted as one character of its own.
 */
int
XCountUtf8Char(const unsigned char *buf,
               int                 len) {
  int count = 0;
  int pos;

  for (pos = 0; pos < len; count++) {
    int step = XUtf8CharByteLen(buf + pos, len - pos);

    /* on a decoding error, skip a single byte so the scan always advances */
    pos += (step < 1) ? 1 : step;
  }
  return count;
}
173
/*
 * Same as XConvertUtf8ToUcs but no sanity check is done on the content:
 * continuation bytes are not verified and overlong or out-of-range
 * values are not rejected. Only the lead byte and the available length
 * are examined.
 *
 *   buf  UTF-8 bytes to decode; only read when len > 0.
 *   len  number of bytes available in buf.
 *   ucs  receives the decoded value, or '?' when decoding fails.
 *
 * Returns the byte length (1 to 5) announced by the lead byte.
 * Returns -1 if len < 1, if fewer bytes are available than the lead byte
 * announces, or if the lead byte is a continuation byte or announces 6
 * or more bytes.
 */
int
XFastConvertUtf8ToUcs(const unsigned char *buf,
                      int                 len,
                      unsigned int        *ucs) {
  unsigned int lead;

  /* Check the length first so that an empty input never reads buf[0]. */
  if (len < 1) {
    *ucs = (unsigned int) '?';
    return -1;
  }

  lead = buf[0];

  if ((lead & 0x80) == 0) {
    /* 0x00000000 - 0x0000007F */
    *ucs = lead;
    return 1;
  }

  /*
   * Trailing bytes only have their top bit cleared and the pieces are
   * summed, so a malformed trailing byte produces an unspecified value.
   */
  if ((lead & 0xE0) == 0xC0) {
    if (len > 1) {
      /* 0x00000080 - 0x000007FF */
      *ucs = ((lead & 0x1F) << 6) +
             (buf[1] & 0x7Fu);
      return 2;
    }
  } else if ((lead & 0xF0) == 0xE0) {
    if (len > 2) {
      /* 0x00000800 - 0x0000FFFF */
      *ucs = ((lead & 0x0F) << 12) +
             ((buf[1] & 0x7Fu) << 6) +
             (buf[2] & 0x7Fu);
      return 3;
    }
  } else if ((lead & 0xF8) == 0xF0) {
    if (len > 3) {
      /* 0x00010000 - 0x001FFFFF */
      *ucs = ((lead & 0x07) << 18) +
             ((buf[1] & 0x7Fu) << 12) +
             ((buf[2] & 0x7Fu) << 6) +
             (buf[3] & 0x7Fu);
      return 4;
    }
  } else if ((lead & 0xFC) == 0xF8) {
    if (len > 4) {
      /* 0x00200000 - 0x03FFFFFF */
      *ucs = ((lead & 0x03) << 24) +
             ((buf[1] & 0x7Fu) << 18) +
             ((buf[2] & 0x7Fu) << 12) +
             ((buf[3] & 0x7Fu) << 6) +
             (buf[4] & 0x7Fu);
      return 5;
    }
  }
  /*
   * Anything else is a continuation byte (10xxxxxx) used as a lead byte,
   * or a lead byte for 0x04000000 and above, which is not supported.
   */

  *ucs = (unsigned int) '?';    /* bad utf-8 string */
  return -1;
}
233
234#endif /* X11 only */
235
236/*
237 * End of "$Id: $".
238 */