blob: fbce5770d644cda2bb60162ed548dc4ab4b7db5a [file] [log] [blame]
Bram Moolenaarab79bcb2004-07-18 21:34:53 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9/*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
11 *
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
14 */
15
16#define NO_X11_INCLUDES
17#include "vim.h"
18
Bram Moolenaar5eb86f92004-07-26 12:53:41 +000019#ifdef FEAT_MBYTE
Bram Moolenaar26a60b42005-02-22 08:49:11 +000020static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
21static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
22
23/* Converter for composing decomposed HFS+ file paths */
24static TECObjectRef gPathConverter;
25/* Converter used by mac_utf16_to_utf8 */
26static TECObjectRef gUTF16ToUTF8Converter;
27
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000028/*
29 * A Mac version of string_convert_ext() for special cases.
30 */
31 char_u *
32mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
33 char_u *ptr;
34 int len;
35 int *lenp;
36 int fail_on_error;
37 int from_enc;
38 int to_enc;
39 int *unconvlenp;
40{
41 char_u *retval, *d;
42 CFStringRef cfstr;
43 int buflen, in, out, l, i;
44 CFStringEncoding from;
45 CFStringEncoding to;
46
47 switch (from_enc)
48 {
49 case 'l': from = kCFStringEncodingISOLatin1; break;
50 case 'm': from = kCFStringEncodingMacRoman; break;
51 case 'u': from = kCFStringEncodingUTF8; break;
52 default: return NULL;
53 }
54 switch (to_enc)
55 {
56 case 'l': to = kCFStringEncodingISOLatin1; break;
57 case 'm': to = kCFStringEncodingMacRoman; break;
58 case 'u': to = kCFStringEncodingUTF8; break;
59 default: return NULL;
60 }
61
62 if (unconvlenp != NULL)
63 *unconvlenp = 0;
64 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
65
Bram Moolenaar26a60b42005-02-22 08:49:11 +000066 if(cfstr == NULL)
67 fprintf(stderr, "Encoding failed\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000068 /* When conversion failed, try excluding bytes from the end, helps when
69 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
70 * looping a long time when there really is something unconvertable. */
71 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
72 {
73 --len;
74 ++*unconvlenp;
75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
76 }
77 if (cfstr == NULL)
78 return NULL;
Bram Moolenaar26a60b42005-02-22 08:49:11 +000079
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000080 if (to == kCFStringEncodingUTF8)
81 buflen = len * 6 + 1;
82 else
83 buflen = len + 1;
84 retval = alloc(buflen);
85 if (retval == NULL)
86 {
87 CFRelease(cfstr);
88 return NULL;
89 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +000090
91#if 0
92 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
93 /* Determine output buffer size */
94 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
95 retval = (buflen > 0) ? alloc(buflen) : NULL;
96 if (retval == NULL) {
97 CFRelease(cfstr);
98 return NULL;
99 }
100
101 if (lenp)
102 *lenp = buflen / sizeof(char_u);
103
104 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
105#endif
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000106 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000107 {
108 CFRelease(cfstr);
109 if (fail_on_error)
110 {
111 vim_free(retval);
112 return NULL;
113 }
114
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000115 fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000116 /* conversion failed for the whole string, but maybe it will work
117 * for each character */
118 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
119 {
120 if (from == kCFStringEncodingUTF8)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000121 l = utf_ptr2len(ptr + in);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000122 else
123 l = 1;
124 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
125 if (cfstr == NULL)
126 {
127 *d++ = '?';
128 out++;
129 }
130 else
131 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000132 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000133 {
134 *d++ = '?';
135 out++;
136 }
137 else
138 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000139 i = STRLEN(d);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000140 d += i;
141 out += i;
142 }
143 CFRelease(cfstr);
144 }
145 in += l;
146 }
147 *d = NUL;
148 if (lenp != NULL)
149 *lenp = out;
150 return retval;
151 }
152 CFRelease(cfstr);
153 if (lenp != NULL)
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000154 *lenp = STRLEN(retval);
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000155
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000156 return retval;
157}
158
159/*
160 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
161 * standard Carbon framework.
162 * Input: "ptr[*sizep]".
163 * "real_size" is the size of the buffer that "ptr" points to.
164 * output is in-place, "sizep" is adjusted.
165 * Returns OK or FAIL.
166 */
167 int
168macroman2enc(ptr, sizep, real_size)
169 char_u *ptr;
170 long *sizep;
171 long real_size;
172{
173 CFStringRef cfstr;
174 CFRange r;
175 CFIndex len = *sizep;
176
177 /* MacRoman is an 8-bit encoding, no need to move bytes to
178 * conv_rest[]. */
179 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
180 kCFStringEncodingMacRoman, 0);
181 /*
182 * If there is a conversion error, try using another
183 * conversion.
184 */
185 if (cfstr == NULL)
186 return FAIL;
187
188 r.location = 0;
189 r.length = CFStringGetLength(cfstr);
190 if (r.length != CFStringGetBytes(cfstr, r,
191 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
192 0, /* no lossy conversion */
193 0, /* not external representation */
194 ptr + *sizep, real_size - *sizep, &len))
195 {
196 CFRelease(cfstr);
197 return FAIL;
198 }
199 CFRelease(cfstr);
200 mch_memmove(ptr, ptr + *sizep, len);
201 *sizep = len;
202
203 return OK;
204}
205
206/*
207 * Conversion from UTF-8 or latin1 to MacRoman.
208 * Input: "from[fromlen]"
209 * Output: "to[maxtolen]" length in "*tolenp"
210 * Unconverted rest in rest[*restlenp].
211 * Returns OK or FAIL.
212 */
213 int
214enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
215 char_u *from;
216 size_t fromlen;
217 char_u *to;
218 int *tolenp;
219 int maxtolen;
220 char_u *rest;
221 int *restlenp;
222{
223 CFStringRef cfstr;
224 CFRange r;
225 CFIndex l;
226
227 *restlenp = 0;
228 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
229 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
230 0);
231 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
232 {
233 rest[*restlenp++] = from[--fromlen];
234 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
235 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
236 0);
237 }
238 if (cfstr == NULL)
239 return FAIL;
240
241 r.location = 0;
242 r.length = CFStringGetLength(cfstr);
243 if (r.length != CFStringGetBytes(cfstr, r,
244 kCFStringEncodingMacRoman,
245 0, /* no lossy conversion */
246 0, /* not external representation (since vim
247 * handles this internally */
248 to, maxtolen, &l))
249 {
250 CFRelease(cfstr);
251 return FAIL;
252 }
253 CFRelease(cfstr);
254 *tolenp = l;
255 return OK;
256}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000257
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000258/*
259 * Initializes text converters
260 */
261 void
262mac_conv_init()
263{
264 TextEncoding utf8_encoding;
265 TextEncoding utf8_hfsplus_encoding;
266 TextEncoding utf8_canon_encoding;
267 TextEncoding utf16_encoding;
268
269 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
270 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
271 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
272 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
273 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
274 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
275 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
276 kTextEncodingDefaultVariant, kUnicode16BitFormat);
277
278 if (TECCreateConverter(&gPathConverter, utf8_encoding,
279 utf8_hfsplus_encoding) != noErr)
280 gPathConverter = NULL;
281
282 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
283 utf8_canon_encoding) != noErr)
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000284 {
285 /* On pre-10.3, Unicode normalization is not available so
286 * fall back to non-normalizing converter */
287 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
288 utf8_encoding) != noErr)
289 gUTF16ToUTF8Converter = NULL;
290 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000291}
292
293/*
294 * Destroys text converters
295 */
296 void
297mac_conv_cleanup()
298{
299 if (gUTF16ToUTF8Converter)
300 {
301 TECDisposeConverter(gUTF16ToUTF8Converter);
302 gUTF16ToUTF8Converter = NULL;
303 }
304
305 if (gPathConverter)
306 {
307 TECDisposeConverter(gPathConverter);
308 gPathConverter = NULL;
309 }
310}
311
312/*
313 * Conversion from UTF-16 UniChars to 'encoding'
314 */
315 char_u *
316mac_utf16_to_enc(from, fromLen, actualLen)
317 UniChar *from;
318 size_t fromLen;
319 size_t *actualLen;
320{
321 /* Following code borrows somewhat from os_mswin.c */
322 vimconv_T conv;
323 size_t utf8_len;
324 char_u *utf8_str;
325 char_u *result = NULL;
326
327 /* Convert to utf-8 first, works better with iconv */
328 utf8_len = 0;
329 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
330
331 if (utf8_str)
332 {
333 /* We might be called before we have p_enc set up. */
334 conv.vc_type = CONV_NONE;
335
336 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
337 * internal unicode is always utf-8) so don't convert in such cases */
338
339 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
340 convert_setup(&conv, (char_u *)"utf-8",
341 p_enc? p_enc: (char_u *)"macroman");
342 if (conv.vc_type == CONV_NONE)
343 {
344 /* p_enc is utf-8, so we're done. */
345 result = utf8_str;
346 }
347 else
348 {
349 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
350 vim_free(utf8_str);
351 }
352
353 convert_setup(&conv, NULL, NULL);
354
355 if (actualLen)
356 *actualLen = utf8_len;
357 }
358 else if (actualLen)
359 *actualLen = 0;
360
361 return result;
362}
363
364/*
365 * Conversion from 'encoding' to UTF-16 UniChars
366 */
367 UniChar *
368mac_enc_to_utf16(from, fromLen, actualLen)
369 char_u *from;
370 size_t fromLen;
371 size_t *actualLen;
372{
373 /* Following code borrows somewhat from os_mswin.c */
374 vimconv_T conv;
375 size_t utf8_len;
376 char_u *utf8_str;
377 UniChar *result = NULL;
378 Boolean should_free_utf8 = FALSE;
379
380 do
381 {
382 /* Use MacRoman by default, we might be called before we have p_enc
383 * set up. Convert to utf-8 first, works better with iconv(). Does
384 * nothing if 'encoding' is "utf-8". */
385 conv.vc_type = CONV_NONE;
386 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
387 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
388 (char_u *)"utf-8") == FAIL)
389 break;
390
391 if (conv.vc_type != CONV_NONE)
392 {
393 utf8_len = fromLen;
394 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
395 should_free_utf8 = TRUE;
396 }
397 else
398 {
399 utf8_str = from;
400 utf8_len = fromLen;
401 }
402
403 if (utf8_str == NULL)
404 break;
405
406 convert_setup(&conv, NULL, NULL);
407
408 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
409
410 if (should_free_utf8)
411 vim_free(utf8_str);
412 return result;
413 }
414 while (0);
415
416 if (actualLen)
417 *actualLen = 0;
418
419 return result;
420}
421
422/*
423 * Converts from UTF-16 UniChars to CFString
424 */
425 CFStringRef
426mac_enc_to_cfstring(from, fromLen)
427 char_u *from;
428 size_t fromLen;
429{
430 UniChar *utf16_str;
431 size_t utf16_len;
432 CFStringRef result = NULL;
433
434 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
435 if (utf16_str)
436 {
437 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
438 vim_free(utf16_str);
439 }
440
441 return result;
442}
443
444/*
445 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
446 */
447 char_u *
448mac_precompose_path(decompPath, decompLen, precompLen)
449 char_u *decompPath;
450 size_t decompLen;
451 size_t *precompLen;
452{
453 char_u *result = NULL;
454 size_t actualLen = 0;
455
456 if (gPathConverter)
457 {
458 result = alloc(decompLen);
459 if (result)
460 {
461 if (TECConvertText(gPathConverter, decompPath,
462 decompLen, &decompLen, result,
463 decompLen, &actualLen) != noErr)
464 {
465 vim_free(result);
466 result = NULL;
467 }
468 }
469 }
470
471 if (precompLen)
472 *precompLen = actualLen;
473
474 return result;
475}
476
477/*
478 * Converts from UTF-16 UniChars to precomposed UTF-8
479 */
480 char_u *
481mac_utf16_to_utf8(from, fromLen, actualLen)
482 UniChar *from;
483 size_t fromLen;
484 size_t *actualLen;
485{
486 ByteCount utf8_len;
487 ByteCount inputRead;
488 char_u *result;
489
490 if (gUTF16ToUTF8Converter)
491 {
492 result = alloc(fromLen * 6 + 1);
493 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
494 fromLen, &inputRead, result,
495 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
496 {
497 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
498 utf8_len += inputRead;
499 }
500 else
501 {
502 vim_free(result);
503 result = NULL;
504 }
505 }
506 else
507 {
508 result = NULL;
509 }
510
511 if (actualLen)
512 *actualLen = result ? utf8_len : 0;
513
514 return result;
515}
516
517/*
518 * Converts from UTF-8 to UTF-16 UniChars
519 */
520 UniChar *
521mac_utf8_to_utf16(from, fromLen, actualLen)
522 char_u *from;
523 size_t fromLen;
524 size_t *actualLen;
525{
526 CFStringRef utf8_str;
527 CFRange convertRange;
528 UniChar *result = NULL;
529
530 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
531 kCFStringEncodingUTF8, FALSE);
532
533 if (utf8_str == NULL) {
534 if (actualLen)
535 *actualLen = 0;
536 return NULL;
537 }
538
539 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
540 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
541
542 CFStringGetCharacters(utf8_str, convertRange, result);
543
544 CFRelease(utf8_str);
545
546 if (actualLen)
547 *actualLen = convertRange.length * sizeof(UniChar);
548
549 return result;
550}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000551#endif /* FEAT_MBYTE */