blob: 56d8953c17e71d8635fb2b9d1d65cec1b13aefc4 [file] [log] [blame]
Bram Moolenaarab79bcb2004-07-18 21:34:53 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9/*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
11 *
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
14 */
15
16#define NO_X11_INCLUDES
17#include "vim.h"
18
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +000019#if defined(MACOS_CONVERT) || defined(PROTO)
20# ifdef PROTO
21/* A few dummy types to be able to generate function prototypes. */
22typedef int UniChar;
23typedef int *TECObjectRef;
24typedef int CFStringRef;
25# endif
26
Bram Moolenaar26a60b42005-02-22 08:49:11 +000027static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
28static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
29
30/* Converter for composing decomposed HFS+ file paths */
31static TECObjectRef gPathConverter;
32/* Converter used by mac_utf16_to_utf8 */
33static TECObjectRef gUTF16ToUTF8Converter;
34
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000035/*
36 * A Mac version of string_convert_ext() for special cases.
37 */
38 char_u *
39mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
40 char_u *ptr;
41 int len;
42 int *lenp;
43 int fail_on_error;
44 int from_enc;
45 int to_enc;
46 int *unconvlenp;
47{
48 char_u *retval, *d;
49 CFStringRef cfstr;
50 int buflen, in, out, l, i;
51 CFStringEncoding from;
52 CFStringEncoding to;
53
54 switch (from_enc)
55 {
56 case 'l': from = kCFStringEncodingISOLatin1; break;
57 case 'm': from = kCFStringEncodingMacRoman; break;
58 case 'u': from = kCFStringEncodingUTF8; break;
59 default: return NULL;
60 }
61 switch (to_enc)
62 {
63 case 'l': to = kCFStringEncodingISOLatin1; break;
64 case 'm': to = kCFStringEncodingMacRoman; break;
65 case 'u': to = kCFStringEncodingUTF8; break;
66 default: return NULL;
67 }
68
69 if (unconvlenp != NULL)
70 *unconvlenp = 0;
71 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
72
Bram Moolenaar26a60b42005-02-22 08:49:11 +000073 if(cfstr == NULL)
74 fprintf(stderr, "Encoding failed\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000075 /* When conversion failed, try excluding bytes from the end, helps when
76 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
Bram Moolenaar720c7102007-05-10 18:07:50 +000077 * looping a long time when there really is something unconvertible. */
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000078 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
79 {
80 --len;
81 ++*unconvlenp;
82 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
83 }
84 if (cfstr == NULL)
85 return NULL;
Bram Moolenaar26a60b42005-02-22 08:49:11 +000086
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000087 if (to == kCFStringEncodingUTF8)
88 buflen = len * 6 + 1;
89 else
90 buflen = len + 1;
91 retval = alloc(buflen);
92 if (retval == NULL)
93 {
94 CFRelease(cfstr);
95 return NULL;
96 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +000097
98#if 0
99 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
100 /* Determine output buffer size */
101 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
102 retval = (buflen > 0) ? alloc(buflen) : NULL;
103 if (retval == NULL) {
104 CFRelease(cfstr);
105 return NULL;
106 }
107
108 if (lenp)
109 *lenp = buflen / sizeof(char_u);
110
111 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
112#endif
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000113 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000114 {
115 CFRelease(cfstr);
116 if (fail_on_error)
117 {
118 vim_free(retval);
119 return NULL;
120 }
121
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000122 fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000123 /* conversion failed for the whole string, but maybe it will work
124 * for each character */
125 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
126 {
127 if (from == kCFStringEncodingUTF8)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000128 l = utf_ptr2len(ptr + in);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000129 else
130 l = 1;
131 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
132 if (cfstr == NULL)
133 {
134 *d++ = '?';
135 out++;
136 }
137 else
138 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000139 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000140 {
141 *d++ = '?';
142 out++;
143 }
144 else
145 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000146 i = STRLEN(d);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000147 d += i;
148 out += i;
149 }
150 CFRelease(cfstr);
151 }
152 in += l;
153 }
154 *d = NUL;
155 if (lenp != NULL)
156 *lenp = out;
157 return retval;
158 }
159 CFRelease(cfstr);
160 if (lenp != NULL)
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000161 *lenp = STRLEN(retval);
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000162
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000163 return retval;
164}
165
166/*
167 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
168 * standard Carbon framework.
169 * Input: "ptr[*sizep]".
170 * "real_size" is the size of the buffer that "ptr" points to.
171 * output is in-place, "sizep" is adjusted.
172 * Returns OK or FAIL.
173 */
174 int
175macroman2enc(ptr, sizep, real_size)
176 char_u *ptr;
177 long *sizep;
178 long real_size;
179{
180 CFStringRef cfstr;
181 CFRange r;
182 CFIndex len = *sizep;
183
184 /* MacRoman is an 8-bit encoding, no need to move bytes to
185 * conv_rest[]. */
186 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
187 kCFStringEncodingMacRoman, 0);
188 /*
189 * If there is a conversion error, try using another
190 * conversion.
191 */
192 if (cfstr == NULL)
193 return FAIL;
194
195 r.location = 0;
196 r.length = CFStringGetLength(cfstr);
197 if (r.length != CFStringGetBytes(cfstr, r,
198 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
199 0, /* no lossy conversion */
200 0, /* not external representation */
201 ptr + *sizep, real_size - *sizep, &len))
202 {
203 CFRelease(cfstr);
204 return FAIL;
205 }
206 CFRelease(cfstr);
207 mch_memmove(ptr, ptr + *sizep, len);
208 *sizep = len;
209
210 return OK;
211}
212
213/*
214 * Conversion from UTF-8 or latin1 to MacRoman.
215 * Input: "from[fromlen]"
216 * Output: "to[maxtolen]" length in "*tolenp"
217 * Unconverted rest in rest[*restlenp].
218 * Returns OK or FAIL.
219 */
220 int
221enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
222 char_u *from;
223 size_t fromlen;
224 char_u *to;
225 int *tolenp;
226 int maxtolen;
227 char_u *rest;
228 int *restlenp;
229{
230 CFStringRef cfstr;
231 CFRange r;
232 CFIndex l;
233
234 *restlenp = 0;
235 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
236 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
237 0);
238 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
239 {
240 rest[*restlenp++] = from[--fromlen];
241 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
242 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
243 0);
244 }
245 if (cfstr == NULL)
246 return FAIL;
247
248 r.location = 0;
249 r.length = CFStringGetLength(cfstr);
250 if (r.length != CFStringGetBytes(cfstr, r,
251 kCFStringEncodingMacRoman,
252 0, /* no lossy conversion */
253 0, /* not external representation (since vim
254 * handles this internally */
255 to, maxtolen, &l))
256 {
257 CFRelease(cfstr);
258 return FAIL;
259 }
260 CFRelease(cfstr);
261 *tolenp = l;
262 return OK;
263}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000264
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000265/*
266 * Initializes text converters
267 */
268 void
269mac_conv_init()
270{
271 TextEncoding utf8_encoding;
272 TextEncoding utf8_hfsplus_encoding;
273 TextEncoding utf8_canon_encoding;
274 TextEncoding utf16_encoding;
275
276 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
277 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
278 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
279 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
280 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
281 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
282 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
283 kTextEncodingDefaultVariant, kUnicode16BitFormat);
284
285 if (TECCreateConverter(&gPathConverter, utf8_encoding,
286 utf8_hfsplus_encoding) != noErr)
287 gPathConverter = NULL;
288
289 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
290 utf8_canon_encoding) != noErr)
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000291 {
292 /* On pre-10.3, Unicode normalization is not available so
293 * fall back to non-normalizing converter */
294 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
295 utf8_encoding) != noErr)
296 gUTF16ToUTF8Converter = NULL;
297 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000298}
299
300/*
301 * Destroys text converters
302 */
303 void
304mac_conv_cleanup()
305{
306 if (gUTF16ToUTF8Converter)
307 {
308 TECDisposeConverter(gUTF16ToUTF8Converter);
309 gUTF16ToUTF8Converter = NULL;
310 }
311
312 if (gPathConverter)
313 {
314 TECDisposeConverter(gPathConverter);
315 gPathConverter = NULL;
316 }
317}
318
319/*
320 * Conversion from UTF-16 UniChars to 'encoding'
321 */
322 char_u *
323mac_utf16_to_enc(from, fromLen, actualLen)
324 UniChar *from;
325 size_t fromLen;
326 size_t *actualLen;
327{
328 /* Following code borrows somewhat from os_mswin.c */
329 vimconv_T conv;
330 size_t utf8_len;
331 char_u *utf8_str;
332 char_u *result = NULL;
333
334 /* Convert to utf-8 first, works better with iconv */
335 utf8_len = 0;
336 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
337
338 if (utf8_str)
339 {
340 /* We might be called before we have p_enc set up. */
341 conv.vc_type = CONV_NONE;
342
343 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
344 * internal unicode is always utf-8) so don't convert in such cases */
345
346 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
347 convert_setup(&conv, (char_u *)"utf-8",
348 p_enc? p_enc: (char_u *)"macroman");
349 if (conv.vc_type == CONV_NONE)
350 {
351 /* p_enc is utf-8, so we're done. */
352 result = utf8_str;
353 }
354 else
355 {
356 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
357 vim_free(utf8_str);
358 }
359
360 convert_setup(&conv, NULL, NULL);
361
362 if (actualLen)
363 *actualLen = utf8_len;
364 }
365 else if (actualLen)
366 *actualLen = 0;
367
368 return result;
369}
370
371/*
372 * Conversion from 'encoding' to UTF-16 UniChars
373 */
374 UniChar *
375mac_enc_to_utf16(from, fromLen, actualLen)
376 char_u *from;
377 size_t fromLen;
378 size_t *actualLen;
379{
380 /* Following code borrows somewhat from os_mswin.c */
381 vimconv_T conv;
382 size_t utf8_len;
383 char_u *utf8_str;
384 UniChar *result = NULL;
385 Boolean should_free_utf8 = FALSE;
386
387 do
388 {
389 /* Use MacRoman by default, we might be called before we have p_enc
390 * set up. Convert to utf-8 first, works better with iconv(). Does
391 * nothing if 'encoding' is "utf-8". */
392 conv.vc_type = CONV_NONE;
393 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
394 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
395 (char_u *)"utf-8") == FAIL)
396 break;
397
398 if (conv.vc_type != CONV_NONE)
399 {
400 utf8_len = fromLen;
401 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
402 should_free_utf8 = TRUE;
403 }
404 else
405 {
406 utf8_str = from;
407 utf8_len = fromLen;
408 }
409
410 if (utf8_str == NULL)
411 break;
412
413 convert_setup(&conv, NULL, NULL);
414
415 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
416
417 if (should_free_utf8)
418 vim_free(utf8_str);
419 return result;
420 }
421 while (0);
422
423 if (actualLen)
424 *actualLen = 0;
425
426 return result;
427}
428
429/*
430 * Converts from UTF-16 UniChars to CFString
431 */
432 CFStringRef
433mac_enc_to_cfstring(from, fromLen)
434 char_u *from;
435 size_t fromLen;
436{
437 UniChar *utf16_str;
438 size_t utf16_len;
439 CFStringRef result = NULL;
440
441 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
442 if (utf16_str)
443 {
444 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
445 vim_free(utf16_str);
446 }
447
448 return result;
449}
450
451/*
452 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
453 */
454 char_u *
455mac_precompose_path(decompPath, decompLen, precompLen)
456 char_u *decompPath;
457 size_t decompLen;
458 size_t *precompLen;
459{
460 char_u *result = NULL;
461 size_t actualLen = 0;
462
463 if (gPathConverter)
464 {
465 result = alloc(decompLen);
466 if (result)
467 {
468 if (TECConvertText(gPathConverter, decompPath,
469 decompLen, &decompLen, result,
470 decompLen, &actualLen) != noErr)
471 {
472 vim_free(result);
473 result = NULL;
474 }
475 }
476 }
477
478 if (precompLen)
479 *precompLen = actualLen;
480
481 return result;
482}
483
484/*
485 * Converts from UTF-16 UniChars to precomposed UTF-8
486 */
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000487 static char_u *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000488mac_utf16_to_utf8(from, fromLen, actualLen)
489 UniChar *from;
490 size_t fromLen;
491 size_t *actualLen;
492{
493 ByteCount utf8_len;
494 ByteCount inputRead;
495 char_u *result;
496
497 if (gUTF16ToUTF8Converter)
498 {
499 result = alloc(fromLen * 6 + 1);
500 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
501 fromLen, &inputRead, result,
502 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
503 {
504 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
505 utf8_len += inputRead;
506 }
507 else
508 {
509 vim_free(result);
510 result = NULL;
511 }
512 }
513 else
514 {
515 result = NULL;
516 }
517
518 if (actualLen)
519 *actualLen = result ? utf8_len : 0;
520
521 return result;
522}
523
524/*
525 * Converts from UTF-8 to UTF-16 UniChars
526 */
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000527 static UniChar *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000528mac_utf8_to_utf16(from, fromLen, actualLen)
529 char_u *from;
530 size_t fromLen;
531 size_t *actualLen;
532{
533 CFStringRef utf8_str;
534 CFRange convertRange;
535 UniChar *result = NULL;
536
537 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
538 kCFStringEncodingUTF8, FALSE);
539
540 if (utf8_str == NULL) {
541 if (actualLen)
542 *actualLen = 0;
543 return NULL;
544 }
545
546 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
547 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
548
549 CFStringGetCharacters(utf8_str, convertRange, result);
550
551 CFRelease(utf8_str);
552
553 if (actualLen)
554 *actualLen = convertRange.length * sizeof(UniChar);
555
556 return result;
557}
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000558#endif /* MACOS_CONVERT */