blob: 3ede6480db3ae4f0c45af599753159f9559f4004 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
/*
 * os_mac_conv.c: Code specifically for Mac string conversions.
 *
 * This code has been put in a separate file to avoid the conflicts that are
 * caused by including both the X11 and Carbon header files.
 */
15
16#define NO_X11_INCLUDES
17#include "vim.h"
Bram Moolenaar164fca32010-07-14 13:58:07 +020018#ifndef FEAT_GUI_MAC
19# include <CoreServices/CoreServices.h>
20#endif
21
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000022
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +000023#if defined(MACOS_CONVERT) || defined(PROTO)
Bram Moolenaar164fca32010-07-14 13:58:07 +020024
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +000025# ifdef PROTO
26/* A few dummy types to be able to generate function prototypes. */
27typedef int UniChar;
28typedef int *TECObjectRef;
29typedef int CFStringRef;
30# endif
31
Bram Moolenaar26a60b42005-02-22 08:49:11 +000032static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
33static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
34
35/* Converter for composing decomposed HFS+ file paths */
36static TECObjectRef gPathConverter;
37/* Converter used by mac_utf16_to_utf8 */
38static TECObjectRef gUTF16ToUTF8Converter;
39
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000040/*
41 * A Mac version of string_convert_ext() for special cases.
42 */
43 char_u *
44mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
45 char_u *ptr;
46 int len;
47 int *lenp;
48 int fail_on_error;
49 int from_enc;
50 int to_enc;
51 int *unconvlenp;
52{
53 char_u *retval, *d;
54 CFStringRef cfstr;
55 int buflen, in, out, l, i;
56 CFStringEncoding from;
57 CFStringEncoding to;
58
59 switch (from_enc)
60 {
61 case 'l': from = kCFStringEncodingISOLatin1; break;
62 case 'm': from = kCFStringEncodingMacRoman; break;
63 case 'u': from = kCFStringEncodingUTF8; break;
64 default: return NULL;
65 }
66 switch (to_enc)
67 {
68 case 'l': to = kCFStringEncodingISOLatin1; break;
69 case 'm': to = kCFStringEncodingMacRoman; break;
70 case 'u': to = kCFStringEncodingUTF8; break;
71 default: return NULL;
72 }
73
74 if (unconvlenp != NULL)
75 *unconvlenp = 0;
76 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
77
Bram Moolenaar26a60b42005-02-22 08:49:11 +000078 if(cfstr == NULL)
79 fprintf(stderr, "Encoding failed\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000080 /* When conversion failed, try excluding bytes from the end, helps when
81 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
Bram Moolenaar720c7102007-05-10 18:07:50 +000082 * looping a long time when there really is something unconvertible. */
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000083 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
84 {
85 --len;
86 ++*unconvlenp;
87 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
88 }
89 if (cfstr == NULL)
90 return NULL;
Bram Moolenaar26a60b42005-02-22 08:49:11 +000091
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000092 if (to == kCFStringEncodingUTF8)
93 buflen = len * 6 + 1;
94 else
95 buflen = len + 1;
96 retval = alloc(buflen);
97 if (retval == NULL)
98 {
99 CFRelease(cfstr);
100 return NULL;
101 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000102
103#if 0
104 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
105 /* Determine output buffer size */
106 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
107 retval = (buflen > 0) ? alloc(buflen) : NULL;
108 if (retval == NULL) {
109 CFRelease(cfstr);
110 return NULL;
111 }
112
113 if (lenp)
114 *lenp = buflen / sizeof(char_u);
115
116 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
117#endif
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000118 if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000119 {
120 CFRelease(cfstr);
121 if (fail_on_error)
122 {
123 vim_free(retval);
124 return NULL;
125 }
126
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000127 fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000128 /* conversion failed for the whole string, but maybe it will work
129 * for each character */
130 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
131 {
132 if (from == kCFStringEncodingUTF8)
Bram Moolenaar0fa313a2005-08-10 21:07:57 +0000133 l = utf_ptr2len(ptr + in);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000134 else
135 l = 1;
136 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
137 if (cfstr == NULL)
138 {
139 *d++ = '?';
140 out++;
141 }
142 else
143 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000144 if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000145 {
146 *d++ = '?';
147 out++;
148 }
149 else
150 {
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000151 i = STRLEN(d);
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000152 d += i;
153 out += i;
154 }
155 CFRelease(cfstr);
156 }
157 in += l;
158 }
159 *d = NUL;
160 if (lenp != NULL)
161 *lenp = out;
162 return retval;
163 }
164 CFRelease(cfstr);
165 if (lenp != NULL)
Bram Moolenaarda2303d2005-08-30 21:55:26 +0000166 *lenp = STRLEN(retval);
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000167
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000168 return retval;
169}
170
171/*
172 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
173 * standard Carbon framework.
174 * Input: "ptr[*sizep]".
175 * "real_size" is the size of the buffer that "ptr" points to.
176 * output is in-place, "sizep" is adjusted.
177 * Returns OK or FAIL.
178 */
179 int
180macroman2enc(ptr, sizep, real_size)
181 char_u *ptr;
182 long *sizep;
183 long real_size;
184{
185 CFStringRef cfstr;
186 CFRange r;
187 CFIndex len = *sizep;
188
189 /* MacRoman is an 8-bit encoding, no need to move bytes to
190 * conv_rest[]. */
191 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
192 kCFStringEncodingMacRoman, 0);
193 /*
194 * If there is a conversion error, try using another
195 * conversion.
196 */
197 if (cfstr == NULL)
198 return FAIL;
199
200 r.location = 0;
201 r.length = CFStringGetLength(cfstr);
202 if (r.length != CFStringGetBytes(cfstr, r,
203 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
204 0, /* no lossy conversion */
205 0, /* not external representation */
206 ptr + *sizep, real_size - *sizep, &len))
207 {
208 CFRelease(cfstr);
209 return FAIL;
210 }
211 CFRelease(cfstr);
212 mch_memmove(ptr, ptr + *sizep, len);
213 *sizep = len;
214
215 return OK;
216}
217
218/*
219 * Conversion from UTF-8 or latin1 to MacRoman.
220 * Input: "from[fromlen]"
221 * Output: "to[maxtolen]" length in "*tolenp"
222 * Unconverted rest in rest[*restlenp].
223 * Returns OK or FAIL.
224 */
225 int
226enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
227 char_u *from;
228 size_t fromlen;
229 char_u *to;
230 int *tolenp;
231 int maxtolen;
232 char_u *rest;
233 int *restlenp;
234{
235 CFStringRef cfstr;
236 CFRange r;
237 CFIndex l;
238
239 *restlenp = 0;
240 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
241 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
242 0);
243 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
244 {
245 rest[*restlenp++] = from[--fromlen];
246 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
247 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
248 0);
249 }
250 if (cfstr == NULL)
251 return FAIL;
252
253 r.location = 0;
254 r.length = CFStringGetLength(cfstr);
255 if (r.length != CFStringGetBytes(cfstr, r,
256 kCFStringEncodingMacRoman,
257 0, /* no lossy conversion */
258 0, /* not external representation (since vim
259 * handles this internally */
260 to, maxtolen, &l))
261 {
262 CFRelease(cfstr);
263 return FAIL;
264 }
265 CFRelease(cfstr);
266 *tolenp = l;
267 return OK;
268}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000269
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000270/*
271 * Initializes text converters
272 */
273 void
274mac_conv_init()
275{
276 TextEncoding utf8_encoding;
277 TextEncoding utf8_hfsplus_encoding;
278 TextEncoding utf8_canon_encoding;
279 TextEncoding utf16_encoding;
280
281 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
282 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
283 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
284 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
285 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
286 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
287 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
288 kTextEncodingDefaultVariant, kUnicode16BitFormat);
289
290 if (TECCreateConverter(&gPathConverter, utf8_encoding,
291 utf8_hfsplus_encoding) != noErr)
292 gPathConverter = NULL;
293
294 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
295 utf8_canon_encoding) != noErr)
Bram Moolenaar19a09a12005-03-04 23:39:37 +0000296 {
297 /* On pre-10.3, Unicode normalization is not available so
298 * fall back to non-normalizing converter */
299 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
300 utf8_encoding) != noErr)
301 gUTF16ToUTF8Converter = NULL;
302 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000303}
304
305/*
306 * Destroys text converters
307 */
308 void
309mac_conv_cleanup()
310{
311 if (gUTF16ToUTF8Converter)
312 {
313 TECDisposeConverter(gUTF16ToUTF8Converter);
314 gUTF16ToUTF8Converter = NULL;
315 }
316
317 if (gPathConverter)
318 {
319 TECDisposeConverter(gPathConverter);
320 gPathConverter = NULL;
321 }
322}
323
324/*
325 * Conversion from UTF-16 UniChars to 'encoding'
Bram Moolenaar446cb832008-06-24 21:56:24 +0000326 * The function signature uses the real type of UniChar (as typedef'ed in
327 * CFBase.h) to avoid clashes with X11 header files in the .pro file
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000328 */
329 char_u *
330mac_utf16_to_enc(from, fromLen, actualLen)
Bram Moolenaar446cb832008-06-24 21:56:24 +0000331 unsigned short *from;
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000332 size_t fromLen;
333 size_t *actualLen;
334{
335 /* Following code borrows somewhat from os_mswin.c */
336 vimconv_T conv;
337 size_t utf8_len;
338 char_u *utf8_str;
339 char_u *result = NULL;
340
341 /* Convert to utf-8 first, works better with iconv */
342 utf8_len = 0;
343 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
344
345 if (utf8_str)
346 {
347 /* We might be called before we have p_enc set up. */
348 conv.vc_type = CONV_NONE;
349
350 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
351 * internal unicode is always utf-8) so don't convert in such cases */
352
353 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
354 convert_setup(&conv, (char_u *)"utf-8",
355 p_enc? p_enc: (char_u *)"macroman");
356 if (conv.vc_type == CONV_NONE)
357 {
358 /* p_enc is utf-8, so we're done. */
359 result = utf8_str;
360 }
361 else
362 {
363 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
364 vim_free(utf8_str);
365 }
366
367 convert_setup(&conv, NULL, NULL);
368
369 if (actualLen)
370 *actualLen = utf8_len;
371 }
372 else if (actualLen)
373 *actualLen = 0;
374
375 return result;
376}
377
378/*
379 * Conversion from 'encoding' to UTF-16 UniChars
Bram Moolenaar446cb832008-06-24 21:56:24 +0000380 * The function return uses the real type of UniChar (as typedef'ed in
381 * CFBase.h) to avoid clashes with X11 header files in the .pro file
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000382 */
Bram Moolenaar446cb832008-06-24 21:56:24 +0000383 unsigned short *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000384mac_enc_to_utf16(from, fromLen, actualLen)
385 char_u *from;
386 size_t fromLen;
387 size_t *actualLen;
388{
389 /* Following code borrows somewhat from os_mswin.c */
390 vimconv_T conv;
391 size_t utf8_len;
392 char_u *utf8_str;
393 UniChar *result = NULL;
394 Boolean should_free_utf8 = FALSE;
395
396 do
397 {
398 /* Use MacRoman by default, we might be called before we have p_enc
399 * set up. Convert to utf-8 first, works better with iconv(). Does
400 * nothing if 'encoding' is "utf-8". */
401 conv.vc_type = CONV_NONE;
402 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
403 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
404 (char_u *)"utf-8") == FAIL)
405 break;
406
407 if (conv.vc_type != CONV_NONE)
408 {
409 utf8_len = fromLen;
410 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
411 should_free_utf8 = TRUE;
412 }
413 else
414 {
415 utf8_str = from;
416 utf8_len = fromLen;
417 }
418
419 if (utf8_str == NULL)
420 break;
421
422 convert_setup(&conv, NULL, NULL);
423
424 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
425
426 if (should_free_utf8)
427 vim_free(utf8_str);
428 return result;
429 }
430 while (0);
431
432 if (actualLen)
433 *actualLen = 0;
434
435 return result;
436}
437
438/*
439 * Converts from UTF-16 UniChars to CFString
Bram Moolenaar446cb832008-06-24 21:56:24 +0000440 * The void * return type is actually a CFStringRef
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000441 */
Bram Moolenaar446cb832008-06-24 21:56:24 +0000442 void *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000443mac_enc_to_cfstring(from, fromLen)
444 char_u *from;
445 size_t fromLen;
446{
447 UniChar *utf16_str;
448 size_t utf16_len;
449 CFStringRef result = NULL;
450
451 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
452 if (utf16_str)
453 {
454 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
455 vim_free(utf16_str);
456 }
457
Bram Moolenaar446cb832008-06-24 21:56:24 +0000458 return (void *)result;
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000459}
460
461/*
462 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
463 */
464 char_u *
465mac_precompose_path(decompPath, decompLen, precompLen)
466 char_u *decompPath;
467 size_t decompLen;
468 size_t *precompLen;
469{
470 char_u *result = NULL;
471 size_t actualLen = 0;
472
473 if (gPathConverter)
474 {
475 result = alloc(decompLen);
476 if (result)
477 {
478 if (TECConvertText(gPathConverter, decompPath,
479 decompLen, &decompLen, result,
480 decompLen, &actualLen) != noErr)
481 {
482 vim_free(result);
483 result = NULL;
484 }
485 }
486 }
487
488 if (precompLen)
489 *precompLen = actualLen;
490
491 return result;
492}
493
494/*
495 * Converts from UTF-16 UniChars to precomposed UTF-8
496 */
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000497 static char_u *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000498mac_utf16_to_utf8(from, fromLen, actualLen)
499 UniChar *from;
500 size_t fromLen;
501 size_t *actualLen;
502{
503 ByteCount utf8_len;
504 ByteCount inputRead;
505 char_u *result;
506
507 if (gUTF16ToUTF8Converter)
508 {
509 result = alloc(fromLen * 6 + 1);
510 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
511 fromLen, &inputRead, result,
512 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
513 {
514 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
515 utf8_len += inputRead;
516 }
517 else
518 {
519 vim_free(result);
520 result = NULL;
521 }
522 }
523 else
524 {
525 result = NULL;
526 }
527
528 if (actualLen)
529 *actualLen = result ? utf8_len : 0;
530
531 return result;
532}
533
534/*
535 * Converts from UTF-8 to UTF-16 UniChars
536 */
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000537 static UniChar *
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000538mac_utf8_to_utf16(from, fromLen, actualLen)
539 char_u *from;
540 size_t fromLen;
541 size_t *actualLen;
542{
543 CFStringRef utf8_str;
544 CFRange convertRange;
545 UniChar *result = NULL;
546
547 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
548 kCFStringEncodingUTF8, FALSE);
549
550 if (utf8_str == NULL) {
551 if (actualLen)
552 *actualLen = 0;
553 return NULL;
554 }
555
556 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
557 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
558
559 CFStringGetCharacters(utf8_str, convertRange, result);
560
561 CFRelease(utf8_str);
562
563 if (actualLen)
564 *actualLen = convertRange.length * sizeof(UniChar);
565
566 return result;
567}
Bram Moolenaar446cb832008-06-24 21:56:24 +0000568
569/*
570 * Sets LANG environment variable in Vim from Mac locale
571 */
572 void
573mac_lang_init() {
574 if (mch_getenv((char_u *)"LANG") == NULL)
575 {
576 char buf[20];
577 if (LocaleRefGetPartString(NULL,
578 kLocaleLanguageMask | kLocaleLanguageVariantMask |
579 kLocaleRegionMask | kLocaleRegionVariantMask,
580 sizeof buf, buf) == noErr && *buf)
581 {
582 vim_setenv((char_u *)"LANG", (char_u *)buf);
583# ifdef HAVE_LOCALE_H
584 setlocale(LC_ALL, "");
585# endif
586 }
587 }
588}
Bram Moolenaar7d47b6e2006-03-15 22:59:18 +0000589#endif /* MACOS_CONVERT */