blob: 68d46c0f2b2b582da8ad50df261927a29843f6dc [file] [log] [blame]
Bram Moolenaarab79bcb2004-07-18 21:34:53 +00001/* vi:set ts=8 sts=4 sw=4:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9/*
10 * os_mac_conv.c: Code specifically for Mac string conversions.
11 *
12 * This code has been put in a separate file to avoid the conflicts that are
13 * caused by including both the X11 and Carbon header files.
14 */
15
16#define NO_X11_INCLUDES
17#include "vim.h"
18
Bram Moolenaar5eb86f92004-07-26 12:53:41 +000019#ifdef FEAT_MBYTE
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000020extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp));
21extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size));
22extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp));
23
Bram Moolenaar26a60b42005-02-22 08:49:11 +000024extern void mac_conv_init __ARGS((void));
25extern void mac_conv_cleanup __ARGS((void));
26extern char_u *mac_utf16_to_enc __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
27extern UniChar *mac_enc_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
28extern CFStringRef mac_enc_to_cfstring __ARGS((char_u *from, size_t fromLen));
29extern char_u *mac_precompose_path __ARGS((char_u *decompPath, size_t decompLen, size_t *precompLen));
30
31static char_u *mac_utf16_to_utf8 __ARGS((UniChar *from, size_t fromLen, size_t *actualLen));
32static UniChar *mac_utf8_to_utf16 __ARGS((char_u *from, size_t fromLen, size_t *actualLen));
33
34/* Converter for composing decomposed HFS+ file paths */
35static TECObjectRef gPathConverter;
36/* Converter used by mac_utf16_to_utf8 */
37static TECObjectRef gUTF16ToUTF8Converter;
38
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000039/*
40 * A Mac version of string_convert_ext() for special cases.
41 */
42 char_u *
43mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
44 char_u *ptr;
45 int len;
46 int *lenp;
47 int fail_on_error;
48 int from_enc;
49 int to_enc;
50 int *unconvlenp;
51{
52 char_u *retval, *d;
53 CFStringRef cfstr;
54 int buflen, in, out, l, i;
55 CFStringEncoding from;
56 CFStringEncoding to;
57
58 switch (from_enc)
59 {
60 case 'l': from = kCFStringEncodingISOLatin1; break;
61 case 'm': from = kCFStringEncodingMacRoman; break;
62 case 'u': from = kCFStringEncodingUTF8; break;
63 default: return NULL;
64 }
65 switch (to_enc)
66 {
67 case 'l': to = kCFStringEncodingISOLatin1; break;
68 case 'm': to = kCFStringEncodingMacRoman; break;
69 case 'u': to = kCFStringEncodingUTF8; break;
70 default: return NULL;
71 }
72
73 if (unconvlenp != NULL)
74 *unconvlenp = 0;
75 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
76
Bram Moolenaar26a60b42005-02-22 08:49:11 +000077 if(cfstr == NULL)
78 fprintf(stderr, "Encoding failed\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000079 /* When conversion failed, try excluding bytes from the end, helps when
80 * there is an incomplete byte sequence. Only do up to 6 bytes to avoid
81 * looping a long time when there really is something unconvertable. */
82 while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
83 {
84 --len;
85 ++*unconvlenp;
86 cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
87 }
88 if (cfstr == NULL)
89 return NULL;
Bram Moolenaar26a60b42005-02-22 08:49:11 +000090
Bram Moolenaarab79bcb2004-07-18 21:34:53 +000091 if (to == kCFStringEncodingUTF8)
92 buflen = len * 6 + 1;
93 else
94 buflen = len + 1;
95 retval = alloc(buflen);
96 if (retval == NULL)
97 {
98 CFRelease(cfstr);
99 return NULL;
100 }
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000101
102#if 0
103 CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
104 /* Determine output buffer size */
105 CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
106 retval = (buflen > 0) ? alloc(buflen) : NULL;
107 if (retval == NULL) {
108 CFRelease(cfstr);
109 return NULL;
110 }
111
112 if (lenp)
113 *lenp = buflen / sizeof(char_u);
114
115 if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
116#endif
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000117 if (!CFStringGetCString(cfstr, retval, buflen, to))
118 {
119 CFRelease(cfstr);
120 if (fail_on_error)
121 {
122 vim_free(retval);
123 return NULL;
124 }
125
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000126 fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000127 /* conversion failed for the whole string, but maybe it will work
128 * for each character */
129 for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
130 {
131 if (from == kCFStringEncodingUTF8)
132 l = utf_ptr2len_check(ptr + in);
133 else
134 l = 1;
135 cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
136 if (cfstr == NULL)
137 {
138 *d++ = '?';
139 out++;
140 }
141 else
142 {
143 if (!CFStringGetCString(cfstr, d, buflen - out, to))
144 {
145 *d++ = '?';
146 out++;
147 }
148 else
149 {
150 i = strlen(d);
151 d += i;
152 out += i;
153 }
154 CFRelease(cfstr);
155 }
156 in += l;
157 }
158 *d = NUL;
159 if (lenp != NULL)
160 *lenp = out;
161 return retval;
162 }
163 CFRelease(cfstr);
164 if (lenp != NULL)
165 *lenp = strlen(retval);
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000166
Bram Moolenaarab79bcb2004-07-18 21:34:53 +0000167 return retval;
168}
169
170/*
171 * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
172 * standard Carbon framework.
173 * Input: "ptr[*sizep]".
174 * "real_size" is the size of the buffer that "ptr" points to.
175 * output is in-place, "sizep" is adjusted.
176 * Returns OK or FAIL.
177 */
178 int
179macroman2enc(ptr, sizep, real_size)
180 char_u *ptr;
181 long *sizep;
182 long real_size;
183{
184 CFStringRef cfstr;
185 CFRange r;
186 CFIndex len = *sizep;
187
188 /* MacRoman is an 8-bit encoding, no need to move bytes to
189 * conv_rest[]. */
190 cfstr = CFStringCreateWithBytes(NULL, ptr, len,
191 kCFStringEncodingMacRoman, 0);
192 /*
193 * If there is a conversion error, try using another
194 * conversion.
195 */
196 if (cfstr == NULL)
197 return FAIL;
198
199 r.location = 0;
200 r.length = CFStringGetLength(cfstr);
201 if (r.length != CFStringGetBytes(cfstr, r,
202 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
203 0, /* no lossy conversion */
204 0, /* not external representation */
205 ptr + *sizep, real_size - *sizep, &len))
206 {
207 CFRelease(cfstr);
208 return FAIL;
209 }
210 CFRelease(cfstr);
211 mch_memmove(ptr, ptr + *sizep, len);
212 *sizep = len;
213
214 return OK;
215}
216
217/*
218 * Conversion from UTF-8 or latin1 to MacRoman.
219 * Input: "from[fromlen]"
220 * Output: "to[maxtolen]" length in "*tolenp"
221 * Unconverted rest in rest[*restlenp].
222 * Returns OK or FAIL.
223 */
224 int
225enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
226 char_u *from;
227 size_t fromlen;
228 char_u *to;
229 int *tolenp;
230 int maxtolen;
231 char_u *rest;
232 int *restlenp;
233{
234 CFStringRef cfstr;
235 CFRange r;
236 CFIndex l;
237
238 *restlenp = 0;
239 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
240 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
241 0);
242 while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
243 {
244 rest[*restlenp++] = from[--fromlen];
245 cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
246 (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
247 0);
248 }
249 if (cfstr == NULL)
250 return FAIL;
251
252 r.location = 0;
253 r.length = CFStringGetLength(cfstr);
254 if (r.length != CFStringGetBytes(cfstr, r,
255 kCFStringEncodingMacRoman,
256 0, /* no lossy conversion */
257 0, /* not external representation (since vim
258 * handles this internally */
259 to, maxtolen, &l))
260 {
261 CFRelease(cfstr);
262 return FAIL;
263 }
264 CFRelease(cfstr);
265 *tolenp = l;
266 return OK;
267}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000268
Bram Moolenaar26a60b42005-02-22 08:49:11 +0000269/*
270 * Initializes text converters
271 */
272 void
273mac_conv_init()
274{
275 TextEncoding utf8_encoding;
276 TextEncoding utf8_hfsplus_encoding;
277 TextEncoding utf8_canon_encoding;
278 TextEncoding utf16_encoding;
279
280 utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
281 kTextEncodingDefaultVariant, kUnicodeUTF8Format);
282 utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
283 kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
284 utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
285 kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
286 utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
287 kTextEncodingDefaultVariant, kUnicode16BitFormat);
288
289 if (TECCreateConverter(&gPathConverter, utf8_encoding,
290 utf8_hfsplus_encoding) != noErr)
291 gPathConverter = NULL;
292
293 if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
294 utf8_canon_encoding) != noErr)
295 gUTF16ToUTF8Converter = NULL;
296}
297
298/*
299 * Destroys text converters
300 */
301 void
302mac_conv_cleanup()
303{
304 if (gUTF16ToUTF8Converter)
305 {
306 TECDisposeConverter(gUTF16ToUTF8Converter);
307 gUTF16ToUTF8Converter = NULL;
308 }
309
310 if (gPathConverter)
311 {
312 TECDisposeConverter(gPathConverter);
313 gPathConverter = NULL;
314 }
315}
316
317/*
318 * Conversion from UTF-16 UniChars to 'encoding'
319 */
320 char_u *
321mac_utf16_to_enc(from, fromLen, actualLen)
322 UniChar *from;
323 size_t fromLen;
324 size_t *actualLen;
325{
326 /* Following code borrows somewhat from os_mswin.c */
327 vimconv_T conv;
328 size_t utf8_len;
329 char_u *utf8_str;
330 char_u *result = NULL;
331
332 /* Convert to utf-8 first, works better with iconv */
333 utf8_len = 0;
334 utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
335
336 if (utf8_str)
337 {
338 /* We might be called before we have p_enc set up. */
339 conv.vc_type = CONV_NONE;
340
341 /* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
342 * internal unicode is always utf-8) so don't convert in such cases */
343
344 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
345 convert_setup(&conv, (char_u *)"utf-8",
346 p_enc? p_enc: (char_u *)"macroman");
347 if (conv.vc_type == CONV_NONE)
348 {
349 /* p_enc is utf-8, so we're done. */
350 result = utf8_str;
351 }
352 else
353 {
354 result = string_convert(&conv, utf8_str, (int *)&utf8_len);
355 vim_free(utf8_str);
356 }
357
358 convert_setup(&conv, NULL, NULL);
359
360 if (actualLen)
361 *actualLen = utf8_len;
362 }
363 else if (actualLen)
364 *actualLen = 0;
365
366 return result;
367}
368
369/*
370 * Conversion from 'encoding' to UTF-16 UniChars
371 */
372 UniChar *
373mac_enc_to_utf16(from, fromLen, actualLen)
374 char_u *from;
375 size_t fromLen;
376 size_t *actualLen;
377{
378 /* Following code borrows somewhat from os_mswin.c */
379 vimconv_T conv;
380 size_t utf8_len;
381 char_u *utf8_str;
382 UniChar *result = NULL;
383 Boolean should_free_utf8 = FALSE;
384
385 do
386 {
387 /* Use MacRoman by default, we might be called before we have p_enc
388 * set up. Convert to utf-8 first, works better with iconv(). Does
389 * nothing if 'encoding' is "utf-8". */
390 conv.vc_type = CONV_NONE;
391 if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
392 convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
393 (char_u *)"utf-8") == FAIL)
394 break;
395
396 if (conv.vc_type != CONV_NONE)
397 {
398 utf8_len = fromLen;
399 utf8_str = string_convert(&conv, from, (int *)&utf8_len);
400 should_free_utf8 = TRUE;
401 }
402 else
403 {
404 utf8_str = from;
405 utf8_len = fromLen;
406 }
407
408 if (utf8_str == NULL)
409 break;
410
411 convert_setup(&conv, NULL, NULL);
412
413 result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
414
415 if (should_free_utf8)
416 vim_free(utf8_str);
417 return result;
418 }
419 while (0);
420
421 if (actualLen)
422 *actualLen = 0;
423
424 return result;
425}
426
427/*
428 * Converts from UTF-16 UniChars to CFString
429 */
430 CFStringRef
431mac_enc_to_cfstring(from, fromLen)
432 char_u *from;
433 size_t fromLen;
434{
435 UniChar *utf16_str;
436 size_t utf16_len;
437 CFStringRef result = NULL;
438
439 utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
440 if (utf16_str)
441 {
442 result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
443 vim_free(utf16_str);
444 }
445
446 return result;
447}
448
449/*
450 * Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
451 */
452 char_u *
453mac_precompose_path(decompPath, decompLen, precompLen)
454 char_u *decompPath;
455 size_t decompLen;
456 size_t *precompLen;
457{
458 char_u *result = NULL;
459 size_t actualLen = 0;
460
461 if (gPathConverter)
462 {
463 result = alloc(decompLen);
464 if (result)
465 {
466 if (TECConvertText(gPathConverter, decompPath,
467 decompLen, &decompLen, result,
468 decompLen, &actualLen) != noErr)
469 {
470 vim_free(result);
471 result = NULL;
472 }
473 }
474 }
475
476 if (precompLen)
477 *precompLen = actualLen;
478
479 return result;
480}
481
482/*
483 * Converts from UTF-16 UniChars to precomposed UTF-8
484 */
485 char_u *
486mac_utf16_to_utf8(from, fromLen, actualLen)
487 UniChar *from;
488 size_t fromLen;
489 size_t *actualLen;
490{
491 ByteCount utf8_len;
492 ByteCount inputRead;
493 char_u *result;
494
495 if (gUTF16ToUTF8Converter)
496 {
497 result = alloc(fromLen * 6 + 1);
498 if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
499 fromLen, &inputRead, result,
500 (fromLen*6+1)*sizeof(char_u), &utf8_len) == noErr)
501 {
502 TECFlushText(gUTF16ToUTF8Converter, result, (fromLen*6+1)*sizeof(char_u), &inputRead);
503 utf8_len += inputRead;
504 }
505 else
506 {
507 vim_free(result);
508 result = NULL;
509 }
510 }
511 else
512 {
513 result = NULL;
514 }
515
516 if (actualLen)
517 *actualLen = result ? utf8_len : 0;
518
519 return result;
520}
521
522/*
523 * Converts from UTF-8 to UTF-16 UniChars
524 */
525 UniChar *
526mac_utf8_to_utf16(from, fromLen, actualLen)
527 char_u *from;
528 size_t fromLen;
529 size_t *actualLen;
530{
531 CFStringRef utf8_str;
532 CFRange convertRange;
533 UniChar *result = NULL;
534
535 utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
536 kCFStringEncodingUTF8, FALSE);
537
538 if (utf8_str == NULL) {
539 if (actualLen)
540 *actualLen = 0;
541 return NULL;
542 }
543
544 convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
545 result = (UniChar *)alloc(convertRange.length * sizeof(UniChar));
546
547 CFStringGetCharacters(utf8_str, convertRange, result);
548
549 CFRelease(utf8_str);
550
551 if (actualLen)
552 *actualLen = convertRange.length * sizeof(UniChar);
553
554 return result;
555}
Bram Moolenaar5eb86f92004-07-26 12:53:41 +0000556#endif /* FEAT_MBYTE */