Bram Moolenaar | ab79bcb | 2004-07-18 21:34:53 +0000 | [diff] [blame] | 1 | /* vi:set ts=8 sts=4 sw=4: |
| 2 | * |
| 3 | * VIM - Vi IMproved by Bram Moolenaar |
| 4 | * |
| 5 | * Do ":help uganda" in Vim to read copying and usage conditions. |
| 6 | * Do ":help credits" in Vim to see a list of people who contributed. |
| 7 | * See README.txt for an overview of the Vim source code. |
| 8 | */ |
| 9 | /* |
| 10 | * os_mac_conv.c: Code specifically for Mac string conversions. |
| 11 | * |
| 12 | * This code has been put in a separate file to avoid the conflicts that are |
| 13 | * caused by including both the X11 and Carbon header files. |
| 14 | */ |
| 15 | |
| 16 | #define NO_X11_INCLUDES |
| 17 | #include "vim.h" |
| 18 | |
Bram Moolenaar | 5eb86f9 | 2004-07-26 12:53:41 +0000 | [diff] [blame] | 19 | #ifdef FEAT_MBYTE |
Bram Moolenaar | ab79bcb | 2004-07-18 21:34:53 +0000 | [diff] [blame] | 20 | extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp)); |
| 21 | extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size)); |
| 22 | extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp)); |
| 23 | |
| 24 | /* |
| 25 | * A Mac version of string_convert_ext() for special cases. |
| 26 | */ |
| 27 | char_u * |
| 28 | mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) |
| 29 | char_u *ptr; |
| 30 | int len; |
| 31 | int *lenp; |
| 32 | int fail_on_error; |
| 33 | int from_enc; |
| 34 | int to_enc; |
| 35 | int *unconvlenp; |
| 36 | { |
| 37 | char_u *retval, *d; |
| 38 | CFStringRef cfstr; |
| 39 | int buflen, in, out, l, i; |
| 40 | CFStringEncoding from; |
| 41 | CFStringEncoding to; |
| 42 | |
| 43 | switch (from_enc) |
| 44 | { |
| 45 | case 'l': from = kCFStringEncodingISOLatin1; break; |
| 46 | case 'm': from = kCFStringEncodingMacRoman; break; |
| 47 | case 'u': from = kCFStringEncodingUTF8; break; |
| 48 | default: return NULL; |
| 49 | } |
| 50 | switch (to_enc) |
| 51 | { |
| 52 | case 'l': to = kCFStringEncodingISOLatin1; break; |
| 53 | case 'm': to = kCFStringEncodingMacRoman; break; |
| 54 | case 'u': to = kCFStringEncodingUTF8; break; |
| 55 | default: return NULL; |
| 56 | } |
| 57 | |
| 58 | if (unconvlenp != NULL) |
| 59 | *unconvlenp = 0; |
| 60 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
| 61 | |
| 62 | /* When conversion failed, try excluding bytes from the end, helps when |
| 63 | * there is an incomplete byte sequence. Only do up to 6 bytes to avoid |
| 64 | * looping a long time when there really is something unconvertable. */ |
| 65 | while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
| 66 | { |
| 67 | --len; |
| 68 | ++*unconvlenp; |
| 69 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
| 70 | } |
| 71 | if (cfstr == NULL) |
| 72 | return NULL; |
| 73 | if (to == kCFStringEncodingUTF8) |
| 74 | buflen = len * 6 + 1; |
| 75 | else |
| 76 | buflen = len + 1; |
| 77 | retval = alloc(buflen); |
| 78 | if (retval == NULL) |
| 79 | { |
| 80 | CFRelease(cfstr); |
| 81 | return NULL; |
| 82 | } |
| 83 | if (!CFStringGetCString(cfstr, retval, buflen, to)) |
| 84 | { |
| 85 | CFRelease(cfstr); |
| 86 | if (fail_on_error) |
| 87 | { |
| 88 | vim_free(retval); |
| 89 | return NULL; |
| 90 | } |
| 91 | |
| 92 | /* conversion failed for the whole string, but maybe it will work |
| 93 | * for each character */ |
| 94 | for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) |
| 95 | { |
| 96 | if (from == kCFStringEncodingUTF8) |
| 97 | l = utf_ptr2len_check(ptr + in); |
| 98 | else |
| 99 | l = 1; |
| 100 | cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); |
| 101 | if (cfstr == NULL) |
| 102 | { |
| 103 | *d++ = '?'; |
| 104 | out++; |
| 105 | } |
| 106 | else |
| 107 | { |
| 108 | if (!CFStringGetCString(cfstr, d, buflen - out, to)) |
| 109 | { |
| 110 | *d++ = '?'; |
| 111 | out++; |
| 112 | } |
| 113 | else |
| 114 | { |
| 115 | i = strlen(d); |
| 116 | d += i; |
| 117 | out += i; |
| 118 | } |
| 119 | CFRelease(cfstr); |
| 120 | } |
| 121 | in += l; |
| 122 | } |
| 123 | *d = NUL; |
| 124 | if (lenp != NULL) |
| 125 | *lenp = out; |
| 126 | return retval; |
| 127 | } |
| 128 | CFRelease(cfstr); |
| 129 | if (lenp != NULL) |
| 130 | *lenp = strlen(retval); |
| 131 | return retval; |
| 132 | } |
| 133 | |
| 134 | /* |
| 135 | * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using |
| 136 | * standard Carbon framework. |
| 137 | * Input: "ptr[*sizep]". |
| 138 | * "real_size" is the size of the buffer that "ptr" points to. |
| 139 | * output is in-place, "sizep" is adjusted. |
| 140 | * Returns OK or FAIL. |
| 141 | */ |
| 142 | int |
| 143 | macroman2enc(ptr, sizep, real_size) |
| 144 | char_u *ptr; |
| 145 | long *sizep; |
| 146 | long real_size; |
| 147 | { |
| 148 | CFStringRef cfstr; |
| 149 | CFRange r; |
| 150 | CFIndex len = *sizep; |
| 151 | |
| 152 | /* MacRoman is an 8-bit encoding, no need to move bytes to |
| 153 | * conv_rest[]. */ |
| 154 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, |
| 155 | kCFStringEncodingMacRoman, 0); |
| 156 | /* |
| 157 | * If there is a conversion error, try using another |
| 158 | * conversion. |
| 159 | */ |
| 160 | if (cfstr == NULL) |
| 161 | return FAIL; |
| 162 | |
| 163 | r.location = 0; |
| 164 | r.length = CFStringGetLength(cfstr); |
| 165 | if (r.length != CFStringGetBytes(cfstr, r, |
| 166 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 167 | 0, /* no lossy conversion */ |
| 168 | 0, /* not external representation */ |
| 169 | ptr + *sizep, real_size - *sizep, &len)) |
| 170 | { |
| 171 | CFRelease(cfstr); |
| 172 | return FAIL; |
| 173 | } |
| 174 | CFRelease(cfstr); |
| 175 | mch_memmove(ptr, ptr + *sizep, len); |
| 176 | *sizep = len; |
| 177 | |
| 178 | return OK; |
| 179 | } |
| 180 | |
| 181 | /* |
| 182 | * Conversion from UTF-8 or latin1 to MacRoman. |
| 183 | * Input: "from[fromlen]" |
| 184 | * Output: "to[maxtolen]" length in "*tolenp" |
| 185 | * Unconverted rest in rest[*restlenp]. |
| 186 | * Returns OK or FAIL. |
| 187 | */ |
| 188 | int |
| 189 | enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) |
| 190 | char_u *from; |
| 191 | size_t fromlen; |
| 192 | char_u *to; |
| 193 | int *tolenp; |
| 194 | int maxtolen; |
| 195 | char_u *rest; |
| 196 | int *restlenp; |
| 197 | { |
| 198 | CFStringRef cfstr; |
| 199 | CFRange r; |
| 200 | CFIndex l; |
| 201 | |
| 202 | *restlenp = 0; |
| 203 | cfstr = CFStringCreateWithBytes(NULL, from, fromlen, |
| 204 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 205 | 0); |
| 206 | while (cfstr == NULL && *restlenp < 3 && fromlen > 1) |
| 207 | { |
| 208 | rest[*restlenp++] = from[--fromlen]; |
| 209 | cfstr = CFStringCreateWithBytes(NULL, from, fromlen, |
| 210 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 211 | 0); |
| 212 | } |
| 213 | if (cfstr == NULL) |
| 214 | return FAIL; |
| 215 | |
| 216 | r.location = 0; |
| 217 | r.length = CFStringGetLength(cfstr); |
| 218 | if (r.length != CFStringGetBytes(cfstr, r, |
| 219 | kCFStringEncodingMacRoman, |
| 220 | 0, /* no lossy conversion */ |
| 221 | 0, /* not external representation (since vim |
| 222 | * handles this internally */ |
| 223 | to, maxtolen, &l)) |
| 224 | { |
| 225 | CFRelease(cfstr); |
| 226 | return FAIL; |
| 227 | } |
| 228 | CFRelease(cfstr); |
| 229 | *tolenp = l; |
| 230 | return OK; |
| 231 | } |
Bram Moolenaar | 5eb86f9 | 2004-07-26 12:53:41 +0000 | [diff] [blame] | 232 | |
| 233 | #endif /* FEAT_MBYTE */ |