Bram Moolenaar | ab79bcb | 2004-07-18 21:34:53 +0000 | [diff] [blame] | 1 | /* vi:set ts=8 sts=4 sw=4: |
| 2 | * |
| 3 | * VIM - Vi IMproved by Bram Moolenaar |
| 4 | * |
| 5 | * Do ":help uganda" in Vim to read copying and usage conditions. |
| 6 | * Do ":help credits" in Vim to see a list of people who contributed. |
| 7 | * See README.txt for an overview of the Vim source code. |
| 8 | */ |
| 9 | /* |
| 10 | * os_mac_conv.c: Code specifically for Mac string conversions. |
| 11 | * |
| 12 | * This code has been put in a separate file to avoid the conflicts that are |
| 13 | * caused by including both the X11 and Carbon header files. |
| 14 | */ |
| 15 | |
| 16 | #define NO_X11_INCLUDES |
| 17 | #include "vim.h" |
| 18 | |
| 19 | extern char_u *mac_string_convert __ARGS((char_u *ptr, int len, int *lenp, int fail_on_error, int from, int to, int *unconvlenp)); |
| 20 | extern int macroman2enc __ARGS((char_u *ptr, long *sizep, long real_size)); |
| 21 | extern int enc2macroman __ARGS((char_u *from, size_t fromlen, char_u *to, int *tolenp, int maxtolen, char_u *rest, int *restlenp)); |
| 22 | |
| 23 | /* |
| 24 | * A Mac version of string_convert_ext() for special cases. |
| 25 | */ |
| 26 | char_u * |
| 27 | mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp) |
| 28 | char_u *ptr; |
| 29 | int len; |
| 30 | int *lenp; |
| 31 | int fail_on_error; |
| 32 | int from_enc; |
| 33 | int to_enc; |
| 34 | int *unconvlenp; |
| 35 | { |
| 36 | char_u *retval, *d; |
| 37 | CFStringRef cfstr; |
| 38 | int buflen, in, out, l, i; |
| 39 | CFStringEncoding from; |
| 40 | CFStringEncoding to; |
| 41 | |
| 42 | switch (from_enc) |
| 43 | { |
| 44 | case 'l': from = kCFStringEncodingISOLatin1; break; |
| 45 | case 'm': from = kCFStringEncodingMacRoman; break; |
| 46 | case 'u': from = kCFStringEncodingUTF8; break; |
| 47 | default: return NULL; |
| 48 | } |
| 49 | switch (to_enc) |
| 50 | { |
| 51 | case 'l': to = kCFStringEncodingISOLatin1; break; |
| 52 | case 'm': to = kCFStringEncodingMacRoman; break; |
| 53 | case 'u': to = kCFStringEncodingUTF8; break; |
| 54 | default: return NULL; |
| 55 | } |
| 56 | |
| 57 | if (unconvlenp != NULL) |
| 58 | *unconvlenp = 0; |
| 59 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
| 60 | |
| 61 | /* When conversion failed, try excluding bytes from the end, helps when |
| 62 | * there is an incomplete byte sequence. Only do up to 6 bytes to avoid |
| 63 | * looping a long time when there really is something unconvertable. */ |
| 64 | while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6) |
| 65 | { |
| 66 | --len; |
| 67 | ++*unconvlenp; |
| 68 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0); |
| 69 | } |
| 70 | if (cfstr == NULL) |
| 71 | return NULL; |
| 72 | if (to == kCFStringEncodingUTF8) |
| 73 | buflen = len * 6 + 1; |
| 74 | else |
| 75 | buflen = len + 1; |
| 76 | retval = alloc(buflen); |
| 77 | if (retval == NULL) |
| 78 | { |
| 79 | CFRelease(cfstr); |
| 80 | return NULL; |
| 81 | } |
| 82 | if (!CFStringGetCString(cfstr, retval, buflen, to)) |
| 83 | { |
| 84 | CFRelease(cfstr); |
| 85 | if (fail_on_error) |
| 86 | { |
| 87 | vim_free(retval); |
| 88 | return NULL; |
| 89 | } |
| 90 | |
| 91 | /* conversion failed for the whole string, but maybe it will work |
| 92 | * for each character */ |
| 93 | for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;) |
| 94 | { |
| 95 | if (from == kCFStringEncodingUTF8) |
| 96 | l = utf_ptr2len_check(ptr + in); |
| 97 | else |
| 98 | l = 1; |
| 99 | cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0); |
| 100 | if (cfstr == NULL) |
| 101 | { |
| 102 | *d++ = '?'; |
| 103 | out++; |
| 104 | } |
| 105 | else |
| 106 | { |
| 107 | if (!CFStringGetCString(cfstr, d, buflen - out, to)) |
| 108 | { |
| 109 | *d++ = '?'; |
| 110 | out++; |
| 111 | } |
| 112 | else |
| 113 | { |
| 114 | i = strlen(d); |
| 115 | d += i; |
| 116 | out += i; |
| 117 | } |
| 118 | CFRelease(cfstr); |
| 119 | } |
| 120 | in += l; |
| 121 | } |
| 122 | *d = NUL; |
| 123 | if (lenp != NULL) |
| 124 | *lenp = out; |
| 125 | return retval; |
| 126 | } |
| 127 | CFRelease(cfstr); |
| 128 | if (lenp != NULL) |
| 129 | *lenp = strlen(retval); |
| 130 | return retval; |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | * Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using |
| 135 | * standard Carbon framework. |
| 136 | * Input: "ptr[*sizep]". |
| 137 | * "real_size" is the size of the buffer that "ptr" points to. |
| 138 | * output is in-place, "sizep" is adjusted. |
| 139 | * Returns OK or FAIL. |
| 140 | */ |
| 141 | int |
| 142 | macroman2enc(ptr, sizep, real_size) |
| 143 | char_u *ptr; |
| 144 | long *sizep; |
| 145 | long real_size; |
| 146 | { |
| 147 | CFStringRef cfstr; |
| 148 | CFRange r; |
| 149 | CFIndex len = *sizep; |
| 150 | |
| 151 | /* MacRoman is an 8-bit encoding, no need to move bytes to |
| 152 | * conv_rest[]. */ |
| 153 | cfstr = CFStringCreateWithBytes(NULL, ptr, len, |
| 154 | kCFStringEncodingMacRoman, 0); |
| 155 | /* |
| 156 | * If there is a conversion error, try using another |
| 157 | * conversion. |
| 158 | */ |
| 159 | if (cfstr == NULL) |
| 160 | return FAIL; |
| 161 | |
| 162 | r.location = 0; |
| 163 | r.length = CFStringGetLength(cfstr); |
| 164 | if (r.length != CFStringGetBytes(cfstr, r, |
| 165 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 166 | 0, /* no lossy conversion */ |
| 167 | 0, /* not external representation */ |
| 168 | ptr + *sizep, real_size - *sizep, &len)) |
| 169 | { |
| 170 | CFRelease(cfstr); |
| 171 | return FAIL; |
| 172 | } |
| 173 | CFRelease(cfstr); |
| 174 | mch_memmove(ptr, ptr + *sizep, len); |
| 175 | *sizep = len; |
| 176 | |
| 177 | return OK; |
| 178 | } |
| 179 | |
| 180 | /* |
| 181 | * Conversion from UTF-8 or latin1 to MacRoman. |
| 182 | * Input: "from[fromlen]" |
| 183 | * Output: "to[maxtolen]" length in "*tolenp" |
| 184 | * Unconverted rest in rest[*restlenp]. |
| 185 | * Returns OK or FAIL. |
| 186 | */ |
| 187 | int |
| 188 | enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp) |
| 189 | char_u *from; |
| 190 | size_t fromlen; |
| 191 | char_u *to; |
| 192 | int *tolenp; |
| 193 | int maxtolen; |
| 194 | char_u *rest; |
| 195 | int *restlenp; |
| 196 | { |
| 197 | CFStringRef cfstr; |
| 198 | CFRange r; |
| 199 | CFIndex l; |
| 200 | |
| 201 | *restlenp = 0; |
| 202 | cfstr = CFStringCreateWithBytes(NULL, from, fromlen, |
| 203 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 204 | 0); |
| 205 | while (cfstr == NULL && *restlenp < 3 && fromlen > 1) |
| 206 | { |
| 207 | rest[*restlenp++] = from[--fromlen]; |
| 208 | cfstr = CFStringCreateWithBytes(NULL, from, fromlen, |
| 209 | (enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1, |
| 210 | 0); |
| 211 | } |
| 212 | if (cfstr == NULL) |
| 213 | return FAIL; |
| 214 | |
| 215 | r.location = 0; |
| 216 | r.length = CFStringGetLength(cfstr); |
| 217 | if (r.length != CFStringGetBytes(cfstr, r, |
| 218 | kCFStringEncodingMacRoman, |
| 219 | 0, /* no lossy conversion */ |
| 220 | 0, /* not external representation (since vim |
| 221 | * handles this internally */ |
| 222 | to, maxtolen, &l)) |
| 223 | { |
| 224 | CFRelease(cfstr); |
| 225 | return FAIL; |
| 226 | } |
| 227 | CFRelease(cfstr); |
| 228 | *tolenp = l; |
| 229 | return OK; |
| 230 | } |