Blame - src/os_mac_conv.c - android_external_vim

blob: 68d46c0f2b2b582da8ad50df261927a29843f6dc [file] [log] [blame]

Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9	/*
				10	* os_mac_conv.c: Code specifically for Mac string conversions.
				11	*
				12	* This code has been put in a separate file to avoid the conflicts that are
				13	* caused by including both the X11 and Carbon header files.
				14	*/
				15
				16	#define NO_X11_INCLUDES
				17	#include "vim.h"
				18
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	19	#ifdef FEAT_MBYTE
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	20	extern char_u mac_string_convert __ARGS((char_u ptr, int len, int lenp, int fail_on_error, int from, int to, int unconvlenp));
				21	extern int macroman2enc __ARGS((char_u ptr, long sizep, long real_size));
				22	extern int enc2macroman __ARGS((char_u from, size_t fromlen, char_u to, int tolenp, int maxtolen, char_u rest, int *restlenp));
				23
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	24	extern void mac_conv_init __ARGS((void));
				25	extern void mac_conv_cleanup __ARGS((void));
				26	extern char_u mac_utf16_to_enc __ARGS((UniChar from, size_t fromLen, size_t *actualLen));
				27	extern UniChar mac_enc_to_utf16 __ARGS((char_u from, size_t fromLen, size_t *actualLen));
				28	extern CFStringRef mac_enc_to_cfstring __ARGS((char_u *from, size_t fromLen));
				29	extern char_u mac_precompose_path __ARGS((char_u decompPath, size_t decompLen, size_t *precompLen));
				30
				31	static char_u mac_utf16_to_utf8 __ARGS((UniChar from, size_t fromLen, size_t *actualLen));
				32	static UniChar mac_utf8_to_utf16 __ARGS((char_u from, size_t fromLen, size_t *actualLen));
				33
				34	/* Converter for composing decomposed HFS+ file paths */
				35	static TECObjectRef gPathConverter;
				36	/* Converter used by mac_utf16_to_utf8 */
				37	static TECObjectRef gUTF16ToUTF8Converter;
				38
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	39	/*
				40	* A Mac version of string_convert_ext() for special cases.
				41	*/
				42	char_u *
				43	mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
				44	char_u *ptr;
				45	int len;
				46	int *lenp;
				47	int fail_on_error;
				48	int from_enc;
				49	int to_enc;
				50	int *unconvlenp;
				51	{
				52	char_u retval, d;
				53	CFStringRef cfstr;
				54	int buflen, in, out, l, i;
				55	CFStringEncoding from;
				56	CFStringEncoding to;
				57
				58	switch (from_enc)
				59	{
				60	case 'l': from = kCFStringEncodingISOLatin1; break;
				61	case 'm': from = kCFStringEncodingMacRoman; break;
				62	case 'u': from = kCFStringEncodingUTF8; break;
				63	default: return NULL;
				64	}
				65	switch (to_enc)
				66	{
				67	case 'l': to = kCFStringEncodingISOLatin1; break;
				68	case 'm': to = kCFStringEncodingMacRoman; break;
				69	case 'u': to = kCFStringEncodingUTF8; break;
				70	default: return NULL;
				71	}
				72
				73	if (unconvlenp != NULL)
				74	*unconvlenp = 0;
				75	cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
				76
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	77	if(cfstr == NULL)
				78	fprintf(stderr, "Encoding failed\n");
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	79	/* When conversion failed, try excluding bytes from the end, helps when
				80	* there is an incomplete byte sequence. Only do up to 6 bytes to avoid
				81	* looping a long time when there really is something unconvertable. */
				82	while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
				83	{
				84	--len;
				85	++*unconvlenp;
				86	cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
				87	}
				88	if (cfstr == NULL)
				89	return NULL;
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	90
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	91	if (to == kCFStringEncodingUTF8)
				92	buflen = len * 6 + 1;
				93	else
				94	buflen = len + 1;
				95	retval = alloc(buflen);
				96	if (retval == NULL)
				97	{
				98	CFRelease(cfstr);
				99	return NULL;
				100	}
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	101
				102	#if 0
				103	CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
				104	/* Determine output buffer size */
				105	CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
				106	retval = (buflen > 0) ? alloc(buflen) : NULL;
				107	if (retval == NULL) {
				108	CFRelease(cfstr);
				109	return NULL;
				110	}
				111
				112	if (lenp)
				113	*lenp = buflen / sizeof(char_u);
				114
				115	if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
				116	#endif
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	117	if (!CFStringGetCString(cfstr, retval, buflen, to))
				118	{
				119	CFRelease(cfstr);
				120	if (fail_on_error)
				121	{
				122	vim_free(retval);
				123	return NULL;
				124	}
				125
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	126	fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	127	/* conversion failed for the whole string, but maybe it will work
				128	* for each character */
				129	for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
				130	{
				131	if (from == kCFStringEncodingUTF8)
				132	l = utf_ptr2len_check(ptr + in);
				133	else
				134	l = 1;
				135	cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
				136	if (cfstr == NULL)
				137	{
				138	*d++ = '?';
				139	out++;
				140	}
				141	else
				142	{
				143	if (!CFStringGetCString(cfstr, d, buflen - out, to))
				144	{
				145	*d++ = '?';
				146	out++;
				147	}
				148	else
				149	{
				150	i = strlen(d);
				151	d += i;
				152	out += i;
				153	}
				154	CFRelease(cfstr);
				155	}
				156	in += l;
				157	}
				158	*d = NUL;
				159	if (lenp != NULL)
				160	*lenp = out;
				161	return retval;
				162	}
				163	CFRelease(cfstr);
				164	if (lenp != NULL)
				165	*lenp = strlen(retval);
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	166
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	167	return retval;
				168	}
				169
				170	/*
				171	* Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
				172	* standard Carbon framework.
				173	* Input: "ptr[*sizep]".
				174	* "real_size" is the size of the buffer that "ptr" points to.
				175	* output is in-place, "sizep" is adjusted.
				176	* Returns OK or FAIL.
				177	*/
				178	int
				179	macroman2enc(ptr, sizep, real_size)
				180	char_u *ptr;
				181	long *sizep;
				182	long real_size;
				183	{
				184	CFStringRef cfstr;
				185	CFRange r;
				186	CFIndex len = *sizep;
				187
				188	/* MacRoman is an 8-bit encoding, no need to move bytes to
				189	* conv_rest[]. */
				190	cfstr = CFStringCreateWithBytes(NULL, ptr, len,
				191	kCFStringEncodingMacRoman, 0);
				192	/*
				193	* If there is a conversion error, try using another
				194	* conversion.
				195	*/
				196	if (cfstr == NULL)
				197	return FAIL;
				198
				199	r.location = 0;
				200	r.length = CFStringGetLength(cfstr);
				201	if (r.length != CFStringGetBytes(cfstr, r,
				202	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				203	0, /* no lossy conversion */
				204	0, /* not external representation */
				205	ptr + sizep, real_size - sizep, &len))
				206	{
				207	CFRelease(cfstr);
				208	return FAIL;
				209	}
				210	CFRelease(cfstr);
				211	mch_memmove(ptr, ptr + *sizep, len);
				212	*sizep = len;
				213
				214	return OK;
				215	}
				216
				217	/*
				218	* Conversion from UTF-8 or latin1 to MacRoman.
				219	* Input: "from[fromlen]"
				220	* Output: "to[maxtolen]" length in "*tolenp"
				221	* Unconverted rest in rest[*restlenp].
				222	* Returns OK or FAIL.
				223	*/
				224	int
				225	enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
				226	char_u *from;
				227	size_t fromlen;
				228	char_u *to;
				229	int *tolenp;
				230	int maxtolen;
				231	char_u *rest;
				232	int *restlenp;
				233	{
				234	CFStringRef cfstr;
				235	CFRange r;
				236	CFIndex l;
				237
				238	*restlenp = 0;
				239	cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
				240	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				241	0);
				242	while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
				243	{
				244	rest[*restlenp++] = from[--fromlen];
				245	cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
				246	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				247	0);
				248	}
				249	if (cfstr == NULL)
				250	return FAIL;
				251
				252	r.location = 0;
				253	r.length = CFStringGetLength(cfstr);
				254	if (r.length != CFStringGetBytes(cfstr, r,
				255	kCFStringEncodingMacRoman,
				256	0, /* no lossy conversion */
				257	0, /* not external representation (since vim
				258	* handles this internally */
				259	to, maxtolen, &l))
				260	{
				261	CFRelease(cfstr);
				262	return FAIL;
				263	}
				264	CFRelease(cfstr);
				265	*tolenp = l;
				266	return OK;
				267	}
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	268
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	269	/*
				270	* Initializes text converters
				271	*/
				272	void
				273	mac_conv_init()
				274	{
				275	TextEncoding utf8_encoding;
				276	TextEncoding utf8_hfsplus_encoding;
				277	TextEncoding utf8_canon_encoding;
				278	TextEncoding utf16_encoding;
				279
				280	utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				281	kTextEncodingDefaultVariant, kUnicodeUTF8Format);
				282	utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				283	kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
				284	utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				285	kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
				286	utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				287	kTextEncodingDefaultVariant, kUnicode16BitFormat);
				288
				289	if (TECCreateConverter(&gPathConverter, utf8_encoding,
				290	utf8_hfsplus_encoding) != noErr)
				291	gPathConverter = NULL;
				292
				293	if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
				294	utf8_canon_encoding) != noErr)
				295	gUTF16ToUTF8Converter = NULL;
				296	}
				297
				298	/*
				299	* Destroys text converters
				300	*/
				301	void
				302	mac_conv_cleanup()
				303	{
				304	if (gUTF16ToUTF8Converter)
				305	{
				306	TECDisposeConverter(gUTF16ToUTF8Converter);
				307	gUTF16ToUTF8Converter = NULL;
				308	}
				309
				310	if (gPathConverter)
				311	{
				312	TECDisposeConverter(gPathConverter);
				313	gPathConverter = NULL;
				314	}
				315	}
				316
				317	/*
				318	* Conversion from UTF-16 UniChars to 'encoding'
				319	*/
				320	char_u *
				321	mac_utf16_to_enc(from, fromLen, actualLen)
				322	UniChar *from;
				323	size_t fromLen;
				324	size_t *actualLen;
				325	{
				326	/* Following code borrows somewhat from os_mswin.c */
				327	vimconv_T conv;
				328	size_t utf8_len;
				329	char_u *utf8_str;
				330	char_u *result = NULL;
				331
				332	/* Convert to utf-8 first, works better with iconv */
				333	utf8_len = 0;
				334	utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
				335
				336	if (utf8_str)
				337	{
				338	/* We might be called before we have p_enc set up. */
				339	conv.vc_type = CONV_NONE;
				340
				341	/* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
				342	* internal unicode is always utf-8) so don't convert in such cases */
				343
				344	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
				345	convert_setup(&conv, (char_u *)"utf-8",
				346	p_enc? p_enc: (char_u *)"macroman");
				347	if (conv.vc_type == CONV_NONE)
				348	{
				349	/* p_enc is utf-8, so we're done. */
				350	result = utf8_str;
				351	}
				352	else
				353	{
				354	result = string_convert(&conv, utf8_str, (int *)&utf8_len);
				355	vim_free(utf8_str);
				356	}
				357
				358	convert_setup(&conv, NULL, NULL);
				359
				360	if (actualLen)
				361	*actualLen = utf8_len;
				362	}
				363	else if (actualLen)
				364	*actualLen = 0;
				365
				366	return result;
				367	}
				368
				369	/*
				370	* Conversion from 'encoding' to UTF-16 UniChars
				371	*/
				372	UniChar *
				373	mac_enc_to_utf16(from, fromLen, actualLen)
				374	char_u *from;
				375	size_t fromLen;
				376	size_t *actualLen;
				377	{
				378	/* Following code borrows somewhat from os_mswin.c */
				379	vimconv_T conv;
				380	size_t utf8_len;
				381	char_u *utf8_str;
				382	UniChar *result = NULL;
				383	Boolean should_free_utf8 = FALSE;
				384
				385	do
				386	{
				387	/* Use MacRoman by default, we might be called before we have p_enc
				388	* set up. Convert to utf-8 first, works better with iconv(). Does
				389	* nothing if 'encoding' is "utf-8". */
				390	conv.vc_type = CONV_NONE;
				391	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
				392	convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
				393	(char_u *)"utf-8") == FAIL)
				394	break;
				395
				396	if (conv.vc_type != CONV_NONE)
				397	{
				398	utf8_len = fromLen;
				399	utf8_str = string_convert(&conv, from, (int *)&utf8_len);
				400	should_free_utf8 = TRUE;
				401	}
				402	else
				403	{
				404	utf8_str = from;
				405	utf8_len = fromLen;
				406	}
				407
				408	if (utf8_str == NULL)
				409	break;
				410
				411	convert_setup(&conv, NULL, NULL);
				412
				413	result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
				414
				415	if (should_free_utf8)
				416	vim_free(utf8_str);
				417	return result;
				418	}
				419	while (0);
				420
				421	if (actualLen)
				422	*actualLen = 0;
				423
				424	return result;
				425	}
				426
				427	/*
				428	* Converts from UTF-16 UniChars to CFString
				429	*/
				430	CFStringRef
				431	mac_enc_to_cfstring(from, fromLen)
				432	char_u *from;
				433	size_t fromLen;
				434	{
				435	UniChar *utf16_str;
				436	size_t utf16_len;
				437	CFStringRef result = NULL;
				438
				439	utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
				440	if (utf16_str)
				441	{
				442	result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
				443	vim_free(utf16_str);
				444	}
				445
				446	return result;
				447	}
				448
				449	/*
				450	* Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
				451	*/
				452	char_u *
				453	mac_precompose_path(decompPath, decompLen, precompLen)
				454	char_u *decompPath;
				455	size_t decompLen;
				456	size_t *precompLen;
				457	{
				458	char_u *result = NULL;
				459	size_t actualLen = 0;
				460
				461	if (gPathConverter)
				462	{
				463	result = alloc(decompLen);
				464	if (result)
				465	{
				466	if (TECConvertText(gPathConverter, decompPath,
				467	decompLen, &decompLen, result,
				468	decompLen, &actualLen) != noErr)
				469	{
				470	vim_free(result);
				471	result = NULL;
				472	}
				473	}
				474	}
				475
				476	if (precompLen)
				477	*precompLen = actualLen;
				478
				479	return result;
				480	}
				481
				482	/*
				483	* Converts from UTF-16 UniChars to precomposed UTF-8
				484	*/
				485	char_u *
				486	mac_utf16_to_utf8(from, fromLen, actualLen)
				487	UniChar *from;
				488	size_t fromLen;
				489	size_t *actualLen;
				490	{
				491	ByteCount utf8_len;
				492	ByteCount inputRead;
				493	char_u *result;
				494
				495	if (gUTF16ToUTF8Converter)
				496	{
				497	result = alloc(fromLen * 6 + 1);
				498	if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
				499	fromLen, &inputRead, result,
				500	(fromLen6+1)sizeof(char_u), &utf8_len) == noErr)
				501	{
				502	TECFlushText(gUTF16ToUTF8Converter, result, (fromLen6+1)sizeof(char_u), &inputRead);
				503	utf8_len += inputRead;
				504	}
				505	else
				506	{
				507	vim_free(result);
				508	result = NULL;
				509	}
				510	}
				511	else
				512	{
				513	result = NULL;
				514	}
				515
				516	if (actualLen)
				517	*actualLen = result ? utf8_len : 0;
				518
				519	return result;
				520	}
				521
				522	/*
				523	* Converts from UTF-8 to UTF-16 UniChars
				524	*/
				525	UniChar *
				526	mac_utf8_to_utf16(from, fromLen, actualLen)
				527	char_u *from;
				528	size_t fromLen;
				529	size_t *actualLen;
				530	{
				531	CFStringRef utf8_str;
				532	CFRange convertRange;
				533	UniChar *result = NULL;
				534
				535	utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
				536	kCFStringEncodingUTF8, FALSE);
				537
				538	if (utf8_str == NULL) {
				539	if (actualLen)
				540	*actualLen = 0;
				541	return NULL;
				542	}
				543
				544	convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
				545	result = (UniChar )alloc(convertRange.length sizeof(UniChar));
				546
				547	CFStringGetCharacters(utf8_str, convertRange, result);
				548
				549	CFRelease(utf8_str);
				550
				551	if (actualLen)
				552	actualLen = convertRange.length sizeof(UniChar);
				553
				554	return result;
				555	}
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	556	#endif /* FEAT_MBYTE */