Blame - src/os_mac_conv.c - android_external_vim

blob: fbce5770d644cda2bb60162ed548dc4ab4b7db5a [file] [log] [blame]

Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* VIM - Vi IMproved by Bram Moolenaar
				4	*
				5	* Do ":help uganda" in Vim to read copying and usage conditions.
				6	* Do ":help credits" in Vim to see a list of people who contributed.
				7	* See README.txt for an overview of the Vim source code.
				8	*/
				9	/*
				10	* os_mac_conv.c: Code specifically for Mac string conversions.
				11	*
				12	* This code has been put in a separate file to avoid the conflicts that are
				13	* caused by including both the X11 and Carbon header files.
				14	*/
				15
				16	#define NO_X11_INCLUDES
				17	#include "vim.h"
				18
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	19	#ifdef FEAT_MBYTE
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	20	static char_u mac_utf16_to_utf8 __ARGS((UniChar from, size_t fromLen, size_t *actualLen));
				21	static UniChar mac_utf8_to_utf16 __ARGS((char_u from, size_t fromLen, size_t *actualLen));
				22
				23	/* Converter for composing decomposed HFS+ file paths */
				24	static TECObjectRef gPathConverter;
				25	/* Converter used by mac_utf16_to_utf8 */
				26	static TECObjectRef gUTF16ToUTF8Converter;
				27
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	28	/*
				29	* A Mac version of string_convert_ext() for special cases.
				30	*/
				31	char_u *
				32	mac_string_convert(ptr, len, lenp, fail_on_error, from_enc, to_enc, unconvlenp)
				33	char_u *ptr;
				34	int len;
				35	int *lenp;
				36	int fail_on_error;
				37	int from_enc;
				38	int to_enc;
				39	int *unconvlenp;
				40	{
				41	char_u retval, d;
				42	CFStringRef cfstr;
				43	int buflen, in, out, l, i;
				44	CFStringEncoding from;
				45	CFStringEncoding to;
				46
				47	switch (from_enc)
				48	{
				49	case 'l': from = kCFStringEncodingISOLatin1; break;
				50	case 'm': from = kCFStringEncodingMacRoman; break;
				51	case 'u': from = kCFStringEncodingUTF8; break;
				52	default: return NULL;
				53	}
				54	switch (to_enc)
				55	{
				56	case 'l': to = kCFStringEncodingISOLatin1; break;
				57	case 'm': to = kCFStringEncodingMacRoman; break;
				58	case 'u': to = kCFStringEncodingUTF8; break;
				59	default: return NULL;
				60	}
				61
				62	if (unconvlenp != NULL)
				63	*unconvlenp = 0;
				64	cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
				65
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	66	if(cfstr == NULL)
				67	fprintf(stderr, "Encoding failed\n");
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	68	/* When conversion failed, try excluding bytes from the end, helps when
				69	* there is an incomplete byte sequence. Only do up to 6 bytes to avoid
				70	* looping a long time when there really is something unconvertable. */
				71	while (cfstr == NULL && unconvlenp != NULL && len > 1 && *unconvlenp < 6)
				72	{
				73	--len;
				74	++*unconvlenp;
				75	cfstr = CFStringCreateWithBytes(NULL, ptr, len, from, 0);
				76	}
				77	if (cfstr == NULL)
				78	return NULL;
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	79
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	80	if (to == kCFStringEncodingUTF8)
				81	buflen = len * 6 + 1;
				82	else
				83	buflen = len + 1;
				84	retval = alloc(buflen);
				85	if (retval == NULL)
				86	{
				87	CFRelease(cfstr);
				88	return NULL;
				89	}
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	90
				91	#if 0
				92	CFRange convertRange = CFRangeMake(0, CFStringGetLength(cfstr));
				93	/* Determine output buffer size */
				94	CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, NULL, 0, (CFIndex *)&buflen);
				95	retval = (buflen > 0) ? alloc(buflen) : NULL;
				96	if (retval == NULL) {
				97	CFRelease(cfstr);
				98	return NULL;
				99	}
				100
				101	if (lenp)
				102	*lenp = buflen / sizeof(char_u);
				103
				104	if (!CFStringGetBytes(cfstr, convertRange, to, NULL, FALSE, retval, buflen, NULL))
				105	#endif
Bram Moolenaar	da2303d	2005-08-30 21:55:26 +0000	[diff] [blame]	106	if (!CFStringGetCString(cfstr, (char *)retval, buflen, to))
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	107	{
				108	CFRelease(cfstr);
				109	if (fail_on_error)
				110	{
				111	vim_free(retval);
				112	return NULL;
				113	}
				114
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	115	fprintf(stderr, "Trying char-by-char conversion...\n");
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	116	/* conversion failed for the whole string, but maybe it will work
				117	* for each character */
				118	for (d = retval, in = 0, out = 0; in < len && out < buflen - 1;)
				119	{
				120	if (from == kCFStringEncodingUTF8)
Bram Moolenaar	0fa313a	2005-08-10 21:07:57 +0000	[diff] [blame]	121	l = utf_ptr2len(ptr + in);
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	122	else
				123	l = 1;
				124	cfstr = CFStringCreateWithBytes(NULL, ptr + in, l, from, 0);
				125	if (cfstr == NULL)
				126	{
				127	*d++ = '?';
				128	out++;
				129	}
				130	else
				131	{
Bram Moolenaar	da2303d	2005-08-30 21:55:26 +0000	[diff] [blame]	132	if (!CFStringGetCString(cfstr, (char *)d, buflen - out, to))
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	133	{
				134	*d++ = '?';
				135	out++;
				136	}
				137	else
				138	{
Bram Moolenaar	da2303d	2005-08-30 21:55:26 +0000	[diff] [blame]	139	i = STRLEN(d);
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	140	d += i;
				141	out += i;
				142	}
				143	CFRelease(cfstr);
				144	}
				145	in += l;
				146	}
				147	*d = NUL;
				148	if (lenp != NULL)
				149	*lenp = out;
				150	return retval;
				151	}
				152	CFRelease(cfstr);
				153	if (lenp != NULL)
Bram Moolenaar	da2303d	2005-08-30 21:55:26 +0000	[diff] [blame]	154	*lenp = STRLEN(retval);
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	155
Bram Moolenaar	ab79bcb	2004-07-18 21:34:53 +0000	[diff] [blame]	156	return retval;
				157	}
				158
				159	/*
				160	* Conversion from Apple MacRoman char encoding to UTF-8 or latin1, using
				161	* standard Carbon framework.
				162	* Input: "ptr[*sizep]".
				163	* "real_size" is the size of the buffer that "ptr" points to.
				164	* output is in-place, "sizep" is adjusted.
				165	* Returns OK or FAIL.
				166	*/
				167	int
				168	macroman2enc(ptr, sizep, real_size)
				169	char_u *ptr;
				170	long *sizep;
				171	long real_size;
				172	{
				173	CFStringRef cfstr;
				174	CFRange r;
				175	CFIndex len = *sizep;
				176
				177	/* MacRoman is an 8-bit encoding, no need to move bytes to
				178	* conv_rest[]. */
				179	cfstr = CFStringCreateWithBytes(NULL, ptr, len,
				180	kCFStringEncodingMacRoman, 0);
				181	/*
				182	* If there is a conversion error, try using another
				183	* conversion.
				184	*/
				185	if (cfstr == NULL)
				186	return FAIL;
				187
				188	r.location = 0;
				189	r.length = CFStringGetLength(cfstr);
				190	if (r.length != CFStringGetBytes(cfstr, r,
				191	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				192	0, /* no lossy conversion */
				193	0, /* not external representation */
				194	ptr + sizep, real_size - sizep, &len))
				195	{
				196	CFRelease(cfstr);
				197	return FAIL;
				198	}
				199	CFRelease(cfstr);
				200	mch_memmove(ptr, ptr + *sizep, len);
				201	*sizep = len;
				202
				203	return OK;
				204	}
				205
				206	/*
				207	* Conversion from UTF-8 or latin1 to MacRoman.
				208	* Input: "from[fromlen]"
				209	* Output: "to[maxtolen]" length in "*tolenp"
				210	* Unconverted rest in rest[*restlenp].
				211	* Returns OK or FAIL.
				212	*/
				213	int
				214	enc2macroman(from, fromlen, to, tolenp, maxtolen, rest, restlenp)
				215	char_u *from;
				216	size_t fromlen;
				217	char_u *to;
				218	int *tolenp;
				219	int maxtolen;
				220	char_u *rest;
				221	int *restlenp;
				222	{
				223	CFStringRef cfstr;
				224	CFRange r;
				225	CFIndex l;
				226
				227	*restlenp = 0;
				228	cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
				229	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				230	0);
				231	while (cfstr == NULL && *restlenp < 3 && fromlen > 1)
				232	{
				233	rest[*restlenp++] = from[--fromlen];
				234	cfstr = CFStringCreateWithBytes(NULL, from, fromlen,
				235	(enc_utf8) ? kCFStringEncodingUTF8 : kCFStringEncodingISOLatin1,
				236	0);
				237	}
				238	if (cfstr == NULL)
				239	return FAIL;
				240
				241	r.location = 0;
				242	r.length = CFStringGetLength(cfstr);
				243	if (r.length != CFStringGetBytes(cfstr, r,
				244	kCFStringEncodingMacRoman,
				245	0, /* no lossy conversion */
				246	0, /* not external representation (since vim
				247	* handles this internally */
				248	to, maxtolen, &l))
				249	{
				250	CFRelease(cfstr);
				251	return FAIL;
				252	}
				253	CFRelease(cfstr);
				254	*tolenp = l;
				255	return OK;
				256	}
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	257
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	258	/*
				259	* Initializes text converters
				260	*/
				261	void
				262	mac_conv_init()
				263	{
				264	TextEncoding utf8_encoding;
				265	TextEncoding utf8_hfsplus_encoding;
				266	TextEncoding utf8_canon_encoding;
				267	TextEncoding utf16_encoding;
				268
				269	utf8_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				270	kTextEncodingDefaultVariant, kUnicodeUTF8Format);
				271	utf8_hfsplus_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				272	kUnicodeHFSPlusCompVariant, kUnicodeUTF8Format);
				273	utf8_canon_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				274	kUnicodeCanonicalCompVariant, kUnicodeUTF8Format);
				275	utf16_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
				276	kTextEncodingDefaultVariant, kUnicode16BitFormat);
				277
				278	if (TECCreateConverter(&gPathConverter, utf8_encoding,
				279	utf8_hfsplus_encoding) != noErr)
				280	gPathConverter = NULL;
				281
				282	if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
				283	utf8_canon_encoding) != noErr)
Bram Moolenaar	19a09a1	2005-03-04 23:39:37 +0000	[diff] [blame]	284	{
				285	/* On pre-10.3, Unicode normalization is not available so
				286	* fall back to non-normalizing converter */
				287	if (TECCreateConverter(&gUTF16ToUTF8Converter, utf16_encoding,
				288	utf8_encoding) != noErr)
				289	gUTF16ToUTF8Converter = NULL;
				290	}
Bram Moolenaar	26a60b4	2005-02-22 08:49:11 +0000	[diff] [blame]	291	}
				292
				293	/*
				294	* Destroys text converters
				295	*/
				296	void
				297	mac_conv_cleanup()
				298	{
				299	if (gUTF16ToUTF8Converter)
				300	{
				301	TECDisposeConverter(gUTF16ToUTF8Converter);
				302	gUTF16ToUTF8Converter = NULL;
				303	}
				304
				305	if (gPathConverter)
				306	{
				307	TECDisposeConverter(gPathConverter);
				308	gPathConverter = NULL;
				309	}
				310	}
				311
				312	/*
				313	* Conversion from UTF-16 UniChars to 'encoding'
				314	*/
				315	char_u *
				316	mac_utf16_to_enc(from, fromLen, actualLen)
				317	UniChar *from;
				318	size_t fromLen;
				319	size_t *actualLen;
				320	{
				321	/* Following code borrows somewhat from os_mswin.c */
				322	vimconv_T conv;
				323	size_t utf8_len;
				324	char_u *utf8_str;
				325	char_u *result = NULL;
				326
				327	/* Convert to utf-8 first, works better with iconv */
				328	utf8_len = 0;
				329	utf8_str = mac_utf16_to_utf8(from, fromLen, &utf8_len);
				330
				331	if (utf8_str)
				332	{
				333	/* We might be called before we have p_enc set up. */
				334	conv.vc_type = CONV_NONE;
				335
				336	/* If encoding (p_enc) is any unicode, it is actually in utf-8 (vim
				337	* internal unicode is always utf-8) so don't convert in such cases */
				338
				339	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0)
				340	convert_setup(&conv, (char_u *)"utf-8",
				341	p_enc? p_enc: (char_u *)"macroman");
				342	if (conv.vc_type == CONV_NONE)
				343	{
				344	/* p_enc is utf-8, so we're done. */
				345	result = utf8_str;
				346	}
				347	else
				348	{
				349	result = string_convert(&conv, utf8_str, (int *)&utf8_len);
				350	vim_free(utf8_str);
				351	}
				352
				353	convert_setup(&conv, NULL, NULL);
				354
				355	if (actualLen)
				356	*actualLen = utf8_len;
				357	}
				358	else if (actualLen)
				359	*actualLen = 0;
				360
				361	return result;
				362	}
				363
				364	/*
				365	* Conversion from 'encoding' to UTF-16 UniChars
				366	*/
				367	UniChar *
				368	mac_enc_to_utf16(from, fromLen, actualLen)
				369	char_u *from;
				370	size_t fromLen;
				371	size_t *actualLen;
				372	{
				373	/* Following code borrows somewhat from os_mswin.c */
				374	vimconv_T conv;
				375	size_t utf8_len;
				376	char_u *utf8_str;
				377	UniChar *result = NULL;
				378	Boolean should_free_utf8 = FALSE;
				379
				380	do
				381	{
				382	/* Use MacRoman by default, we might be called before we have p_enc
				383	* set up. Convert to utf-8 first, works better with iconv(). Does
				384	* nothing if 'encoding' is "utf-8". */
				385	conv.vc_type = CONV_NONE;
				386	if ((enc_canon_props(p_enc) & ENC_UNICODE) == 0 &&
				387	convert_setup(&conv, p_enc ? p_enc : (char_u *)"macroman",
				388	(char_u *)"utf-8") == FAIL)
				389	break;
				390
				391	if (conv.vc_type != CONV_NONE)
				392	{
				393	utf8_len = fromLen;
				394	utf8_str = string_convert(&conv, from, (int *)&utf8_len);
				395	should_free_utf8 = TRUE;
				396	}
				397	else
				398	{
				399	utf8_str = from;
				400	utf8_len = fromLen;
				401	}
				402
				403	if (utf8_str == NULL)
				404	break;
				405
				406	convert_setup(&conv, NULL, NULL);
				407
				408	result = mac_utf8_to_utf16(utf8_str, utf8_len, actualLen);
				409
				410	if (should_free_utf8)
				411	vim_free(utf8_str);
				412	return result;
				413	}
				414	while (0);
				415
				416	if (actualLen)
				417	*actualLen = 0;
				418
				419	return result;
				420	}
				421
				422	/*
				423	* Converts from UTF-16 UniChars to CFString
				424	*/
				425	CFStringRef
				426	mac_enc_to_cfstring(from, fromLen)
				427	char_u *from;
				428	size_t fromLen;
				429	{
				430	UniChar *utf16_str;
				431	size_t utf16_len;
				432	CFStringRef result = NULL;
				433
				434	utf16_str = mac_enc_to_utf16(from, fromLen, &utf16_len);
				435	if (utf16_str)
				436	{
				437	result = CFStringCreateWithCharacters(NULL, utf16_str, utf16_len/sizeof(UniChar));
				438	vim_free(utf16_str);
				439	}
				440
				441	return result;
				442	}
				443
				444	/*
				445	* Converts a decomposed HFS+ UTF-8 path to precomposed UTF-8
				446	*/
				447	char_u *
				448	mac_precompose_path(decompPath, decompLen, precompLen)
				449	char_u *decompPath;
				450	size_t decompLen;
				451	size_t *precompLen;
				452	{
				453	char_u *result = NULL;
				454	size_t actualLen = 0;
				455
				456	if (gPathConverter)
				457	{
				458	result = alloc(decompLen);
				459	if (result)
				460	{
				461	if (TECConvertText(gPathConverter, decompPath,
				462	decompLen, &decompLen, result,
				463	decompLen, &actualLen) != noErr)
				464	{
				465	vim_free(result);
				466	result = NULL;
				467	}
				468	}
				469	}
				470
				471	if (precompLen)
				472	*precompLen = actualLen;
				473
				474	return result;
				475	}
				476
				477	/*
				478	* Converts from UTF-16 UniChars to precomposed UTF-8
				479	*/
				480	char_u *
				481	mac_utf16_to_utf8(from, fromLen, actualLen)
				482	UniChar *from;
				483	size_t fromLen;
				484	size_t *actualLen;
				485	{
				486	ByteCount utf8_len;
				487	ByteCount inputRead;
				488	char_u *result;
				489
				490	if (gUTF16ToUTF8Converter)
				491	{
				492	result = alloc(fromLen * 6 + 1);
				493	if (result && TECConvertText(gUTF16ToUTF8Converter, (ConstTextPtr)from,
				494	fromLen, &inputRead, result,
				495	(fromLen6+1)sizeof(char_u), &utf8_len) == noErr)
				496	{
				497	TECFlushText(gUTF16ToUTF8Converter, result, (fromLen6+1)sizeof(char_u), &inputRead);
				498	utf8_len += inputRead;
				499	}
				500	else
				501	{
				502	vim_free(result);
				503	result = NULL;
				504	}
				505	}
				506	else
				507	{
				508	result = NULL;
				509	}
				510
				511	if (actualLen)
				512	*actualLen = result ? utf8_len : 0;
				513
				514	return result;
				515	}
				516
				517	/*
				518	* Converts from UTF-8 to UTF-16 UniChars
				519	*/
				520	UniChar *
				521	mac_utf8_to_utf16(from, fromLen, actualLen)
				522	char_u *from;
				523	size_t fromLen;
				524	size_t *actualLen;
				525	{
				526	CFStringRef utf8_str;
				527	CFRange convertRange;
				528	UniChar *result = NULL;
				529
				530	utf8_str = CFStringCreateWithBytes(NULL, from, fromLen,
				531	kCFStringEncodingUTF8, FALSE);
				532
				533	if (utf8_str == NULL) {
				534	if (actualLen)
				535	*actualLen = 0;
				536	return NULL;
				537	}
				538
				539	convertRange = CFRangeMake(0, CFStringGetLength(utf8_str));
				540	result = (UniChar )alloc(convertRange.length sizeof(UniChar));
				541
				542	CFStringGetCharacters(utf8_str, convertRange, result);
				543
				544	CFRelease(utf8_str);
				545
				546	if (actualLen)
				547	actualLen = convertRange.length sizeof(UniChar);
				548
				549	return result;
				550	}
Bram Moolenaar	5eb86f9	2004-07-26 12:53:41 +0000	[diff] [blame]	551	#endif /* FEAT_MBYTE */