Blame - src/regexp.c - android_external_vim

blob: a9915a3b17cd03e0a9398ebb15c9939d12f71735 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACE_COMPLEX
				77	* node points to the node after the stuff to be repeated. The operand of some
				78	* types of node is a literal string; for others, it is a node leading into a
				79	* sub-FSM. In particular, the operand of a BRANCH node is the first node of
				80	* the branch. (NB this is not a tree structure: the tail of the branch
				81	* connects to the thing following the set of BRANCHes.)
				82	*
				83	* pattern      is coded like:
				84	*
				85	*                        +-----------------+
				86	*                        |                 V
				87	* <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
				88	*                   |      ^    |          ^
				89	*                   +------+    +----------+
				90	*
				91	*
				92	*                     +------------------+
				93	*                     V                  |
				94	* <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				95	*                   |      |               ^                      ^
				96	*                   |      +---------------+                      |
				97	*                   +---------------------------------------------+
				98	*
				99	*
				100	*                                      +-------------------------+
				101	*                                      V                         |
				102	* <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
				103	*                   |                              |                ^
				104	*                   |                              +----------------+
				105	*                   +-----------------------------------------------+
				106	*
				107	*
				108	* <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
				109	*                   |       |                ^       ^
				110	*                   |       +----------------+       |
				111	*                   +--------------------------------+
				112	*
				113	*                                                    +---------+
				114	*                                                    |         V
				115	* \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH   c BRANCH  NOTHING --> END
				116	*                   |      |          |          |     ^                   ^
				117	*                   |      |          |          +-----+                   |
				118	*                   |      |          +----------------+                   |
				119	*                   |      +---------------------------+                   |
				120	*                   +------------------------------------------------------+
				121	*
				122	* They all start with a BRANCH for "\|" alternatives, even when there is only
				123	* one alternative.
				124	*/
				125
				126	/*
				127	* The opcodes are:
				128	*/
				129
				130	/* definition number opnd? meaning */
				131	#define END 0 /* End of program or NOMATCH operand. */
				132	#define BOL 1 /* Match "" at beginning of line. */
				133	#define EOL 2 /* Match "" at end of line. */
				134	#define BRANCH 3 /* node Match this alternative, or the
				135	* next... */
				136	#define BACK 4 /* Match "", "next" ptr points backward. */
				137	#define EXACTLY 5 /* str Match this string. */
				138	#define NOTHING 6 /* Match empty string. */
				139	#define STAR 7 /* node Match this (simple) thing 0 or more
				140	* times. */
				141	#define PLUS 8 /* node Match this (simple) thing 1 or more
				142	* times. */
				143	#define MATCH 9 /* node match the operand zero-width */
				144	#define NOMATCH 10 /* node check for no match with operand */
				145	#define BEHIND 11 /* node look behind for a match with operand */
				146	#define NOBEHIND 12 /* node look behind for no match with operand */
				147	#define SUBPAT 13 /* node match the operand here */
				148	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				149	* n times (\{m,n\}). */
				150	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				151	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				152	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				153	* and BRACE_COMPLEX. */
				154	#define NEWL 18 /* Match line-break */
				155	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				156
				157
				158	/* character classes: 20-48 normal, 50-78 include a line-break */
				159	#define ADD_NL 30
				160	#define FIRST_NL ANY + ADD_NL
				161	#define ANY 20 /* Match any one character. */
				162	#define ANYOF 21 /* str Match any character in this string. */
				163	#define ANYBUT 22 /* str Match any character not in this
				164	* string. */
				165	#define IDENT 23 /* Match identifier char */
				166	#define SIDENT 24 /* Match identifier char but no digit */
				167	#define KWORD 25 /* Match keyword char */
				168	#define SKWORD 26 /* Match word char but no digit */
				169	#define FNAME 27 /* Match file name char */
				170	#define SFNAME 28 /* Match file name char but no digit */
				171	#define PRINT 29 /* Match printable char */
				172	#define SPRINT 30 /* Match printable char but no digit */
				173	#define WHITE 31 /* Match whitespace char */
				174	#define NWHITE 32 /* Match non-whitespace char */
				175	#define DIGIT 33 /* Match digit char */
				176	#define NDIGIT 34 /* Match non-digit char */
				177	#define HEX 35 /* Match hex char */
				178	#define NHEX 36 /* Match non-hex char */
				179	#define OCTAL 37 /* Match octal char */
				180	#define NOCTAL 38 /* Match non-octal char */
				181	#define WORD 39 /* Match word char */
				182	#define NWORD 40 /* Match non-word char */
				183	#define HEAD 41 /* Match head char */
				184	#define NHEAD 42 /* Match non-head char */
				185	#define ALPHA 43 /* Match alpha char */
				186	#define NALPHA 44 /* Match non-alpha char */
				187	#define LOWER 45 /* Match lowercase char */
				188	#define NLOWER 46 /* Match non-lowercase char */
				189	#define UPPER 47 /* Match uppercase char */
				190	#define NUPPER 48 /* Match non-uppercase char */
				191	#define LAST_NL NUPPER + ADD_NL
				192	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				193
				194	#define MOPEN 80 /* -89 Mark this point in input as start of
				195	* \( subexpr. MOPEN + 0 marks start of
				196	* match. */
				197	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				198	* end of match. */
				199	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				200
				201	#ifdef FEAT_SYN_HL
				202	# define ZOPEN 110 /* -119 Mark this point in input as start of
				203	* \z( subexpr. */
				204	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				205	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				206	#endif
				207
				208	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				209
				210	#define NOPEN 150 /* Mark this point in input as start of
				211	\%( subexpr. */
				212	#define NCLOSE 151 /* Analogous to NOPEN. */
				213
				214	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				215	#define RE_BOF 201 /* Match "" at beginning of file. */
				216	#define RE_EOF 202 /* Match "" at end of file. */
				217	#define CURSOR 203 /* Match location of cursor. */
				218
				219	#define RE_LNUM 204 /* nr cmp Match line number */
				220	#define RE_COL 205 /* nr cmp Match column number */
				221	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				222
				223	/*
				224	* Magic characters have a special meaning, they don't match literally.
				225	* Magic characters are negative. This separates them from literal characters
				226	* (possibly multi-byte). Only ASCII characters can be Magic.
				227	*/
				228	#define Magic(x) ((int)(x) - 256)
				229	#define un_Magic(x) ((x) + 256)
				230	#define is_Magic(x) ((x) < 0)
				231
				232	static int no_Magic __ARGS((int x));
				233	static int toggle_Magic __ARGS((int x));
				234
				235	static int
				236	no_Magic(x)
				237	int x;
				238	{
				239	if (is_Magic(x))
				240	return un_Magic(x);
				241	return x;
				242	}
				243
				244	static int
				245	toggle_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return Magic(x);
				251	}
				252
				253	/*
				254	* The first byte of the regexp internal "program" is actually this magic
				255	* number; the start node begins in the second byte. It's used to catch the
				256	* most severe mutilation of the program by the caller.
				257	*/
				258
				259	#define REGMAGIC 0234
				260
				261	/*
				262	* Opcode notes:
				263	*
				264	* BRANCH The set of branches constituting a single choice are hooked
				265	* together with their "next" pointers, since precedence prevents
				266	* anything being concatenated to any individual branch. The
				267	* "next" pointer of the last BRANCH in a choice points to the
				268	* thing following the whole choice. This is also where the
				269	* final "next" pointer of each individual branch points; each
				270	* branch starts with the operand node of a BRANCH node.
				271	*
				272	* BACK Normal "next" pointers all implicitly point forward; BACK
				273	* exists to make loop structures possible.
				274	*
				275	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				276	* BRANCH structures using BACK. Simple cases (one character
				277	* per match) are implemented with STAR and PLUS for speed
				278	* and to minimize recursive plunges.
				279	*
				280	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				281	* node, and defines the min and max limits to be used for that
				282	* node.
				283	*
				284	* MOPEN,MCLOSE ...are numbered at compile time.
				285	* ZOPEN,ZCLOSE ...ditto
				286	*/
				287
				288	/*
				289	* A node is one char of opcode followed by two chars of "next" pointer.
				290	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				291	* value is a positive offset from the opcode of the node containing it.
				292	* An operand, if any, simply follows the node. (Note that much of the
				293	* code generation knows about this implicit relationship.)
				294	*
				295	* Using two bytes for the "next" pointer is vast overkill for most things,
				296	* but allows patterns to get big without disasters.
				297	*/
				298	#define OP(p) ((int)*(p))
				299	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				300	#define OPERAND(p) ((p) + 3)
				301	/* Obtain an operand that was stored as four bytes, MSB first. */
				302	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				303	+ ((long)(p)[5] << 8) + (long)(p)[6])
				304	/* Obtain a second operand stored as four bytes. */
				305	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				306	/* Obtain a second single-byte operand stored after a four bytes operand. */
				307	#define OPERAND_CMP(p) (p)[7]
				308
				309	/*
				310	* Utility definitions.
				311	*/
				312	#define UCHARAT(p) ((int)(char_u )(p))
				313
				314	/* Used for an error (down from) vim_regcomp(): give the error message, set
				315	* rc_did_emsg and return NULL */
				316	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				317	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				318	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				319	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				320
				321	#define MAX_LIMIT (32767L << 16L)
				322
				323	static int re_multi_type __ARGS((int));
				324	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				325	static char_u cstrchr __ARGS((char_u , int));
				326
				327	#ifdef DEBUG
				328	static void regdump __ARGS((char_u , regprog_T ));
				329	static char_u regprop __ARGS((char_u ));
				330	#endif
				331
				332	#define NOT_MULTI 0
				333	#define MULTI_ONE 1
				334	#define MULTI_MULT 2
				335	/*
				336	* Return NOT_MULTI if c is not a "multi" operator.
				337	* Return MULTI_ONE if c is a single "multi" operator.
				338	* Return MULTI_MULT if c is a multi "multi" operator.
				339	*/
				340	static int
				341	re_multi_type(c)
				342	int c;
				343	{
				344	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				345	return MULTI_ONE;
				346	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				347	return MULTI_MULT;
				348	return NOT_MULTI;
				349	}
				350
				351	/*
				352	* Flags to be passed up and down.
				353	*/
				354	#define HASWIDTH 0x1 /* Known never to match null string. */
				355	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				356	#define SPSTART 0x4 /* Starts with * or +. */
				357	#define HASNL 0x8 /* Contains some \n. */
				358	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				359	#define WORST 0 /* Worst case. */
				360
				361	/*
				362	* When regcode is set to this value, code is not emitted and size is computed
				363	* instead.
				364	*/
				365	#define JUST_CALC_SIZE ((char_u *) -1)
				366
				367	static char_u *reg_prev_sub;
				368
				369	/*
				370	* REGEXP_INRANGE contains all characters which are always special in a []
				371	* range after '\'.
				372	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				373	* These are:
				374	* \n - New line (NL).
				375	* \r - Carriage Return (CR).
				376	* \t - Tab (TAB).
				377	* \e - Escape (ESC).
				378	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	379	* \d - Character code in decimal, eg \d123
				380	* \o - Character code in octal, eg \o40
				381	* \x - Character code in hex, eg \x4a
				382	* \u - Multibyte character code, eg \u20ac
				383	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	384	*/
				385	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	386	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	387
				388	static int backslash_trans __ARGS((int c));
				389	static int skip_class_name __ARGS((char_u **pp));
				390	static char_u skip_anyof __ARGS((char_u p));
				391	static void init_class_tab __ARGS((void));
				392
				393	/*
				394	* Translate '\x' to its control character, except "\n", which is Magic.
				395	*/
				396	static int
				397	backslash_trans(c)
				398	int c;
				399	{
				400	switch (c)
				401	{
				402	case 'r': return CAR;
				403	case 't': return TAB;
				404	case 'e': return ESC;
				405	case 'b': return BS;
				406	}
				407	return c;
				408	}
				409
				410	/*
				411	* Check for a character class name. "pp" points to the '['.
				412	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				413	* recognized. Otherwise "pp" is advanced to after the item.
				414	*/
				415	static int
				416	skip_class_name(pp)
				417	char_u **pp;
				418	{
				419	static const char *(class_names[]) =
				420	{
				421	"alnum:]",
				422	#define CLASS_ALNUM 0
				423	"alpha:]",
				424	#define CLASS_ALPHA 1
				425	"blank:]",
				426	#define CLASS_BLANK 2
				427	"cntrl:]",
				428	#define CLASS_CNTRL 3
				429	"digit:]",
				430	#define CLASS_DIGIT 4
				431	"graph:]",
				432	#define CLASS_GRAPH 5
				433	"lower:]",
				434	#define CLASS_LOWER 6
				435	"print:]",
				436	#define CLASS_PRINT 7
				437	"punct:]",
				438	#define CLASS_PUNCT 8
				439	"space:]",
				440	#define CLASS_SPACE 9
				441	"upper:]",
				442	#define CLASS_UPPER 10
				443	"xdigit:]",
				444	#define CLASS_XDIGIT 11
				445	"tab:]",
				446	#define CLASS_TAB 12
				447	"return:]",
				448	#define CLASS_RETURN 13
				449	"backspace:]",
				450	#define CLASS_BACKSPACE 14
				451	"escape:]",
				452	#define CLASS_ESCAPE 15
				453	};
				454	#define CLASS_NONE 99
				455	int i;
				456
				457	if ((*pp)[1] == ':')
				458	{
				459	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				460	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				461	{
				462	*pp += STRLEN(class_names[i]) + 2;
				463	return i;
				464	}
				465	}
				466	return CLASS_NONE;
				467	}
				468
				469	/*
				470	* Skip over a "[]" range.
				471	* "p" must point to the character after the '['.
				472	* The returned pointer is on the matching ']', or the terminating NUL.
				473	*/
				474	static char_u *
				475	skip_anyof(p)
				476	char_u *p;
				477	{
				478	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				479	#ifdef FEAT_MBYTE
				480	int l;
				481	#endif
				482
				483	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				484
				485	if (p == '^') / Complement of range. */
				486	++p;
				487	if (p == ']' \|\| p == '-')
				488	++p;
				489	while (p != NUL && p != ']')
				490	{
				491	#ifdef FEAT_MBYTE
				492	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				493	p += l;
				494	else
				495	#endif
				496	if (*p == '-')
				497	{
				498	++p;
				499	if (p != ']' && p != NUL)
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	500	mb_ptr_adv(p);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	501	}
				502	else if (*p == '\\'
				503	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				504	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				505	p += 2;
				506	else if (*p == '[')
				507	{
				508	if (skip_class_name(&p) == CLASS_NONE)
				509	++p; /* It was not a class name */
				510	}
				511	else
				512	++p;
				513	}
				514
				515	return p;
				516	}
				517
				518	/*
				519	* Specific version of character class functions.
				520	* Using a table to keep this fast.
				521	*/
				522	static short class_tab[256];
				523
				524	#define RI_DIGIT 0x01
				525	#define RI_HEX 0x02
				526	#define RI_OCTAL 0x04
				527	#define RI_WORD 0x08
				528	#define RI_HEAD 0x10
				529	#define RI_ALPHA 0x20
				530	#define RI_LOWER 0x40
				531	#define RI_UPPER 0x80
				532	#define RI_WHITE 0x100
				533
				534	static void
				535	init_class_tab()
				536	{
				537	int i;
				538	static int done = FALSE;
				539
				540	if (done)
				541	return;
				542
				543	for (i = 0; i < 256; ++i)
				544	{
				545	if (i >= '0' && i <= '7')
				546	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				547	else if (i >= '8' && i <= '9')
				548	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				549	else if (i >= 'a' && i <= 'f')
				550	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				551	#ifdef EBCDIC
				552	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				553	\|\| (i >= 's' && i <= 'z'))
				554	#else
				555	else if (i >= 'g' && i <= 'z')
				556	#endif
				557	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				558	else if (i >= 'A' && i <= 'F')
				559	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				560	#ifdef EBCDIC
				561	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				562	\|\| (i >= 'S' && i <= 'Z'))
				563	#else
				564	else if (i >= 'G' && i <= 'Z')
				565	#endif
				566	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				567	else if (i == '_')
				568	class_tab[i] = RI_WORD + RI_HEAD;
				569	else
				570	class_tab[i] = 0;
				571	}
				572	class_tab[' '] \|= RI_WHITE;
				573	class_tab['\t'] \|= RI_WHITE;
				574	done = TRUE;
				575	}
				576
				577	#ifdef FEAT_MBYTE
				578	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				579	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				580	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				581	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				582	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				583	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				584	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				585	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				586	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				587	#else
				588	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				589	# define ri_hex(c) (class_tab[c] & RI_HEX)
				590	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				591	# define ri_word(c) (class_tab[c] & RI_WORD)
				592	# define ri_head(c) (class_tab[c] & RI_HEAD)
				593	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				594	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				595	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				596	# define ri_white(c) (class_tab[c] & RI_WHITE)
				597	#endif
				598
				599	/* flags for regflags */
				600	#define RF_ICASE 1 /* ignore case */
				601	#define RF_NOICASE 2 /* don't ignore case */
				602	#define RF_HASNL 4 /* can match a NL */
				603	#define RF_ICOMBINE 8 /* ignore combining characters */
				604	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				605
				606	/*
				607	* Global work variables for vim_regcomp().
				608	*/
				609
				610	static char_u regparse; / Input-scan pointer. */
				611	static int prevchr_len; /* byte length of previous char */
				612	static int num_complex_braces; /* Complex \{...} count */
				613	static int regnpar; /* () count. */
				614	#ifdef FEAT_SYN_HL
				615	static int regnzpar; /* \z() count. */
				616	static int re_has_z; /* \z item detected */
				617	#endif
				618	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				619	static long regsize; /* Code size. */
				620	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				621	static unsigned regflags; /* RF_ flags for prog */
				622	static long brace_min[10]; /* Minimums for complex brace repeats */
				623	static long brace_max[10]; /* Maximums for complex brace repeats */
				624	static int brace_count[10]; /* Current counts for complex brace repeats */
				625	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				626	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				627	#endif
				628	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				629
				630	static int reg_magic; /* magicness of the pattern: */
				631	#define MAGIC_NONE 1 /* "\V" very unmagic */
				632	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				633	#define MAGIC_ON 3 /* "\m" or 'magic' */
				634	#define MAGIC_ALL 4 /* "\v" very magic */
				635
				636	static int reg_string; /* matching with a string instead of a buffer
				637	line */
				638
				639	/*
				640	* META contains all characters that may be magic, except '^' and '$'.
				641	*/
				642
				643	#ifdef EBCDIC
				644	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				645	#else
				646	/* META[] is used often enough to justify turning it into a table. */
				647	static char_u META_flags[] = {
				648	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				649	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				650	/* % & ( ) * + . */
				651	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				652	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				653	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				654	/* @ A C D F H I K L M O */
				655	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				656	/* P S U V W X Z [ _ */
				657	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				658	/* a c d f h i k l m n o */
				659	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				660	/* p s u v w x z { \| ~ */
				661	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				662	};
				663	#endif
				664
				665	static int curchr;
				666
				667	/* arguments for reg() */
				668	#define REG_NOPAREN 0 /* toplevel reg() */
				669	#define REG_PAREN 1 /* */
				670	#define REG_ZPAREN 2 /* \z(\) */
				671	#define REG_NPAREN 3 /* \%(\) */
				672
				673	/*
				674	* Forward declarations for vim_regcomp()'s friends.
				675	*/
				676	static void initchr __ARGS((char_u *));
				677	static int getchr __ARGS((void));
				678	static void skipchr_keepstart __ARGS((void));
				679	static int peekchr __ARGS((void));
				680	static void skipchr __ARGS((void));
				681	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	682	static int gethexchrs __ARGS((int maxinputlen));
				683	static int getoctchrs __ARGS((void));
				684	static int getdecchrs __ARGS((void));
				685	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	686	static void regcomp_start __ARGS((char_u *expr, int flags));
				687	static char_u reg __ARGS((int, int ));
				688	static char_u regbranch __ARGS((int flagp));
				689	static char_u regconcat __ARGS((int flagp));
				690	static char_u regpiece __ARGS((int ));
				691	static char_u regatom __ARGS((int ));
				692	static char_u *regnode __ARGS((int));
				693	static int prog_magic_wrong __ARGS((void));
				694	static char_u regnext __ARGS((char_u ));
				695	static void regc __ARGS((int b));
				696	#ifdef FEAT_MBYTE
				697	static void regmbc __ARGS((int c));
				698	#endif
				699	static void reginsert __ARGS((int, char_u *));
				700	static void reginsert_limits __ARGS((int, long, long, char_u *));
				701	static char_u re_put_long __ARGS((char_u pr, long_u val));
				702	static int read_limits __ARGS((long , long ));
				703	static void regtail __ARGS((char_u , char_u ));
				704	static void regoptail __ARGS((char_u , char_u ));
				705
				706	/*
				707	* Return TRUE if compiled regular expression "prog" can match a line break.
				708	*/
				709	int
				710	re_multiline(prog)
				711	regprog_T *prog;
				712	{
				713	return (prog->regflags & RF_HASNL);
				714	}
				715
				716	/*
				717	* Return TRUE if compiled regular expression "prog" looks before the start
				718	* position (pattern contains "\@<=" or "\@<!").
				719	*/
				720	int
				721	re_lookbehind(prog)
				722	regprog_T *prog;
				723	{
				724	return (prog->regflags & RF_LOOKBH);
				725	}
				726
				727	/*
				728	* Skip past regular expression.
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	729	* Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	730	* Take care of characters with a backslash in front of it.
				731	* Skip strings inside [ and ].
				732	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				733	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				734	* is changed in-place.
				735	*/
				736	char_u *
				737	skip_regexp(startp, dirc, magic, newp)
				738	char_u *startp;
				739	int dirc;
				740	int magic;
				741	char_u **newp;
				742	{
				743	int mymagic;
				744	char_u *p = startp;
				745
				746	if (magic)
				747	mymagic = MAGIC_ON;
				748	else
				749	mymagic = MAGIC_OFF;
				750
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	751	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	752	{
				753	if (p[0] == dirc) /* found end of regexp */
				754	break;
				755	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				756	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				757	{
				758	p = skip_anyof(p + 1);
				759	if (p[0] == NUL)
				760	break;
				761	}
				762	else if (p[0] == '\\' && p[1] != NUL)
				763	{
				764	if (dirc == '?' && newp != NULL && p[1] == '?')
				765	{
				766	/* change "\?" to "?", make a copy first. */
				767	if (*newp == NULL)
				768	{
				769	*newp = vim_strsave(startp);
				770	if (*newp != NULL)
				771	p = *newp + (p - startp);
				772	}
				773	if (*newp != NULL)
				774	mch_memmove(p, p + 1, STRLEN(p));
				775	else
				776	++p;
				777	}
				778	else
				779	++p; /* skip next character */
				780	if (*p == 'v')
				781	mymagic = MAGIC_ALL;
				782	else if (*p == 'V')
				783	mymagic = MAGIC_NONE;
				784	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	785	}
				786	return p;
				787	}
				788
				789	/*
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	790	* vim_regcomp() - compile a regular expression into internal code
				791	* Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	792	*
				793	* We can't allocate space until we know how big the compiled form will be,
				794	* but we can't compile it (and thus know how big it is) until we've got a
				795	* place to put the code. So we cheat: we compile it twice, once with code
				796	* generation turned off and size counting turned on, and once "for real".
				797	* This also means that we don't allocate space until we are sure that the
				798	* thing really will compile successfully, and we never have to move the
				799	* code and thus invalidate pointers into it. (Note that it has to be in
				800	* one piece because vim_free() must be able to free it all.)
				801	*
				802	* Whether upper/lower case is to be ignored is decided when executing the
				803	* program, it does not matter here.
				804	*
				805	* Beware that the optimization-preparation code in here knows about some
				806	* of the structure of the compiled regexp.
				807	* "re_flags": RE_MAGIC and/or RE_STRING.
				808	*/
				809	regprog_T *
				810	vim_regcomp(expr, re_flags)
				811	char_u *expr;
				812	int re_flags;
				813	{
				814	regprog_T *r;
				815	char_u *scan;
				816	char_u *longest;
				817	int len;
				818	int flags;
				819
				820	if (expr == NULL)
				821	EMSG_RET_NULL(_(e_null));
				822
				823	init_class_tab();
				824
				825	/*
				826	* First pass: determine size, legality.
				827	*/
				828	regcomp_start(expr, re_flags);
				829	regcode = JUST_CALC_SIZE;
				830	regc(REGMAGIC);
				831	if (reg(REG_NOPAREN, &flags) == NULL)
				832	return NULL;
				833
				834	/* Small enough for pointer-storage convention? */
				835	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				836	if (regsize >= 65536L - 256L)
				837	EMSG_RET_NULL(_("E339: Pattern too long"));
				838	#endif
				839
				840	/* Allocate space. */
				841	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				842	if (r == NULL)
				843	return NULL;
				844
				845	/*
				846	* Second pass: emit code.
				847	*/
				848	regcomp_start(expr, re_flags);
				849	regcode = r->program;
				850	regc(REGMAGIC);
				851	if (reg(REG_NOPAREN, &flags) == NULL)
				852	{
				853	vim_free(r);
				854	return NULL;
				855	}
				856
				857	/* Dig out information for optimizations. */
				858	r->regstart = NUL; /* Worst-case defaults. */
				859	r->reganch = 0;
				860	r->regmust = NULL;
				861	r->regmlen = 0;
				862	r->regflags = regflags;
				863	if (flags & HASNL)
				864	r->regflags \|= RF_HASNL;
				865	if (flags & HASLOOKBH)
				866	r->regflags \|= RF_LOOKBH;
				867	#ifdef FEAT_SYN_HL
				868	/* Remember whether this pattern has any \z specials in it. */
				869	r->reghasz = re_has_z;
				870	#endif
				871	scan = r->program + 1; /* First BRANCH. */
				872	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				873	{
				874	scan = OPERAND(scan);
				875
				876	/* Starting-point info. */
				877	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				878	{
				879	r->reganch++;
				880	scan = regnext(scan);
				881	}
				882
				883	if (OP(scan) == EXACTLY)
				884	{
				885	#ifdef FEAT_MBYTE
				886	if (has_mbyte)
				887	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				888	else
				889	#endif
				890	r->regstart = *OPERAND(scan);
				891	}
				892	else if ((OP(scan) == BOW
				893	\|\| OP(scan) == EOW
				894	\|\| OP(scan) == NOTHING
				895	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				896	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				897	&& OP(regnext(scan)) == EXACTLY)
				898	{
				899	#ifdef FEAT_MBYTE
				900	if (has_mbyte)
				901	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				902	else
				903	#endif
				904	r->regstart = *OPERAND(regnext(scan));
				905	}
				906
				907	/*
				908	* If there's something expensive in the r.e., find the longest
				909	* literal string that must appear and make it the regmust. Resolve
				910	* ties in favor of later strings, since the regstart check works
				911	* with the beginning of the r.e. and avoiding duplication
				912	* strengthens checking. Not a strong reason, but sufficient in the
				913	* absence of others.
				914	*/
				915	/*
				916	* When the r.e. starts with BOW, it is faster to look for a regmust
				917	* first. Used a lot for "#" and "*" commands. (Added by mool).
				918	*/
				919	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				920	&& !(flags & HASNL))
				921	{
				922	longest = NULL;
				923	len = 0;
				924	for (; scan != NULL; scan = regnext(scan))
				925	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				926	{
				927	longest = OPERAND(scan);
				928	len = (int)STRLEN(OPERAND(scan));
				929	}
				930	r->regmust = longest;
				931	r->regmlen = len;
				932	}
				933	}
				934	#ifdef DEBUG
				935	regdump(expr, r);
				936	#endif
				937	return r;
				938	}
				939
				940	/*
				941	* Setup to parse the regexp. Used once to get the length and once to do it.
				942	*/
				943	static void
				944	regcomp_start(expr, re_flags)
				945	char_u *expr;
				946	int re_flags; /* see vim_regcomp() */
				947	{
				948	initchr(expr);
				949	if (re_flags & RE_MAGIC)
				950	reg_magic = MAGIC_ON;
				951	else
				952	reg_magic = MAGIC_OFF;
				953	reg_string = (re_flags & RE_STRING);
				954
				955	num_complex_braces = 0;
				956	regnpar = 1;
				957	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				958	#ifdef FEAT_SYN_HL
				959	regnzpar = 1;
				960	re_has_z = 0;
				961	#endif
				962	regsize = 0L;
				963	regflags = 0;
				964	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				965	had_eol = FALSE;
				966	#endif
				967	}
				968
				#if defined(FEAT_SYN_HL) || defined(PROTO)
				/*
				 * Check if during the previous call to vim_regcomp the EOL item "$" has been
				 * found.  This is messy, but it works fine.
				 *
				 * Returns the current value of the file-level had_eol flag.
				 */
				    int
				vim_regcomp_had_eol()
				{
				    return had_eol;
				}
				#endif
				980
				981	/*
				982	* reg - regular expression, i.e. main body or parenthesized thing
				983	*
				984	* Caller must absorb opening parenthesis.
				985	*
				986	* Combining parenthesis handling with the base level of regular expression
				987	* is a trifle forced, but the need to tie the tails of the branches to what
				988	* follows makes it hard to avoid.
				989	*/
				990	static char_u *
				991	reg(paren, flagp)
				992	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				993	int *flagp;
				994	{
				995	char_u *ret;
				996	char_u *br;
				997	char_u *ender;
				998	int parno = 0;
				999	int flags;
				1000
				1001	flagp = HASWIDTH; / Tentatively. */
				1002
				1003	#ifdef FEAT_SYN_HL
				1004	if (paren == REG_ZPAREN)
				1005	{
				1006	/* Make a ZOPEN node. */
				1007	if (regnzpar >= NSUBEXP)
				1008	EMSG_RET_NULL(_("E50: Too many \\z("));
				1009	parno = regnzpar;
				1010	regnzpar++;
				1011	ret = regnode(ZOPEN + parno);
				1012	}
				1013	else
				1014	#endif
				1015	if (paren == REG_PAREN)
				1016	{
				1017	/* Make a MOPEN node. */
				1018	if (regnpar >= NSUBEXP)
				1019	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1020	parno = regnpar;
				1021	++regnpar;
				1022	ret = regnode(MOPEN + parno);
				1023	}
				1024	else if (paren == REG_NPAREN)
				1025	{
				1026	/* Make a NOPEN node. */
				1027	ret = regnode(NOPEN);
				1028	}
				1029	else
				1030	ret = NULL;
				1031
				1032	/* Pick up the branches, linking them together. */
				1033	br = regbranch(&flags);
				1034	if (br == NULL)
				1035	return NULL;
				1036	if (ret != NULL)
				1037	regtail(ret, br); /* [MZ]OPEN -> first. */
				1038	else
				1039	ret = br;
				1040	/* If one of the branches can be zero-width, the whole thing can.
				1041	* If one of the branches has * at start or matches a line-break, the
				1042	* whole thing can. */
				1043	if (!(flags & HASWIDTH))
				1044	*flagp &= ~HASWIDTH;
				1045	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1046	while (peekchr() == Magic('\|'))
				1047	{
				1048	skipchr();
				1049	br = regbranch(&flags);
				1050	if (br == NULL)
				1051	return NULL;
				1052	regtail(ret, br); /* BRANCH -> BRANCH. */
				1053	if (!(flags & HASWIDTH))
				1054	*flagp &= ~HASWIDTH;
				1055	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1056	}
				1057
				1058	/* Make a closing node, and hook it on the end. */
				1059	ender = regnode(
				1060	#ifdef FEAT_SYN_HL
				1061	paren == REG_ZPAREN ? ZCLOSE + parno :
				1062	#endif
				1063	paren == REG_PAREN ? MCLOSE + parno :
				1064	paren == REG_NPAREN ? NCLOSE : END);
				1065	regtail(ret, ender);
				1066
				1067	/* Hook the tails of the branches to the closing node. */
				1068	for (br = ret; br != NULL; br = regnext(br))
				1069	regoptail(br, ender);
				1070
				1071	/* Check for proper termination. */
				1072	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1073	{
				1074	#ifdef FEAT_SYN_HL
				1075	if (paren == REG_ZPAREN)
				1076	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1077	else
				1078	#endif
				1079	if (paren == REG_NPAREN)
				1080	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1081	else
				1082	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1083	}
				1084	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1085	{
				1086	if (curchr == Magic(')'))
				1087	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1088	else
				1089	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1090	/* NOTREACHED */
				1091	}
				1092	/*
				1093	* Here we set the flag allowing back references to this set of
				1094	* parentheses.
				1095	*/
				1096	if (paren == REG_PAREN)
				1097	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1098	return ret;
				1099	}
				1100
				1101	/*
				1102	* regbranch - one alternative of an \| operator
				1103	*
				1104	* Implements the & operator.
				1105	*/
				1106	static char_u *
				1107	regbranch(flagp)
				1108	int *flagp;
				1109	{
				1110	char_u *ret;
				1111	char_u *chain = NULL;
				1112	char_u *latest;
				1113	int flags;
				1114
				1115	flagp = WORST \| HASNL; / Tentatively. */
				1116
				1117	ret = regnode(BRANCH);
				1118	for (;;)
				1119	{
				1120	latest = regconcat(&flags);
				1121	if (latest == NULL)
				1122	return NULL;
				1123	/* If one of the branches has width, the whole thing has. If one of
				1124	* the branches anchors at start-of-line, the whole thing does.
				1125	* If one of the branches uses look-behind, the whole thing does. */
				1126	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1127	/* If one of the branches doesn't match a line-break, the whole thing
				1128	* doesn't. */
				1129	*flagp &= ~HASNL \| (flags & HASNL);
				1130	if (chain != NULL)
				1131	regtail(chain, latest);
				1132	if (peekchr() != Magic('&'))
				1133	break;
				1134	skipchr();
				1135	regtail(latest, regnode(END)); /* operand ends */
				1136	reginsert(MATCH, latest);
				1137	chain = latest;
				1138	}
				1139
				1140	return ret;
				1141	}
				1142
				1143	/*
				1144	* regbranch - one alternative of an \| or & operator
				1145	*
				1146	* Implements the concatenation operator.
				1147	*/
				1148	static char_u *
				1149	regconcat(flagp)
				1150	int *flagp;
				1151	{
				1152	char_u *first = NULL;
				1153	char_u *chain = NULL;
				1154	char_u *latest;
				1155	int flags;
				1156	int cont = TRUE;
				1157
				1158	flagp = WORST; / Tentatively. */
				1159
				1160	while (cont)
				1161	{
				1162	switch (peekchr())
				1163	{
				1164	case NUL:
				1165	case Magic('\|'):
				1166	case Magic('&'):
				1167	case Magic(')'):
				1168	cont = FALSE;
				1169	break;
				1170	case Magic('Z'):
				1171	#ifdef FEAT_MBYTE
				1172	regflags \|= RF_ICOMBINE;
				1173	#endif
				1174	skipchr_keepstart();
				1175	break;
				1176	case Magic('c'):
				1177	regflags \|= RF_ICASE;
				1178	skipchr_keepstart();
				1179	break;
				1180	case Magic('C'):
				1181	regflags \|= RF_NOICASE;
				1182	skipchr_keepstart();
				1183	break;
				1184	case Magic('v'):
				1185	reg_magic = MAGIC_ALL;
				1186	skipchr_keepstart();
				1187	curchr = -1;
				1188	break;
				1189	case Magic('m'):
				1190	reg_magic = MAGIC_ON;
				1191	skipchr_keepstart();
				1192	curchr = -1;
				1193	break;
				1194	case Magic('M'):
				1195	reg_magic = MAGIC_OFF;
				1196	skipchr_keepstart();
				1197	curchr = -1;
				1198	break;
				1199	case Magic('V'):
				1200	reg_magic = MAGIC_NONE;
				1201	skipchr_keepstart();
				1202	curchr = -1;
				1203	break;
				1204	default:
				1205	latest = regpiece(&flags);
				1206	if (latest == NULL)
				1207	return NULL;
				1208	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1209	if (chain == NULL) /* First piece. */
				1210	*flagp \|= flags & SPSTART;
				1211	else
				1212	regtail(chain, latest);
				1213	chain = latest;
				1214	if (first == NULL)
				1215	first = latest;
				1216	break;
				1217	}
				1218	}
				1219	if (first == NULL) /* Loop ran zero times. */
				1220	first = regnode(NOTHING);
				1221	return first;
				1222	}
				1223
				1224	/*
				1225	* regpiece - something followed by possible [*+=]
				1226	*
				1227	* Note that the branching code sequences used for = and the general cases
				1228	* of * and + are somewhat optimized: they use the same NOTHING node as
				1229	* both the endmarker for their branch list and the body of the last branch.
				1230	* It might seem that this node could be dispensed with entirely, but the
				1231	* endmarker role is not redundant.
				1232	*/
				1233	static char_u *
				1234	regpiece(flagp)
				1235	int *flagp;
				1236	{
				1237	char_u *ret;
				1238	int op;
				1239	char_u *next;
				1240	int flags;
				1241	long minval;
				1242	long maxval;
				1243
				1244	ret = regatom(&flags);
				1245	if (ret == NULL)
				1246	return NULL;
				1247
				1248	op = peekchr();
				1249	if (re_multi_type(op) == NOT_MULTI)
				1250	{
				1251	*flagp = flags;
				1252	return ret;
				1253	}
				1254	if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
				1255	{
				1256	if (op == Magic('*'))
				1257	EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
				1258	reg_magic >= MAGIC_ON);
				1259	if (op == Magic('+'))
				1260	EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
				1261	reg_magic == MAGIC_ALL);
				1262	/* "\{}" is checked below, it's allowed when there is an upper limit */
				1263	}
				1264	/* default flags */
				1265	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1266
				1267	skipchr();
				1268	switch (op)
				1269	{
				1270	case Magic('*'):
				1271	if (flags & SIMPLE)
				1272	reginsert(STAR, ret);
				1273	else
				1274	{
				1275	/* Emit x* as (x&\|), where & means "self". */
				1276	reginsert(BRANCH, ret); /* Either x */
				1277	regoptail(ret, regnode(BACK)); /* and loop */
				1278	regoptail(ret, ret); /* back */
				1279	regtail(ret, regnode(BRANCH)); /* or */
				1280	regtail(ret, regnode(NOTHING)); /* null. */
				1281	}
				1282	break;
				1283
				1284	case Magic('+'):
				1285	if (flags & SIMPLE)
				1286	reginsert(PLUS, ret);
				1287	else
				1288	{
				1289	/* Emit x+ as x(&\|), where & means "self". */
				1290	next = regnode(BRANCH); /* Either */
				1291	regtail(ret, next);
				1292	regtail(regnode(BACK), ret); /* loop back */
				1293	regtail(next, regnode(BRANCH)); /* or */
				1294	regtail(ret, regnode(NOTHING)); /* null. */
				1295	}
				1296	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1297	break;
				1298
				1299	case Magic('@'):
				1300	{
				1301	int lop = END;
				1302
				1303	switch (no_Magic(getchr()))
				1304	{
				1305	case '=': lop = MATCH; break; /* \@= */
				1306	case '!': lop = NOMATCH; break; /* \@! */
				1307	case '>': lop = SUBPAT; break; /* \@> */
				1308	case '<': switch (no_Magic(getchr()))
				1309	{
				1310	case '=': lop = BEHIND; break; /* \@<= */
				1311	case '!': lop = NOBEHIND; break; /* \@<! */
				1312	}
				1313	}
				1314	if (lop == END)
				1315	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1316	reg_magic == MAGIC_ALL);
				1317	/* Look behind must match with behind_pos. */
				1318	if (lop == BEHIND \|\| lop == NOBEHIND)
				1319	{
				1320	regtail(ret, regnode(BHPOS));
				1321	*flagp \|= HASLOOKBH;
				1322	}
				1323	regtail(ret, regnode(END)); /* operand ends */
				1324	reginsert(lop, ret);
				1325	break;
				1326	}
				1327
				1328	case Magic('?'):
				1329	case Magic('='):
				1330	/* Emit x= as (x\|) */
				1331	reginsert(BRANCH, ret); /* Either x */
				1332	regtail(ret, regnode(BRANCH)); /* or */
				1333	next = regnode(NOTHING); /* null. */
				1334	regtail(ret, next);
				1335	regoptail(ret, next);
				1336	break;
				1337
				1338	case Magic('{'):
				1339	if (!read_limits(&minval, &maxval))
				1340	return NULL;
				1341	if (!(flags & HASWIDTH) && (maxval > minval
				1342	? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
				1343	EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
				1344	reg_magic == MAGIC_ALL);
				1345	if (flags & SIMPLE)
				1346	{
				1347	reginsert(BRACE_SIMPLE, ret);
				1348	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1349	}
				1350	else
				1351	{
				1352	if (num_complex_braces >= 10)
				1353	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1354	reg_magic == MAGIC_ALL);
				1355	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1356	regoptail(ret, regnode(BACK));
				1357	regoptail(ret, ret);
				1358	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1359	++num_complex_braces;
				1360	}
				1361	if (minval > 0 && maxval > 0)
				1362	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1363	break;
				1364	}
				1365	if (re_multi_type(peekchr()) != NOT_MULTI)
				1366	{
				1367	/* Can't have a multi follow a multi. */
				1368	if (peekchr() == Magic('*'))
				1369	sprintf((char )IObuff, _("E61: Nested %s"),
				1370	reg_magic >= MAGIC_ON ? "" : "\\");
				1371	else
				1372	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1373	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1374	EMSG_RET_NULL(IObuff);
				1375	}
				1376
				1377	return ret;
				1378	}
				1379
				1380	/*
				1381	* regatom - the lowest level
				1382	*
				1383	* Optimization: gobbles an entire sequence of ordinary characters so that
				1384	* it can turn them into a single node, which is smaller to store and
				1385	* faster to run. Don't do this when one_exactly is set.
				1386	*/
				1387	static char_u *
				1388	regatom(flagp)
				1389	int *flagp;
				1390	{
				1391	char_u *ret;
				1392	int flags;
				1393	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				1394	int c;
				1395	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1396	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1397	FNAME, SFNAME, PRINT, SPRINT,
				1398	WHITE, NWHITE, DIGIT, NDIGIT,
				1399	HEX, NHEX, OCTAL, NOCTAL,
				1400	WORD, NWORD, HEAD, NHEAD,
				1401	ALPHA, NALPHA, LOWER, NLOWER,
				1402	UPPER, NUPPER
				1403	};
				1404	char_u *p;
				1405	int extra = 0;
				1406
				1407	flagp = WORST; / Tentatively. */
				1408	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				1409
				1410	c = getchr();
				1411	switch (c)
				1412	{
				1413	case Magic('^'):
				1414	ret = regnode(BOL);
				1415	break;
				1416
				1417	case Magic('$'):
				1418	ret = regnode(EOL);
				1419	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1420	had_eol = TRUE;
				1421	#endif
				1422	break;
				1423
				1424	case Magic('<'):
				1425	ret = regnode(BOW);
				1426	break;
				1427
				1428	case Magic('>'):
				1429	ret = regnode(EOW);
				1430	break;
				1431
				1432	case Magic('_'):
				1433	c = no_Magic(getchr());
				1434	if (c == '^') /* "\_^" is start-of-line */
				1435	{
				1436	ret = regnode(BOL);
				1437	break;
				1438	}
				1439	if (c == '$') /* "\_$" is end-of-line */
				1440	{
				1441	ret = regnode(EOL);
				1442	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1443	had_eol = TRUE;
				1444	#endif
				1445	break;
				1446	}
				1447
				1448	extra = ADD_NL;
				1449	*flagp \|= HASNL;
				1450
				1451	/* "\_[" is character range plus newline */
				1452	if (c == '[')
				1453	goto collection;
				1454
				1455	/* "\_x" is character class plus newline */
				1456	/FALLTHROUGH/
				1457
				1458	/*
				1459	* Character classes.
				1460	*/
				1461	case Magic('.'):
				1462	case Magic('i'):
				1463	case Magic('I'):
				1464	case Magic('k'):
				1465	case Magic('K'):
				1466	case Magic('f'):
				1467	case Magic('F'):
				1468	case Magic('p'):
				1469	case Magic('P'):
				1470	case Magic('s'):
				1471	case Magic('S'):
				1472	case Magic('d'):
				1473	case Magic('D'):
				1474	case Magic('x'):
				1475	case Magic('X'):
				1476	case Magic('o'):
				1477	case Magic('O'):
				1478	case Magic('w'):
				1479	case Magic('W'):
				1480	case Magic('h'):
				1481	case Magic('H'):
				1482	case Magic('a'):
				1483	case Magic('A'):
				1484	case Magic('l'):
				1485	case Magic('L'):
				1486	case Magic('u'):
				1487	case Magic('U'):
				1488	p = vim_strchr(classchars, no_Magic(c));
				1489	if (p == NULL)
				1490	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1491	ret = regnode(classcodes[p - classchars] + extra);
				1492	*flagp \|= HASWIDTH \| SIMPLE;
				1493	break;
				1494
				1495	case Magic('n'):
				1496	if (reg_string)
				1497	{
				1498	/* In a string "\n" matches a newline character. */
				1499	ret = regnode(EXACTLY);
				1500	regc(NL);
				1501	regc(NUL);
				1502	*flagp \|= HASWIDTH \| SIMPLE;
				1503	}
				1504	else
				1505	{
				1506	/* In buffer text "\n" matches the end of a line. */
				1507	ret = regnode(NEWL);
				1508	*flagp \|= HASWIDTH \| HASNL;
				1509	}
				1510	break;
				1511
				1512	case Magic('('):
				1513	if (one_exactly)
				1514	EMSG_ONE_RET_NULL;
				1515	ret = reg(REG_PAREN, &flags);
				1516	if (ret == NULL)
				1517	return NULL;
				1518	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1519	break;
				1520
				1521	case NUL:
				1522	case Magic('\|'):
				1523	case Magic('&'):
				1524	case Magic(')'):
				1525	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1526	/* NOTREACHED */
				1527
				1528	case Magic('='):
				1529	case Magic('?'):
				1530	case Magic('+'):
				1531	case Magic('@'):
				1532	case Magic('{'):
				1533	case Magic('*'):
				1534	c = no_Magic(c);
				1535	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1536	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1537	? "" : "\\", c);
				1538	EMSG_RET_NULL(IObuff);
				1539	/* NOTREACHED */
				1540
				1541	case Magic('~'): /* previous substitute pattern */
				1542	if (reg_prev_sub)
				1543	{
				1544	char_u *lp;
				1545
				1546	ret = regnode(EXACTLY);
				1547	lp = reg_prev_sub;
				1548	while (*lp != NUL)
				1549	regc(*lp++);
				1550	regc(NUL);
				1551	if (*reg_prev_sub != NUL)
				1552	{
				1553	*flagp \|= HASWIDTH;
				1554	if ((lp - reg_prev_sub) == 1)
				1555	*flagp \|= SIMPLE;
				1556	}
				1557	}
				1558	else
				1559	EMSG_RET_NULL(_(e_nopresub));
				1560	break;
				1561
				1562	case Magic('1'):
				1563	case Magic('2'):
				1564	case Magic('3'):
				1565	case Magic('4'):
				1566	case Magic('5'):
				1567	case Magic('6'):
				1568	case Magic('7'):
				1569	case Magic('8'):
				1570	case Magic('9'):
				1571	{
				1572	int refnum;
				1573
				1574	refnum = c - Magic('0');
				1575	/*
				1576	* Check if the back reference is legal. We must have seen the
				1577	* close brace.
				1578	* TODO: Should also check that we don't refer to something
				1579	* that is repeated (+*=): what instance of the repetition
				1580	* should we match?
				1581	*/
				1582	if (!had_endbrace[refnum])
				1583	{
				1584	/* Trick: check if "@<=" or "@<!" follows, in which case
				1585	* the \1 can appear before the referenced match. */
				1586	for (p = regparse; *p != NUL; ++p)
				1587	if (p[0] == '@' && p[1] == '<'
				1588	&& (p[2] == '!' \|\| p[2] == '='))
				1589	break;
				1590	if (*p == NUL)
				1591	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1592	}
				1593	ret = regnode(BACKREF + refnum);
				1594	}
				1595	break;
				1596
				1597	#ifdef FEAT_SYN_HL
				1598	case Magic('z'):
				1599	{
				1600	c = no_Magic(getchr());
				1601	switch (c)
				1602	{
				1603	case '(': if (reg_do_extmatch != REX_SET)
				1604	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1605	if (one_exactly)
				1606	EMSG_ONE_RET_NULL;
				1607	ret = reg(REG_ZPAREN, &flags);
				1608	if (ret == NULL)
				1609	return NULL;
				1610	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1611	re_has_z = REX_SET;
				1612	break;
				1613
				1614	case '1':
				1615	case '2':
				1616	case '3':
				1617	case '4':
				1618	case '5':
				1619	case '6':
				1620	case '7':
				1621	case '8':
				1622	case '9': if (reg_do_extmatch != REX_USE)
				1623	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1624	ret = regnode(ZREF + c - '0');
				1625	re_has_z = REX_USE;
				1626	break;
				1627
				1628	case 's': ret = regnode(MOPEN + 0);
				1629	break;
				1630
				1631	case 'e': ret = regnode(MCLOSE + 0);
				1632	break;
				1633
				1634	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1635	}
				1636	}
				1637	break;
				1638	#endif
				1639
				1640	case Magic('%'):
				1641	{
				1642	c = no_Magic(getchr());
				1643	switch (c)
				1644	{
				1645	/* () without a back reference */
				1646	case '(':
				1647	if (one_exactly)
				1648	EMSG_ONE_RET_NULL;
				1649	ret = reg(REG_NPAREN, &flags);
				1650	if (ret == NULL)
				1651	return NULL;
				1652	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1653	break;
				1654
				1655	/* Catch \%^ and \%$ regardless of where they appear in the
				1656	* pattern -- regardless of whether or not it makes sense. */
				1657	case '^':
				1658	ret = regnode(RE_BOF);
				1659	break;
				1660
				1661	case '$':
				1662	ret = regnode(RE_EOF);
				1663	break;
				1664
				1665	case '#':
				1666	ret = regnode(CURSOR);
				1667	break;
				1668
				1669	/* \%[abc]: Emit as a list of branches, all ending at the last
				1670	* branch which matches nothing. */
				1671	case '[':
				1672	if (one_exactly) /* doesn't nest */
				1673	EMSG_ONE_RET_NULL;
				1674	{
				1675	char_u *lastbranch;
				1676	char_u *lastnode = NULL;
				1677	char_u *br;
				1678
				1679	ret = NULL;
				1680	while ((c = getchr()) != ']')
				1681	{
				1682	if (c == NUL)
				1683	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1684	reg_magic == MAGIC_ALL);
				1685	br = regnode(BRANCH);
				1686	if (ret == NULL)
				1687	ret = br;
				1688	else
				1689	regtail(lastnode, br);
				1690
				1691	ungetchr();
				1692	one_exactly = TRUE;
				1693	lastnode = regatom(flagp);
				1694	one_exactly = FALSE;
				1695	if (lastnode == NULL)
				1696	return NULL;
				1697	}
				1698	if (ret == NULL)
				1699	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1700	reg_magic == MAGIC_ALL);
				1701	lastbranch = regnode(BRANCH);
				1702	br = regnode(NOTHING);
				1703	if (ret != JUST_CALC_SIZE)
				1704	{
				1705	regtail(lastnode, br);
				1706	regtail(lastbranch, br);
				1707	/* connect all branches to the NOTHING
				1708	* branch at the end */
				1709	for (br = ret; br != lastnode; )
				1710	{
				1711	if (OP(br) == BRANCH)
				1712	{
				1713	regtail(br, lastbranch);
				1714	br = OPERAND(br);
				1715	}
				1716	else
				1717	br = regnext(br);
				1718	}
				1719	}
				1720	*flagp &= ~HASWIDTH;
				1721	break;
				1722	}
				1723
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1724	case 'd': /* %d123 decimal */
				1725	case 'o': /* %o123 octal */
				1726	case 'x': /* %xab hex 2 */
				1727	case 'u': /* %uabcd hex 4 */
				1728	case 'U': /* %U1234abcd hex 8 */
				1729	{
				1730	int i;
				1731
				1732	switch (c)
				1733	{
				1734	case 'd': i = getdecchrs(); break;
				1735	case 'o': i = getoctchrs(); break;
				1736	case 'x': i = gethexchrs(2); break;
				1737	case 'u': i = gethexchrs(4); break;
				1738	case 'U': i = gethexchrs(8); break;
				1739	default: i = -1; break;
				1740	}
				1741
				1742	if (i < 0)
				1743	EMSG_M_RET_NULL(
				1744	_("E678: Invalid character after %s%%[dxouU]"),
				1745	reg_magic == MAGIC_ALL);
				1746	ret = regnode(EXACTLY);
				1747	if (i == 0)
				1748	regc(0x0a);
				1749	else
				1750	#ifdef FEAT_MBYTE
				1751	regmbc(i);
				1752	#else
				1753	regc(i);
				1754	#endif
				1755	regc(NUL);
				1756	*flagp \|= HASWIDTH;
				1757	break;
				1758	}
				1759
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1760	default:
				1761	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1762	{
				1763	long_u n = 0;
				1764	int cmp;
				1765
				1766	cmp = c;
				1767	if (cmp == '<' \|\| cmp == '>')
				1768	c = getchr();
				1769	while (VIM_ISDIGIT(c))
				1770	{
				1771	n = n * 10 + (c - '0');
				1772	c = getchr();
				1773	}
				1774	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1775	{
				1776	if (c == 'l')
				1777	ret = regnode(RE_LNUM);
				1778	else if (c == 'c')
				1779	ret = regnode(RE_COL);
				1780	else
				1781	ret = regnode(RE_VCOL);
				1782	if (ret == JUST_CALC_SIZE)
				1783	regsize += 5;
				1784	else
				1785	{
				1786	/* put the number and the optional
				1787	* comparator after the opcode */
				1788	regcode = re_put_long(regcode, n);
				1789	*regcode++ = cmp;
				1790	}
				1791	break;
				1792	}
				1793	}
				1794
				1795	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1796	reg_magic == MAGIC_ALL);
				1797	}
				1798	}
				1799	break;
				1800
				1801	case Magic('['):
				1802	collection:
				1803	{
				1804	char_u *lp;
				1805
				1806	/*
				1807	* If there is no matching ']', we assume the '[' is a normal
				1808	* character. This makes 'incsearch' and ":help [" work.
				1809	*/
				1810	lp = skip_anyof(regparse);
				1811	if (lp == ']') / there is a matching ']' */
				1812	{
				1813	int startc = -1; /* > 0 when next '-' is a range */
				1814	int endc;
				1815
				1816	/*
				1817	* In a character class, different parsing rules apply.
				1818	* Not even \ is special anymore, nothing is.
				1819	*/
				1820	if (regparse == '^') / Complement of range. */
				1821	{
				1822	ret = regnode(ANYBUT + extra);
				1823	regparse++;
				1824	}
				1825	else
				1826	ret = regnode(ANYOF + extra);
				1827
				1828	/* At the start ']' and '-' mean the literal character. */
				1829	if (regparse == ']' \|\| regparse == '-')
				1830	regc(*regparse++);
				1831
				1832	while (regparse != NUL && regparse != ']')
				1833	{
				1834	if (*regparse == '-')
				1835	{
				1836	++regparse;
				1837	/* The '-' is not used for a range at the end and
				1838	* after or before a '\n'. */
				1839	if (regparse == ']' \|\| regparse == NUL
				1840	\|\| startc == -1
				1841	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				1842	{
				1843	regc('-');
				1844	startc = '-'; /* [--x] is a range */
				1845	}
				1846	else
				1847	{
				1848	#ifdef FEAT_MBYTE
				1849	if (has_mbyte)
				1850	endc = mb_ptr2char_adv(&regparse);
				1851	else
				1852	#endif
				1853	endc = *regparse++;
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1854
				1855	/* Handle \o40, \x20 and \u20AC style sequences */
				1856	if (endc == '\\' && !cpo_lit)
				1857	endc = coll_get_char();
				1858
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1859	if (startc > endc)
				1860	EMSG_RET_NULL(_(e_invrange));
				1861	#ifdef FEAT_MBYTE
				1862	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1863	\|\| (*mb_char2len)(endc) > 1))
				1864	{
				1865	/* Limit to a range of 256 chars */
				1866	if (endc > startc + 256)
				1867	EMSG_RET_NULL(_(e_invrange));
				1868	while (++startc <= endc)
				1869	regmbc(startc);
				1870	}
				1871	else
				1872	#endif
				1873	{
				1874	#ifdef EBCDIC
				1875	int alpha_only = FALSE;
				1876
				1877	/* for alphabetical range skip the gaps
				1878	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1879	if (isalpha(startc) && isalpha(endc))
				1880	alpha_only = TRUE;
				1881	#endif
				1882	while (++startc <= endc)
				1883	#ifdef EBCDIC
				1884	if (!alpha_only \|\| isalpha(startc))
				1885	#endif
				1886	regc(startc);
				1887	}
				1888	startc = -1;
				1889	}
				1890	}
				1891	/*
				1892	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1893	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1894	* 'cpoptions' is not included.
				1895	*/
				1896	else if (*regparse == '\\'
				1897	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1898	\|\| (!cpo_lit
				1899	&& vim_strchr(REGEXP_ABBR,
				1900	regparse[1]) != NULL)))
				1901	{
				1902	regparse++;
				1903	if (*regparse == 'n')
				1904	{
				1905	/* '\n' in range: also match NL */
				1906	if (ret != JUST_CALC_SIZE)
				1907	{
				1908	if (*ret == ANYBUT)
				1909	*ret = ANYBUT + ADD_NL;
				1910	else if (*ret == ANYOF)
				1911	*ret = ANYOF + ADD_NL;
				1912	/* else: must have had a \n already */
				1913	}
				1914	*flagp \|= HASNL;
				1915	regparse++;
				1916	startc = -1;
				1917	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1918	else if (*regparse == 'd'
				1919	\|\| *regparse == 'o'
				1920	\|\| *regparse == 'x'
				1921	\|\| *regparse == 'u'
				1922	\|\| *regparse == 'U')
				1923	{
				1924	startc = coll_get_char();
				1925	if (startc == 0)
				1926	regc(0x0a);
				1927	else
				1928	#ifdef FEAT_MBYTE
				1929	regmbc(startc);
				1930	#else
				1931	regc(startc);
				1932	#endif
				1933	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1934	else
				1935	{
				1936	startc = backslash_trans(*regparse++);
				1937	regc(startc);
				1938	}
				1939	}
				1940	else if (*regparse == '[')
				1941	{
				1942	int c_class;
				1943	int cu;
				1944
				1945	c_class = skip_class_name(&regparse);
				1946	startc = -1;
				1947	/* Characters assumed to be 8 bits! */
				1948	switch (c_class)
				1949	{
				1950	case CLASS_NONE:
				1951	/* literal '[', allow [[-x] as a range */
				1952	startc = *regparse++;
				1953	regc(startc);
				1954	break;
				1955	case CLASS_ALNUM:
				1956	for (cu = 1; cu <= 255; cu++)
				1957	if (isalnum(cu))
				1958	regc(cu);
				1959	break;
				1960	case CLASS_ALPHA:
				1961	for (cu = 1; cu <= 255; cu++)
				1962	if (isalpha(cu))
				1963	regc(cu);
				1964	break;
				1965	case CLASS_BLANK:
				1966	regc(' ');
				1967	regc('\t');
				1968	break;
				1969	case CLASS_CNTRL:
				1970	for (cu = 1; cu <= 255; cu++)
				1971	if (iscntrl(cu))
				1972	regc(cu);
				1973	break;
				1974	case CLASS_DIGIT:
				1975	for (cu = 1; cu <= 255; cu++)
				1976	if (VIM_ISDIGIT(cu))
				1977	regc(cu);
				1978	break;
				1979	case CLASS_GRAPH:
				1980	for (cu = 1; cu <= 255; cu++)
				1981	if (isgraph(cu))
				1982	regc(cu);
				1983	break;
				1984	case CLASS_LOWER:
				1985	for (cu = 1; cu <= 255; cu++)
				1986	if (islower(cu))
				1987	regc(cu);
				1988	break;
				1989	case CLASS_PRINT:
				1990	for (cu = 1; cu <= 255; cu++)
				1991	if (vim_isprintc(cu))
				1992	regc(cu);
				1993	break;
				1994	case CLASS_PUNCT:
				1995	for (cu = 1; cu <= 255; cu++)
				1996	if (ispunct(cu))
				1997	regc(cu);
				1998	break;
				1999	case CLASS_SPACE:
				2000	for (cu = 9; cu <= 13; cu++)
				2001	regc(cu);
				2002	regc(' ');
				2003	break;
				2004	case CLASS_UPPER:
				2005	for (cu = 1; cu <= 255; cu++)
				2006	if (isupper(cu))
				2007	regc(cu);
				2008	break;
				2009	case CLASS_XDIGIT:
				2010	for (cu = 1; cu <= 255; cu++)
				2011	if (vim_isxdigit(cu))
				2012	regc(cu);
				2013	break;
				2014	case CLASS_TAB:
				2015	regc('\t');
				2016	break;
				2017	case CLASS_RETURN:
				2018	regc('\r');
				2019	break;
				2020	case CLASS_BACKSPACE:
				2021	regc('\b');
				2022	break;
				2023	case CLASS_ESCAPE:
				2024	regc('\033');
				2025	break;
				2026	}
				2027	}
				2028	else
				2029	{
				2030	#ifdef FEAT_MBYTE
				2031	if (has_mbyte)
				2032	{
				2033	int len;
				2034
				2035	/* produce a multibyte character, including any
				2036	* following composing characters */
				2037	startc = mb_ptr2char(regparse);
				2038	len = (*mb_ptr2len_check)(regparse);
				2039	if (enc_utf8 && utf_char2len(startc) != len)
				2040	startc = -1; /* composing chars */
				2041	while (--len >= 0)
				2042	regc(*regparse++);
				2043	}
				2044	else
				2045	#endif
				2046	{
				2047	startc = *regparse++;
				2048	regc(startc);
				2049	}
				2050	}
				2051	}
				2052	regc(NUL);
				2053	prevchr_len = 1; /* last char was the ']' */
				2054	if (*regparse != ']')
				2055	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2056	skipchr(); /* let's be friends with the lexer again */
				2057	*flagp \|= HASWIDTH \| SIMPLE;
				2058	break;
				2059	}
				2060	}
				2061	/* FALLTHROUGH */
				2062
				2063	default:
				2064	{
				2065	int len;
				2066
				2067	#ifdef FEAT_MBYTE
				2068	/* A multi-byte character is handled as a separate atom if it's
				2069	* before a multi. */
				2070	if (has_mbyte && (*mb_char2len)(c) > 1
				2071	&& re_multi_type(peekchr()) != NOT_MULTI)
				2072	{
				2073	ret = regnode(MULTIBYTECODE);
				2074	regmbc(c);
				2075	*flagp \|= HASWIDTH \| SIMPLE;
				2076	break;
				2077	}
				2078	#endif
				2079
				2080	ret = regnode(EXACTLY);
				2081
				2082	/*
				2083	* Append characters as long as:
				2084	* - there is no following multi, we then need the character in
				2085	* front of it as a single character operand
				2086	* - not running into a Magic character
				2087	* - "one_exactly" is not set
				2088	* But always emit at least one character. Might be a Multi,
				2089	* e.g., a "[" without matching "]".
				2090	*/
				2091	for (len = 0; c != NUL && (len == 0
				2092	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2093	&& !one_exactly
				2094	&& !is_Magic(c))); ++len)
				2095	{
				2096	c = no_Magic(c);
				2097	#ifdef FEAT_MBYTE
				2098	if (has_mbyte)
				2099	{
				2100	regmbc(c);
				2101	if (enc_utf8)
				2102	{
				2103	int off;
				2104	int l;
				2105
				2106	/* Need to get composing character too, directly
				2107	* access regparse for that, because skipchr() skips
				2108	* over composing chars. */
				2109	ungetchr();
				2110	if (*regparse == '\\' && regparse[1] != NUL)
				2111	off = 1;
				2112	else
				2113	off = 0;
				2114	for (;;)
				2115	{
				2116	l = utf_ptr2len_check(regparse + off);
				2117	if (!UTF_COMPOSINGLIKE(regparse + off,
				2118	regparse + off + l))
				2119	break;
				2120	off += l;
				2121	regmbc(utf_ptr2char(regparse + off));
				2122	}
				2123	skipchr();
				2124	}
				2125	}
				2126	else
				2127	#endif
				2128	regc(c);
				2129	c = getchr();
				2130	}
				2131	ungetchr();
				2132
				2133	regc(NUL);
				2134	*flagp \|= HASWIDTH;
				2135	if (len == 1)
				2136	*flagp \|= SIMPLE;
				2137	}
				2138	break;
				2139	}
				2140
				2141	return ret;
				2142	}
				2143
				2144	/*
				2145	* emit a node
				2146	* Return pointer to generated code.
				2147	*/
				2148	static char_u *
				2149	regnode(op)
				2150	int op;
				2151	{
				2152	char_u *ret;
				2153
				2154	ret = regcode;
				2155	if (ret == JUST_CALC_SIZE)
				2156	regsize += 3;
				2157	else
				2158	{
				2159	*regcode++ = op;
				2160	regcode++ = NUL; / Null "next" pointer. */
				2161	*regcode++ = NUL;
				2162	}
				2163	return ret;
				2164	}
				2165
				2166	/*
				2167	* Emit (if appropriate) a byte of code
				2168	*/
				2169	static void
				2170	regc(b)
				2171	int b;
				2172	{
				2173	if (regcode == JUST_CALC_SIZE)
				2174	regsize++;
				2175	else
				2176	*regcode++ = b;
				2177	}
				2178
				2179	#ifdef FEAT_MBYTE
				2180	/*
				2181	* Emit (if appropriate) a multi-byte character of code
				2182	*/
				2183	static void
				2184	regmbc(c)
				2185	int c;
				2186	{
				2187	if (regcode == JUST_CALC_SIZE)
				2188	regsize += (*mb_char2len)(c);
				2189	else
				2190	regcode += (*mb_char2bytes)(c, regcode);
				2191	}
				2192	#endif
				2193
				2194	/*
				2195	* reginsert - insert an operator in front of already-emitted operand
				2196	*
				2197	* Means relocating the operand.
				2198	*/
				2199	static void
				2200	reginsert(op, opnd)
				2201	int op;
				2202	char_u *opnd;
				2203	{
				2204	char_u *src;
				2205	char_u *dst;
				2206	char_u *place;
				2207
				2208	if (regcode == JUST_CALC_SIZE)
				2209	{
				2210	regsize += 3;
				2211	return;
				2212	}
				2213	src = regcode;
				2214	regcode += 3;
				2215	dst = regcode;
				2216	while (src > opnd)
				2217	--dst = --src;
				2218
				2219	place = opnd; /* Op node, where operand used to be. */
				2220	*place++ = op;
				2221	*place++ = NUL;
				2222	*place = NUL;
				2223	}
				2224
				2225	/*
				2226	* reginsert_limits - insert an operator in front of already-emitted operand.
				2227	* The operator has the given limit values as operands. Also set next pointer.
				2228	*
				2229	* Means relocating the operand.
				2230	*/
				2231	static void
				2232	reginsert_limits(op, minval, maxval, opnd)
				2233	int op;
				2234	long minval;
				2235	long maxval;
				2236	char_u *opnd;
				2237	{
				2238	char_u *src;
				2239	char_u *dst;
				2240	char_u *place;
				2241
				2242	if (regcode == JUST_CALC_SIZE)
				2243	{
				2244	regsize += 11;
				2245	return;
				2246	}
				2247	src = regcode;
				2248	regcode += 11;
				2249	dst = regcode;
				2250	while (src > opnd)
				2251	--dst = --src;
				2252
				2253	place = opnd; /* Op node, where operand used to be. */
				2254	*place++ = op;
				2255	*place++ = NUL;
				2256	*place++ = NUL;
				2257	place = re_put_long(place, (long_u)minval);
				2258	place = re_put_long(place, (long_u)maxval);
				2259	regtail(opnd, place);
				2260	}
				2261
				2262	/*
				2263	* Write a long as four bytes at "p" and return pointer to the next char.
				2264	*/
				2265	static char_u *
				2266	re_put_long(p, val)
				2267	char_u *p;
				2268	long_u val;
				2269	{
				2270	*p++ = (char_u) ((val >> 24) & 0377);
				2271	*p++ = (char_u) ((val >> 16) & 0377);
				2272	*p++ = (char_u) ((val >> 8) & 0377);
				2273	*p++ = (char_u) (val & 0377);
				2274	return p;
				2275	}
				2276
				2277	/*
				2278	* regtail - set the next-pointer at the end of a node chain
				2279	*/
				2280	static void
				2281	regtail(p, val)
				2282	char_u *p;
				2283	char_u *val;
				2284	{
				2285	char_u *scan;
				2286	char_u *temp;
				2287	int offset;
				2288
				2289	if (p == JUST_CALC_SIZE)
				2290	return;
				2291
				2292	/* Find last node. */
				2293	scan = p;
				2294	for (;;)
				2295	{
				2296	temp = regnext(scan);
				2297	if (temp == NULL)
				2298	break;
				2299	scan = temp;
				2300	}
				2301
				2302	if (OP(scan) == BACK)
				2303	offset = (int)(scan - val);
				2304	else
				2305	offset = (int)(val - scan);
				2306	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2307	*(scan + 2) = (char_u) (offset & 0377);
				2308	}
				2309
				2310	/*
				2311	* regoptail - regtail on item after a BRANCH; nop if none
				2312	*/
				2313	static void
				2314	regoptail(p, val)
				2315	char_u *p;
				2316	char_u *val;
				2317	{
				2318	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2319	if (p == NULL \|\| p == JUST_CALC_SIZE
				2320	\|\| (OP(p) != BRANCH
				2321	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2322	return;
				2323	regtail(OPERAND(p), val);
				2324	}
				2325
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below hold the state of that lexer.  All character
 * variables use -1 for "no character".
 */

/* static int       curchr; */
static int      prevprevchr;    /* value of prevchr before the last skipchr() */
static int      prevchr;        /* value of curchr before the last skipchr() */
static int      nextchr;        /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
				2342
				2343	static void
				2344	initchr(str)
				2345	char_u *str;
				2346	{
				2347	regparse = str;
				2348	prevchr_len = 0;
				2349	curchr = prevprevchr = prevchr = nextchr = -1;
				2350	at_start = TRUE;
				2351	prev_at_start = FALSE;
				2352	}
				2353
				2354	static int
				2355	peekchr()
				2356	{
				2357	if (curchr == -1)
				2358	{
				2359	switch (curchr = regparse[0])
				2360	{
				2361	case '.':
				2362	case '[':
				2363	case '~':
				2364	/* magic when 'magic' is on */
				2365	if (reg_magic >= MAGIC_ON)
				2366	curchr = Magic(curchr);
				2367	break;
				2368	case '(':
				2369	case ')':
				2370	case '{':
				2371	case '%':
				2372	case '+':
				2373	case '=':
				2374	case '?':
				2375	case '@':
				2376	case '!':
				2377	case '&':
				2378	case '\|':
				2379	case '<':
				2380	case '>':
				2381	case '#': /* future ext. */
				2382	case '"': /* future ext. */
				2383	case '\'': /* future ext. */
				2384	case ',': /* future ext. */
				2385	case '-': /* future ext. */
				2386	case ':': /* future ext. */
				2387	case ';': /* future ext. */
				2388	case '`': /* future ext. */
				2389	case '/': /* Can't be used in / command */
				2390	/* magic only after "\v" */
				2391	if (reg_magic == MAGIC_ALL)
				2392	curchr = Magic(curchr);
				2393	break;
				2394	case '*':
				2395	/* * is not magic as the very first character, eg "?*ptr" and when
				2396	* after '^', eg "/^ptr" /
				2397	if (reg_magic >= MAGIC_ON && !at_start
				2398	&& !(prev_at_start && prevchr == Magic('^')))
				2399	curchr = Magic('*');
				2400	break;
				2401	case '^':
				2402	/* '^' is only magic as the very first character and if it's after
				2403	* "\(", "\\|", "\&' or "\n" */
				2404	if (reg_magic >= MAGIC_OFF
				2405	&& (at_start
				2406	\|\| reg_magic == MAGIC_ALL
				2407	\|\| prevchr == Magic('(')
				2408	\|\| prevchr == Magic('\|')
				2409	\|\| prevchr == Magic('&')
				2410	\|\| prevchr == Magic('n')
				2411	\|\| (no_Magic(prevchr) == '('
				2412	&& prevprevchr == Magic('%'))))
				2413	{
				2414	curchr = Magic('^');
				2415	at_start = TRUE;
				2416	prev_at_start = FALSE;
				2417	}
				2418	break;
				2419	case '$':
				2420	/* '$' is only magic as the very last char and if it's in front of
				2421	* either "\\|", "\)", "\&", or "\n" */
				2422	if (reg_magic >= MAGIC_OFF)
				2423	{
				2424	char_u *p = regparse + 1;
				2425
				2426	/* ignore \c \C \m and \M after '$' */
				2427	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2428	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2429	p += 2;
				2430	if (p[0] == NUL
				2431	\|\| (p[0] == '\\'
				2432	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2433	\|\| p[1] == 'n'))
				2434	\|\| reg_magic == MAGIC_ALL)
				2435	curchr = Magic('$');
				2436	}
				2437	break;
				2438	case '\\':
				2439	{
				2440	int c = regparse[1];
				2441
				2442	if (c == NUL)
				2443	curchr = '\\'; /* trailing '\' */
				2444	else if (
				2445	#ifdef EBCDIC
				2446	vim_strchr(META, c)
				2447	#else
				2448	c <= '~' && META_flags[c]
				2449	#endif
				2450	)
				2451	{
				2452	/*
				2453	* META contains everything that may be magic sometimes,
				2454	* except ^ and $ ("\^" and "\$" are only magic after
				2455	* "\v"). We now fetch the next character and toggle its
				2456	* magicness. Therefore, \ is so meta-magic that it is
				2457	* not in META.
				2458	*/
				2459	curchr = -1;
				2460	prev_at_start = at_start;
				2461	at_start = FALSE; /* be able to say "/\ptr" /
				2462	++regparse;
				2463	peekchr();
				2464	--regparse;
				2465	curchr = toggle_Magic(curchr);
				2466	}
				2467	else if (vim_strchr(REGEXP_ABBR, c))
				2468	{
				2469	/*
				2470	* Handle abbreviations, like "\t" for TAB -- webb
				2471	*/
				2472	curchr = backslash_trans(c);
				2473	}
				2474	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2475	curchr = toggle_Magic(c);
				2476	else
				2477	{
				2478	/*
				2479	* Next character can never be (made) magic?
				2480	* Then backslashing it won't do anything.
				2481	*/
				2482	#ifdef FEAT_MBYTE
				2483	if (has_mbyte)
				2484	curchr = (*mb_ptr2char)(regparse + 1);
				2485	else
				2486	#endif
				2487	curchr = c;
				2488	}
				2489	break;
				2490	}
				2491
				2492	#ifdef FEAT_MBYTE
				2493	default:
				2494	if (has_mbyte)
				2495	curchr = (*mb_ptr2char)(regparse);
				2496	#endif
				2497	}
				2498	}
				2499
				2500	return curchr;
				2501	}
				2502
				2503	/*
				2504	* Eat one lexed character. Do this in a way that we can undo it.
				2505	*/
				2506	static void
				2507	skipchr()
				2508	{
				2509	/* peekchr() eats a backslash, do the same here */
				2510	if (*regparse == '\\')
				2511	prevchr_len = 1;
				2512	else
				2513	prevchr_len = 0;
				2514	if (regparse[prevchr_len] != NUL)
				2515	{
				2516	#ifdef FEAT_MBYTE
				2517	if (has_mbyte)
				2518	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2519	else
				2520	#endif
				2521	++prevchr_len;
				2522	}
				2523	regparse += prevchr_len;
				2524	prev_at_start = at_start;
				2525	at_start = FALSE;
				2526	prevprevchr = prevchr;
				2527	prevchr = curchr;
				2528	curchr = nextchr; /* use previously unget char, or -1 */
				2529	nextchr = -1;
				2530	}
				2531
				2532	/*
				2533	* Skip a character while keeping the value of prev_at_start for at_start.
				2534	* prevchr and prevprevchr are also kept.
				2535	*/
				2536	static void
				2537	skipchr_keepstart()
				2538	{
				2539	int as = prev_at_start;
				2540	int pr = prevchr;
				2541	int prpr = prevprevchr;
				2542
				2543	skipchr();
				2544	at_start = as;
				2545	prevchr = pr;
				2546	prevprevchr = prpr;
				2547	}
				2548
				2549	static int
				2550	getchr()
				2551	{
				2552	int chr = peekchr();
				2553
				2554	skipchr();
				2555	return chr;
				2556	}
				2557
				2558	/*
				2559	* put character back. Works only once!
				2560	*/
				2561	static void
				2562	ungetchr()
				2563	{
				2564	nextchr = curchr;
				2565	curchr = prevchr;
				2566	prevchr = prevprevchr;
				2567	at_start = prev_at_start;
				2568	prev_at_start = FALSE;
				2569
				2570	/* Backup regparse, so that it's at the same position as before the
				2571	* getchr(). */
				2572	regparse -= prevchr_len;
				2573	}
				2574
				2575	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2576	* Get and return the value of the hex string at the current position.
				2577	* Return -1 if there is no valid hex number.
				2578	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2579	* blahblah\%x20asdf
				2580	* before-^ ^-after
				2581	* The parameter controls the maximum number of input characters. This will be
				2582	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2583	*/
				2584	static int
				2585	gethexchrs(maxinputlen)
				2586	int maxinputlen;
				2587	{
				2588	int nr = 0;
				2589	int c;
				2590	int i;
				2591
				2592	for (i = 0; i < maxinputlen; ++i)
				2593	{
				2594	c = regparse[0];
				2595	if (!vim_isxdigit(c))
				2596	break;
				2597	nr <<= 4;
				2598	nr \|= hex2nr(c);
				2599	++regparse;
				2600	}
				2601
				2602	if (i == 0)
				2603	return -1;
				2604	return nr;
				2605	}
				2606
				2607	/*
				2608	* get and return the value of the decimal string immediately after the
				2609	* current position. Return -1 for invalid. Consumes all digits.
				2610	*/
				2611	static int
				2612	getdecchrs()
				2613	{
				2614	int nr = 0;
				2615	int c;
				2616	int i;
				2617
				2618	for (i = 0; ; ++i)
				2619	{
				2620	c = regparse[0];
				2621	if (c < '0' \|\| c > '9')
				2622	break;
				2623	nr *= 10;
				2624	nr += c - '0';
				2625	++regparse;
				2626	}
				2627
				2628	if (i == 0)
				2629	return -1;
				2630	return nr;
				2631	}
				2632
				2633	/*
				2634	* get and return the value of the octal string immediately after the current
				2635	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2636	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2637	* treat 8 or 9 as recognised characters. Position is updated:
				2638	* blahblah\%o210asdf
				2639	* before-^ ^-after
				2640	*/
				2641	static int
				2642	getoctchrs()
				2643	{
				2644	int nr = 0;
				2645	int c;
				2646	int i;
				2647
				2648	for (i = 0; i < 3 && nr < 040; ++i)
				2649	{
				2650	c = regparse[0];
				2651	if (c < '0' \|\| c > '7')
				2652	break;
				2653	nr <<= 3;
				2654	nr \|= hex2nr(c);
				2655	++regparse;
				2656	}
				2657
				2658	if (i == 0)
				2659	return -1;
				2660	return nr;
				2661	}
				2662
				2663	/*
				2664	* Get a number after a backslash that is inside [].
				2665	* When nothing is recognized return a backslash.
				2666	*/
				2667	static int
				2668	coll_get_char()
				2669	{
				2670	int nr = -1;
				2671
				2672	switch (*regparse++)
				2673	{
				2674	case 'd': nr = getdecchrs(); break;
				2675	case 'o': nr = getoctchrs(); break;
				2676	case 'x': nr = gethexchrs(2); break;
				2677	case 'u': nr = gethexchrs(4); break;
				2678	case 'U': nr = gethexchrs(8); break;
				2679	}
				2680	if (nr < 0)
				2681	{
				2682	/* If getting the number fails be backwards compatible: the character
				2683	* is a backslash. */
				2684	--regparse;
				2685	nr = '\\';
				2686	}
				2687	return nr;
				2688	}
				2689
				2690	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2691	* read_limits - Read two integers to be taken as a minimum and maximum.
				2692	* If the first character is '-', then the range is reversed.
				2693	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2694	* missing, a very big number is the default.
				2695	*/
				2696	static int
				2697	read_limits(minval, maxval)
				2698	long *minval;
				2699	long *maxval;
				2700	{
				2701	int reverse = FALSE;
				2702	char_u *first_char;
				2703	long tmp;
				2704
				2705	if (*regparse == '-')
				2706	{
				2707	/* Starts with '-', so reverse the range later */
				2708	regparse++;
				2709	reverse = TRUE;
				2710	}
				2711	first_char = regparse;
				2712	*minval = getdigits(&regparse);
				2713	if (regparse == ',') / There is a comma */
				2714	{
				2715	if (vim_isdigit(*++regparse))
				2716	*maxval = getdigits(&regparse);
				2717	else
				2718	*maxval = MAX_LIMIT;
				2719	}
				2720	else if (VIM_ISDIGIT(*first_char))
				2721	maxval = minval; /* It was \{n} or \{-n} */
				2722	else
				2723	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2724	if (*regparse == '\\')
				2725	regparse++; /* Allow either \{...} or \{...\} */
				2726	if (regparse != '}' \|\| (maxval == 0 && *minval == 0))
				2727	{
				2728	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2729	reg_magic == MAGIC_ALL ? "" : "\\");
				2730	EMSG_RET_FAIL(IObuff);
				2731	}
				2732
				2733	/*
				2734	* Reverse the range if there was a '-', or make sure it is in the right
				2735	* order otherwise.
				2736	*/
				2737	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2738	{
				2739	tmp = *minval;
				2740	minval = maxval;
				2741	*maxval = tmp;
				2742	}
				2743	skipchr(); /* let's be friends with the lexer again */
				2744	return OK;
				2745	}
				2746
				2747	/*
				2748	* vim_regexec and friends
				2749	*/
				2750
				2751	/*
				2752	* Global work variables for vim_regexec().
				2753	*/
				2754
				2755	/* The current match-position is remembered with these variables: */
				2756	static linenr_T reglnum; /* line number, relative to first line */
				2757	static char_u regline; / start of current line */
				2758	static char_u reginput; / current input, points into "regline" */
				2759
				2760	static int need_clear_subexpr; /* subexpressions still need to be
				2761	* cleared */
				2762	#ifdef FEAT_SYN_HL
				2763	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2764	* still need to be cleared */
				2765	#endif
				2766
				2767	static int out_of_stack; /* TRUE when ran out of stack space */
				2768
				2769	/*
				2770	* Structure used to save the current input state, when it needs to be
				2771	* restored after trying a match. Used by reg_save() and reg_restore().
				2772	*/
				2773	typedef struct
				2774	{
				2775	union
				2776	{
				2777	char_u ptr; / reginput pointer, for single-line regexp */
				2778	lpos_T pos; /* reginput pos, for multi-line regexp */
				2779	} rs_u;
				2780	} regsave_T;
				2781
/*
 * struct to save the start/end pointer (single-line) or position
 * (multi-line) of a sub-expression in; filled in by save_se() and used by
 * restore_se().
 */
typedef struct
{
    union
    {
        char_u  *ptr;   /* used when REG_MULTI is false */
        lpos_T  pos;    /* used when REG_MULTI is true */
    } se_u;
} save_se_T;
				2791
				2792	static char_u *reg_getline __ARGS((linenr_T lnum));
				2793	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2794	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2795	static void cleanup_subexpr __ARGS((void));
				2796	#ifdef FEAT_SYN_HL
				2797	static void cleanup_zsubexpr __ARGS((void));
				2798	#endif
				2799	static void reg_nextline __ARGS((void));
				2800	static void reg_save __ARGS((regsave_T *save));
				2801	static void reg_restore __ARGS((regsave_T *save));
				2802	static int reg_save_equal __ARGS((regsave_T *save));
				2803	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2804	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2805
/*
 * Save the sub-expressions before attempting a match.
 * Calls save_se_multi() with "posp" for a multi-line match and
 * save_se_one() with "pp" otherwise.
 * NOTE: expands to a conditional expression without outer parentheses, use
 * it as a statement of its own only.
 */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/*
 * After a failed match restore the sub-expressions.
 * Copies the saved position back into "*posp" for a multi-line match and
 * the saved pointer into "*pp" otherwise.
 * NOTE: expands to a brace block, not a single statement: a following ';'
 * is an empty statement, thus don't use it between "if" and "else" without
 * braces.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
        *(posp) = (savep)->se_u.pos; \
    else \
        *(pp) = (savep)->se_u.ptr; }
				2816
/* Forward declarations. */
static int      re_num_cmp __ARGS((long_u val, char_u *scan));
static int      regmatch __ARGS((char_u *prog));
static int      regrepeat __ARGS((char_u *p, long maxcount));

#ifdef DEBUG
/* Only defined in a DEBUG build; not static, thus visible to other files. */
int             regnarrate = 0;
#endif
				2824
/*
 * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
 * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
 * contains '\c' or '\C' the value is overruled.
 */
static int      ireg_ic;

#ifdef FEAT_MBYTE
/*
 * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
 * in the regexp.  Defaults to false, always.
 */
static int      ireg_icombine;
#endif

/*
 * Sometimes need to save a copy of a line.  Since alloc()/free() is very
 * slow, we keep one allocated piece of memory and only re-allocate it when
 * it's too small.  It's freed in vim_regexec_both() when finished.
 */
static char_u   *reg_tofree;
static unsigned reg_tofreelen;

/*
 * These variables are set when executing a regexp to speed up the execution.
 * Which ones are set depends on whether a single-line or multi-line match is
 * done:
 *                      single-line             multi-line
 * reg_match            &regmatch_T             NULL
 * reg_mmatch           NULL                    &regmmatch_T
 * reg_startp           reg_match->startp       <invalid>
 * reg_endp             reg_match->endp         <invalid>
 * reg_startpos         <invalid>               reg_mmatch->startpos
 * reg_endpos           <invalid>               reg_mmatch->endpos
 * reg_win              NULL                    window in which to search
 * reg_buf              <invalid>               buffer in which to search
 * reg_firstlnum        <invalid>               first line in which to search
 * reg_maxline          0                       last line nr
 * reg_line_lbr         FALSE or TRUE           FALSE
 */
static regmatch_T       *reg_match;
static regmmatch_T      *reg_mmatch;
static char_u           **reg_startp = NULL;
static char_u           **reg_endp = NULL;
static lpos_T           *reg_startpos = NULL;
static lpos_T           *reg_endpos = NULL;
static win_T            *reg_win;
static buf_T            *reg_buf;
static linenr_T         reg_firstlnum;
static linenr_T         reg_maxline;
static int              reg_line_lbr;       /* "\n" in string is line break */
				2876
				2877	/*
				2878	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				2879	*/
				2880	static char_u *
				2881	reg_getline(lnum)
				2882	linenr_T lnum;
				2883	{
				2884	/* when looking behind for a match/no-match lnum is negative. But we
				2885	* can't go before line 1 */
				2886	if (reg_firstlnum + lnum < 1)
				2887	return NULL;
				2888	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				2889	}
				2890
				2891	static regsave_T behind_pos;
				2892
				2893	#ifdef FEAT_SYN_HL
				2894	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				2895	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				2896	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				2897	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				2898	#endif
				2899
				2900	/* TRUE if using multi-line regexp. */
				2901	#define REG_MULTI (reg_match == NULL)
				2902
				2903	/*
				2904	* Match a regexp against a string.
				2905	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2906	* Uses curbuf for line count and 'iskeyword'.
				2907	*
				2908	* Return TRUE if there is a match, FALSE if not.
				2909	*/
				2910	int
				2911	vim_regexec(rmp, line, col)
				2912	regmatch_T *rmp;
				2913	char_u line; / string to match against */
				2914	colnr_T col; /* column to start looking for match */
				2915	{
				2916	reg_match = rmp;
				2917	reg_mmatch = NULL;
				2918	reg_maxline = 0;
				2919	reg_line_lbr = FALSE;
				2920	reg_win = NULL;
				2921	ireg_ic = rmp->rm_ic;
				2922	#ifdef FEAT_MBYTE
				2923	ireg_icombine = FALSE;
				2924	#endif
				2925	return (vim_regexec_both(line, col) != 0);
				2926	}
				2927
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    /* Same setup as in vim_regexec(), except for reg_line_lbr. */
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    reg_win = NULL;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    return (vim_regexec_both(line, col) != 0);
}
#endif
				2951
				2952	/*
				2953	* Match a regexp against multiple lines.
				2954	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2955	* Uses curbuf for line count and 'iskeyword'.
				2956	*
				2957	* Return zero if there is no match. Return number of lines contained in the
				2958	* match otherwise.
				2959	*/
				2960	long
				2961	vim_regexec_multi(rmp, win, buf, lnum, col)
				2962	regmmatch_T *rmp;
				2963	win_T win; / window in which to search or NULL */
				2964	buf_T buf; / buffer in which to search */
				2965	linenr_T lnum; /* nr of line to start looking for match */
				2966	colnr_T col; /* column to start looking for match */
				2967	{
				2968	long r;
				2969	buf_T *save_curbuf = curbuf;
				2970
				2971	reg_match = NULL;
				2972	reg_mmatch = rmp;
				2973	reg_buf = buf;
				2974	reg_win = win;
				2975	reg_firstlnum = lnum;
				2976	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				2977	reg_line_lbr = FALSE;
				2978	ireg_ic = rmp->rmm_ic;
				2979	#ifdef FEAT_MBYTE
				2980	ireg_icombine = FALSE;
				2981	#endif
				2982
				2983	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				2984	curbuf = buf;
				2985	r = vim_regexec_both(NULL, col);
				2986	curbuf = save_curbuf;
				2987
				2988	return r;
				2989	}
				2990
				2991	/*
				2992	* Match a regexp against a string ("line" points to the string) or multiple
				2993	* lines ("line" is NULL, use reg_getline()).
				2994	*/
				2995	#ifdef HAVE_SETJMP_H
				2996	static long
				2997	vim_regexec_both(line_arg, col_arg)
				2998	char_u *line_arg;
				2999	colnr_T col_arg; /* column to start looking for match */
				3000	#else
				3001	static long
				3002	vim_regexec_both(line, col)
				3003	char_u *line;
				3004	colnr_T col; /* column to start looking for match */
				3005	#endif
				3006	{
				3007	regprog_T *prog;
				3008	char_u *s;
				3009	long retval;
				3010	#ifdef HAVE_SETJMP_H
				3011	char_u *line;
				3012	colnr_T col;
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3013	int did_mch_startjmp = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3014	#endif
				3015
				3016	reg_tofree = NULL;
				3017
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3018	#ifdef HAVE_SETJMP_H
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3019	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3020	line = line_arg;
				3021	col = col_arg;
				3022	#endif
				3023	retval = 0L;
				3024
				3025	if (REG_MULTI)
				3026	{
				3027	prog = reg_mmatch->regprog;
				3028	line = reg_getline((linenr_T)0);
				3029	reg_startpos = reg_mmatch->startpos;
				3030	reg_endpos = reg_mmatch->endpos;
				3031	}
				3032	else
				3033	{
				3034	prog = reg_match->regprog;
				3035	reg_startp = reg_match->startp;
				3036	reg_endp = reg_match->endp;
				3037	}
				3038
				3039	/* Be paranoid... */
				3040	if (prog == NULL \|\| line == NULL)
				3041	{
				3042	EMSG(_(e_null));
				3043	goto theend;
				3044	}
				3045
				3046	/* Check validity of program. */
				3047	if (prog_magic_wrong())
				3048	goto theend;
				3049
				3050	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3051	if (prog->regflags & RF_ICASE)
				3052	ireg_ic = TRUE;
				3053	else if (prog->regflags & RF_NOICASE)
				3054	ireg_ic = FALSE;
				3055
				3056	#ifdef FEAT_MBYTE
				3057	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3058	if (prog->regflags & RF_ICOMBINE)
				3059	ireg_icombine = TRUE;
				3060	#endif
				3061
				3062	/* If there is a "must appear" string, look for it. */
				3063	if (prog->regmust != NULL)
				3064	{
				3065	int c;
				3066
				3067	#ifdef FEAT_MBYTE
				3068	if (has_mbyte)
				3069	c = (*mb_ptr2char)(prog->regmust);
				3070	else
				3071	#endif
				3072	c = *prog->regmust;
				3073	s = line + col;
				3074	while ((s = cstrchr(s, c)) != NULL)
				3075	{
				3076	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3077	break; /* Found it. */
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3078	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3079	}
				3080	if (s == NULL) /* Not present. */
				3081	goto theend;
				3082	}
				3083
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3084	#ifdef HAVE_TRY_EXCEPT
				3085	__try
				3086	{
				3087	#endif
				3088
				3089	#ifdef HAVE_SETJMP_H
				3090	/*
				3091	* Matching with a regexp may cause a very deep recursive call of
				3092	* regmatch(). Vim will crash when running out of stack space. Catch
				3093	* this here if the system supports it.
				3094	* It's a bit slow, do it after the check for "regmust".
				3095	* Don't do it if the caller already set it up.
				3096	*/
				3097	if (!lc_active)
				3098	{
				3099	did_mch_startjmp = TRUE;
				3100	mch_startjmp();
				3101	if (SETJMP(lc_jump_env) != 0)
				3102	{
				3103	mch_didjmp();
				3104	# ifdef SIGHASARG
				3105	if (lc_signal != SIGINT)
				3106	# endif
				3107	EMSG(_(e_complex));
				3108	retval = 0L;
				3109	goto inner_end;
				3110	}
				3111	}
				3112	#endif
				3113
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3114	regline = line;
				3115	reglnum = 0;
				3116	out_of_stack = FALSE;
				3117
				3118	/* Simplest case: Anchored match need be tried only once. */
				3119	if (prog->reganch)
				3120	{
				3121	int c;
				3122
				3123	#ifdef FEAT_MBYTE
				3124	if (has_mbyte)
				3125	c = (*mb_ptr2char)(regline + col);
				3126	else
				3127	#endif
				3128	c = regline[col];
				3129	if (prog->regstart == NUL
				3130	\|\| prog->regstart == c
				3131	\|\| (ireg_ic && ((
				3132	#ifdef FEAT_MBYTE
				3133	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3134	\|\| (c < 255 && prog->regstart < 255 &&
				3135	#endif
				3136	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3137	retval = regtry(prog, col);
				3138	else
				3139	retval = 0;
				3140	}
				3141	else
				3142	{
				3143	/* Messy cases: unanchored match. */
				3144	while (!got_int && !out_of_stack)
				3145	{
				3146	if (prog->regstart != NUL)
				3147	{
				3148	/* Skip until the char we know it must start with. */
				3149	s = cstrchr(regline + col, prog->regstart);
				3150	if (s == NULL)
				3151	{
				3152	retval = 0;
				3153	break;
				3154	}
				3155	col = (int)(s - regline);
				3156	}
				3157
				3158	retval = regtry(prog, col);
				3159	if (retval > 0)
				3160	break;
				3161
				3162	/* if not currently on the first line, get it again */
				3163	if (reglnum != 0)
				3164	{
				3165	regline = reg_getline((linenr_T)0);
				3166	reglnum = 0;
				3167	}
				3168	if (regline[col] == NUL)
				3169	break;
				3170	#ifdef FEAT_MBYTE
				3171	if (has_mbyte)
				3172	col += (*mb_ptr2len_check)(regline + col);
				3173	else
				3174	#endif
				3175	++col;
				3176	}
				3177	}
				3178
				3179	if (out_of_stack)
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3180	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3181
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3182	#ifdef HAVE_SETJMP_H
				3183	inner_end:
				3184	;
				3185	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3186	#ifdef HAVE_TRY_EXCEPT
				3187	}
				3188	__except(EXCEPTION_EXECUTE_HANDLER)
				3189	{
				3190	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3191	{
				3192	RESETSTKOFLW();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3193	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3194	}
				3195	else
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3196	EMSG(_(e_complex));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3197	retval = 0L;
				3198	}
				3199	#endif
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3200	#ifdef HAVE_SETJMP_H
				3201	if (did_mch_startjmp)
				3202	mch_endjmp();
				3203	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3204
				3205	theend:
				3206	/* Didn't find a match. */
				3207	vim_free(reg_tofree);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3208	return retval;
				3209	}
				3210
				3211	#ifdef FEAT_SYN_HL
				3212	static reg_extmatch_T *make_extmatch __ARGS((void));
				3213
				3214	/*
				3215	* Create a new extmatch and mark it as referenced once.
				3216	*/
				3217	static reg_extmatch_T *
				3218	make_extmatch()
				3219	{
				3220	reg_extmatch_T *em;
				3221
				3222	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3223	if (em != NULL)
				3224	em->refcnt = 1;
				3225	return em;
				3226	}
				3227
				3228	/*
				3229	* Add a reference to an extmatch.
				3230	*/
				3231	reg_extmatch_T *
				3232	ref_extmatch(em)
				3233	reg_extmatch_T *em;
				3234	{
				3235	if (em != NULL)
				3236	em->refcnt++;
				3237	return em;
				3238	}
				3239
				3240	/*
				3241	* Remove a reference to an extmatch. If there are no references left, free
				3242	* the info.
				3243	*/
				3244	void
				3245	unref_extmatch(em)
				3246	reg_extmatch_T *em;
				3247	{
				3248	int i;
				3249
				3250	if (em != NULL && --em->refcnt <= 0)
				3251	{
				3252	for (i = 0; i < NSUBEXP; ++i)
				3253	vim_free(em->matches[i]);
				3254	vim_free(em);
				3255	}
				3256	}
				3257	#endif
				3258
				3259	/*
				3260	* regtry - try match of "prog" with at regline["col"].
				3261	* Returns 0 for failure, number of lines contained in the match otherwise.
				3262	*/
				3263	static long
				3264	regtry(prog, col)
				3265	regprog_T *prog;
				3266	colnr_T col;
				3267	{
				3268	reginput = regline + col;
				3269	need_clear_subexpr = TRUE;
				3270	#ifdef FEAT_SYN_HL
				3271	/* Clear the external match subpointers if necessary. */
				3272	if (prog->reghasz == REX_SET)
				3273	need_clear_zsubexpr = TRUE;
				3274	#endif
				3275
				3276	if (regmatch(prog->program + 1))
				3277	{
				3278	cleanup_subexpr();
				3279	if (REG_MULTI)
				3280	{
				3281	if (reg_startpos[0].lnum < 0)
				3282	{
				3283	reg_startpos[0].lnum = 0;
				3284	reg_startpos[0].col = col;
				3285	}
				3286	if (reg_endpos[0].lnum < 0)
				3287	{
				3288	reg_endpos[0].lnum = reglnum;
				3289	reg_endpos[0].col = (int)(reginput - regline);
				3290	}
				3291	else
				3292	/* Use line number of "\ze". */
				3293	reglnum = reg_endpos[0].lnum;
				3294	}
				3295	else
				3296	{
				3297	if (reg_startp[0] == NULL)
				3298	reg_startp[0] = regline + col;
				3299	if (reg_endp[0] == NULL)
				3300	reg_endp[0] = reginput;
				3301	}
				3302	#ifdef FEAT_SYN_HL
				3303	/* Package any found \z(...\) matches for export. Default is none. */
				3304	unref_extmatch(re_extmatch_out);
				3305	re_extmatch_out = NULL;
				3306
				3307	if (prog->reghasz == REX_SET)
				3308	{
				3309	int i;
				3310
				3311	cleanup_zsubexpr();
				3312	re_extmatch_out = make_extmatch();
				3313	for (i = 0; i < NSUBEXP; i++)
				3314	{
				3315	if (REG_MULTI)
				3316	{
				3317	/* Only accept single line matches. */
				3318	if (reg_startzpos[i].lnum >= 0
				3319	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3320	re_extmatch_out->matches[i] =
				3321	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3322	+ reg_startzpos[i].col,
				3323	reg_endzpos[i].col - reg_startzpos[i].col);
				3324	}
				3325	else
				3326	{
				3327	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3328	re_extmatch_out->matches[i] =
				3329	vim_strnsave(reg_startzp[i],
				3330	(int)(reg_endzp[i] - reg_startzp[i]));
				3331	}
				3332	}
				3333	}
				3334	#endif
				3335	return 1 + reglnum;
				3336	}
				3337	return 0;
				3338	}
				3339
				3340	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3341	static int reg_prev_class __ARGS((void));
				3342
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3343	/*
				3344	* Get class of previous character.
				3345	*/
				3346	static int
				3347	reg_prev_class()
				3348	{
				3349	if (reginput > regline)
				3350	return mb_get_class(reginput - 1
				3351	- (*mb_head_off)(regline, reginput - 1));
				3352	return -1;
				3353	}
				3354
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3355	#endif
/* Advance "reginput" using mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Minimum and maximum taken from a BRACE_LIMITS node.  Logically these are
 * locals of regmatch(); they are file-level statics so that the (possibly
 * deeply recursive) regmatch() calls use less stack space.
 */
static long bl_minval, bl_maxval;
				3365
				3366	/*
				3367	* regmatch - main matching routine
				3368	*
				3369	* Conceptually the strategy is simple: Check to see whether the current
				3370	* node matches, call self recursively to see whether the rest matches,
				3371	* and then act accordingly. In practice we make some effort to avoid
				3372	* recursion, in particular by going through "ordinary" nodes (that don't
				3373	* need to know whether the rest of the match failed) by a loop instead of
				3374	* by recursion.
				3375	*
				3376	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3377	* the last matched character.
				3378	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3379	* undefined state!
				3380	*/
				3381	static int
				3382	regmatch(scan)
				3383	char_u scan; / Current node. */
				3384	{
				3385	char_u next; / Next node. */
				3386	int op;
				3387	int c;
				3388
				3389	#ifdef HAVE_GETRLIMIT
				3390	/* Check if we are running out of stack space. Could be caused by
				3391	* recursively calling ourselves. */
				3392	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3393	{
				3394	out_of_stack = TRUE;
				3395	return FALSE;
				3396	}
				3397	#endif
				3398
				3399	/* Some patterns may cause a long time to match, even though they are not
				3400	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3401	fast_breakcheck();
				3402
				3403	#ifdef DEBUG
				3404	if (scan != NULL && regnarrate)
				3405	{
				3406	mch_errmsg(regprop(scan));
				3407	mch_errmsg("(\n");
				3408	}
				3409	#endif
				3410	while (scan != NULL)
				3411	{
				3412	if (got_int \|\| out_of_stack)
				3413	return FALSE;
				3414	#ifdef DEBUG
				3415	if (regnarrate)
				3416	{
				3417	mch_errmsg(regprop(scan));
				3418	mch_errmsg("...\n");
				3419	# ifdef FEAT_SYN_HL
				3420	if (re_extmatch_in != NULL)
				3421	{
				3422	int i;
				3423
				3424	mch_errmsg(_("External submatches:\n"));
				3425	for (i = 0; i < NSUBEXP; i++)
				3426	{
				3427	mch_errmsg(" \"");
				3428	if (re_extmatch_in->matches[i] != NULL)
				3429	mch_errmsg(re_extmatch_in->matches[i]);
				3430	mch_errmsg("\"\n");
				3431	}
				3432	}
				3433	# endif
				3434	}
				3435	#endif
				3436	next = regnext(scan);
				3437
				3438	op = OP(scan);
				3439	/* Check for character class with NL added. */
				3440	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3441	{
				3442	reg_nextline();
				3443	}
				3444	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3445	{
				3446	ADVANCE_REGINPUT();
				3447	}
				3448	else
				3449	{
				3450	if (WITH_NL(op))
				3451	op -= ADD_NL;
				3452	#ifdef FEAT_MBYTE
				3453	if (has_mbyte)
				3454	c = (*mb_ptr2char)(reginput);
				3455	else
				3456	#endif
				3457	c = *reginput;
				3458	switch (op)
				3459	{
				3460	case BOL:
				3461	if (reginput != regline)
				3462	return FALSE;
				3463	break;
				3464
				3465	case EOL:
				3466	if (c != NUL)
				3467	return FALSE;
				3468	break;
				3469
				3470	case RE_BOF:
				3471	/* Passing -1 to the getline() function provided for the search
				3472	* should always return NULL if the current line is the first
				3473	* line of the file. */
				3474	if (reglnum != 0 \|\| reginput != regline
				3475	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3476	return FALSE;
				3477	break;
				3478
				3479	case RE_EOF:
				3480	if (reglnum != reg_maxline \|\| c != NUL)
				3481	return FALSE;
				3482	break;
				3483
				3484	case CURSOR:
				3485	/* Check if the buffer is in a window and compare the
				3486	* reg_win->w_cursor position to the match position. */
				3487	if (reg_win == NULL
				3488	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3489	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3490	return FALSE;
				3491	break;
				3492
				3493	case RE_LNUM:
				3494	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3495	scan))
				3496	return FALSE;
				3497	break;
				3498
				3499	case RE_COL:
				3500	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3501	return FALSE;
				3502	break;
				3503
				3504	case RE_VCOL:
				3505	if (!re_num_cmp((long_u)win_linetabsize(
				3506	reg_win == NULL ? curwin : reg_win,
				3507	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3508	return FALSE;
				3509	break;
				3510
				3511	case BOW: /* \<word; reginput points to w */
				3512	if (c == NUL) /* Can't match at end of line */
				3513	return FALSE;
				3514	#ifdef FEAT_MBYTE
				3515	if (has_mbyte)
				3516	{
				3517	int this_class;
				3518
				3519	/* Get class of current and previous char (if it exists). */
				3520	this_class = mb_get_class(reginput);
				3521	if (this_class <= 1)
				3522	return FALSE; /* not on a word at all */
				3523	if (reg_prev_class() == this_class)
				3524	return FALSE; /* previous char is in same word */
				3525	}
				3526	#endif
				3527	else
				3528	{
				3529	if (!vim_iswordc(c)
				3530	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3531	return FALSE;
				3532	}
				3533	break;
				3534
				3535	case EOW: /* word\>; reginput points after d */
				3536	if (reginput == regline) /* Can't match at start of line */
				3537	return FALSE;
				3538	#ifdef FEAT_MBYTE
				3539	if (has_mbyte)
				3540	{
				3541	int this_class, prev_class;
				3542
				3543	/* Get class of current and previous char (if it exists). */
				3544	this_class = mb_get_class(reginput);
				3545	prev_class = reg_prev_class();
				3546	if (this_class == prev_class)
				3547	return FALSE;
				3548	if (prev_class == 0 \|\| prev_class == 1)
				3549	return FALSE;
				3550	}
				3551	else
				3552	#endif
				3553	{
				3554	if (!vim_iswordc(reginput[-1]))
				3555	return FALSE;
				3556	if (reginput[0] != NUL && vim_iswordc(c))
				3557	return FALSE;
				3558	}
				3559	break; /* Matched with EOW */
				3560
				3561	case ANY:
				3562	if (c == NUL)
				3563	return FALSE;
				3564	ADVANCE_REGINPUT();
				3565	break;
				3566
				3567	case IDENT:
				3568	if (!vim_isIDc(c))
				3569	return FALSE;
				3570	ADVANCE_REGINPUT();
				3571	break;
				3572
				3573	case SIDENT:
				3574	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3575	return FALSE;
				3576	ADVANCE_REGINPUT();
				3577	break;
				3578
				3579	case KWORD:
				3580	if (!vim_iswordp(reginput))
				3581	return FALSE;
				3582	ADVANCE_REGINPUT();
				3583	break;
				3584
				3585	case SKWORD:
				3586	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3587	return FALSE;
				3588	ADVANCE_REGINPUT();
				3589	break;
				3590
				3591	case FNAME:
				3592	if (!vim_isfilec(c))
				3593	return FALSE;
				3594	ADVANCE_REGINPUT();
				3595	break;
				3596
				3597	case SFNAME:
				3598	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3599	return FALSE;
				3600	ADVANCE_REGINPUT();
				3601	break;
				3602
				3603	case PRINT:
				3604	if (ptr2cells(reginput) != 1)
				3605	return FALSE;
				3606	ADVANCE_REGINPUT();
				3607	break;
				3608
				3609	case SPRINT:
				3610	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3611	return FALSE;
				3612	ADVANCE_REGINPUT();
				3613	break;
				3614
				3615	case WHITE:
				3616	if (!vim_iswhite(c))
				3617	return FALSE;
				3618	ADVANCE_REGINPUT();
				3619	break;
				3620
				3621	case NWHITE:
				3622	if (c == NUL \|\| vim_iswhite(c))
				3623	return FALSE;
				3624	ADVANCE_REGINPUT();
				3625	break;
				3626
				3627	case DIGIT:
				3628	if (!ri_digit(c))
				3629	return FALSE;
				3630	ADVANCE_REGINPUT();
				3631	break;
				3632
				3633	case NDIGIT:
				3634	if (c == NUL \|\| ri_digit(c))
				3635	return FALSE;
				3636	ADVANCE_REGINPUT();
				3637	break;
				3638
				3639	case HEX:
				3640	if (!ri_hex(c))
				3641	return FALSE;
				3642	ADVANCE_REGINPUT();
				3643	break;
				3644
				3645	case NHEX:
				3646	if (c == NUL \|\| ri_hex(c))
				3647	return FALSE;
				3648	ADVANCE_REGINPUT();
				3649	break;
				3650
				3651	case OCTAL:
				3652	if (!ri_octal(c))
				3653	return FALSE;
				3654	ADVANCE_REGINPUT();
				3655	break;
				3656
				3657	case NOCTAL:
				3658	if (c == NUL \|\| ri_octal(c))
				3659	return FALSE;
				3660	ADVANCE_REGINPUT();
				3661	break;
				3662
				3663	case WORD:
				3664	if (!ri_word(c))
				3665	return FALSE;
				3666	ADVANCE_REGINPUT();
				3667	break;
				3668
				3669	case NWORD:
				3670	if (c == NUL \|\| ri_word(c))
				3671	return FALSE;
				3672	ADVANCE_REGINPUT();
				3673	break;
				3674
				3675	case HEAD:
				3676	if (!ri_head(c))
				3677	return FALSE;
				3678	ADVANCE_REGINPUT();
				3679	break;
				3680
				3681	case NHEAD:
				3682	if (c == NUL \|\| ri_head(c))
				3683	return FALSE;
				3684	ADVANCE_REGINPUT();
				3685	break;
				3686
				3687	case ALPHA:
				3688	if (!ri_alpha(c))
				3689	return FALSE;
				3690	ADVANCE_REGINPUT();
				3691	break;
				3692
				3693	case NALPHA:
				3694	if (c == NUL \|\| ri_alpha(c))
				3695	return FALSE;
				3696	ADVANCE_REGINPUT();
				3697	break;
				3698
				3699	case LOWER:
				3700	if (!ri_lower(c))
				3701	return FALSE;
				3702	ADVANCE_REGINPUT();
				3703	break;
				3704
				3705	case NLOWER:
				3706	if (c == NUL \|\| ri_lower(c))
				3707	return FALSE;
				3708	ADVANCE_REGINPUT();
				3709	break;
				3710
				3711	case UPPER:
				3712	if (!ri_upper(c))
				3713	return FALSE;
				3714	ADVANCE_REGINPUT();
				3715	break;
				3716
				3717	case NUPPER:
				3718	if (c == NUL \|\| ri_upper(c))
				3719	return FALSE;
				3720	ADVANCE_REGINPUT();
				3721	break;
				3722
				3723	case EXACTLY:
				3724	{
				3725	int len;
				3726	char_u *opnd;
				3727
				3728	opnd = OPERAND(scan);
				3729	/* Inline the first byte, for speed. */
				3730	if (opnd != reginput
				3731	&& (!ireg_ic \|\| (
				3732	#ifdef FEAT_MBYTE
				3733	!enc_utf8 &&
				3734	#endif
				3735	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3736	return FALSE;
				3737	if (*opnd == NUL)
				3738	{
				3739	/* match empty string always works; happens when "~" is
				3740	* empty. */
				3741	}
				3742	else if (opnd[1] == NUL
				3743	#ifdef FEAT_MBYTE
				3744	&& !(enc_utf8 && ireg_ic)
				3745	#endif
				3746	)
				3747	++reginput; /* matched a single char */
				3748	else
				3749	{
				3750	len = (int)STRLEN(opnd);
				3751	/* Need to match first byte again for multi-byte. */
				3752	if (cstrncmp(opnd, reginput, &len) != 0)
				3753	return FALSE;
				3754	#ifdef FEAT_MBYTE
				3755	/* Check for following composing character. */
				3756	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3757	{
				3758	/* raaron: This code makes a composing character get
				3759	* ignored, which is the correct behavior (sometimes)
				3760	* for voweled Hebrew texts. */
				3761	if (!ireg_icombine)
				3762	return FALSE;
				3763	}
				3764	else
				3765	#endif
				3766	reginput += len;
				3767	}
				3768	}
				3769	break;
				3770
				3771	case ANYOF:
				3772	case ANYBUT:
				3773	if (c == NUL)
				3774	return FALSE;
				3775	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3776	return FALSE;
				3777	ADVANCE_REGINPUT();
				3778	break;
				3779
				3780	#ifdef FEAT_MBYTE
				3781	case MULTIBYTECODE:
				3782	if (has_mbyte)
				3783	{
				3784	int i, len;
				3785	char_u *opnd;
				3786
				3787	opnd = OPERAND(scan);
				3788	/* Safety check (just in case 'encoding' was changed since
				3789	* compiling the program). */
				3790	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3791	return FALSE;
				3792	for (i = 0; i < len; ++i)
				3793	if (opnd[i] != reginput[i])
				3794	return FALSE;
				3795	reginput += len;
				3796	}
				3797	else
				3798	return FALSE;
				3799	break;
				3800	#endif
				3801
				3802	case NOTHING:
				3803	break;
				3804
				3805	case BACK:
				3806	break;
				3807
				3808	case MOPEN + 0: /* Match start: \zs */
				3809	case MOPEN + 1: /* \( */
				3810	case MOPEN + 2:
				3811	case MOPEN + 3:
				3812	case MOPEN + 4:
				3813	case MOPEN + 5:
				3814	case MOPEN + 6:
				3815	case MOPEN + 7:
				3816	case MOPEN + 8:
				3817	case MOPEN + 9:
				3818	{
				3819	int no;
				3820	save_se_T save;
				3821
				3822	no = op - MOPEN;
				3823	cleanup_subexpr();
				3824	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				3825
				3826	if (regmatch(next))
				3827	return TRUE;
				3828
				3829	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				3830	return FALSE;
				3831	}
				3832	/* break; Not Reached */
				3833
				3834	case NOPEN: /* \%( */
				3835	case NCLOSE: /* \) after \%( */
				3836	if (regmatch(next))
				3837	return TRUE;
				3838	return FALSE;
				3839	/* break; Not Reached */
				3840
				3841	#ifdef FEAT_SYN_HL
				3842	case ZOPEN + 1:
				3843	case ZOPEN + 2:
				3844	case ZOPEN + 3:
				3845	case ZOPEN + 4:
				3846	case ZOPEN + 5:
				3847	case ZOPEN + 6:
				3848	case ZOPEN + 7:
				3849	case ZOPEN + 8:
				3850	case ZOPEN + 9:
				3851	{
				3852	int no;
				3853	save_se_T save;
				3854
				3855	no = op - ZOPEN;
				3856	cleanup_zsubexpr();
				3857	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3858
				3859	if (regmatch(next))
				3860	return TRUE;
				3861
				3862	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3863	return FALSE;
				3864	}
				3865	/* break; Not Reached */
				3866	#endif
				3867
				3868	case MCLOSE + 0: /* Match end: \ze */
				3869	case MCLOSE + 1: /* \) */
				3870	case MCLOSE + 2:
				3871	case MCLOSE + 3:
				3872	case MCLOSE + 4:
				3873	case MCLOSE + 5:
				3874	case MCLOSE + 6:
				3875	case MCLOSE + 7:
				3876	case MCLOSE + 8:
				3877	case MCLOSE + 9:
				3878	{
				3879	int no;
				3880	save_se_T save;
				3881
				3882	no = op - MCLOSE;
				3883	cleanup_subexpr();
				3884	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				3885
				3886	if (regmatch(next))
				3887	return TRUE;
				3888
				3889	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				3890	return FALSE;
				3891	}
				3892	/* break; Not Reached */
				3893
				3894	#ifdef FEAT_SYN_HL
				3895	case ZCLOSE + 1: /* \) after \z( */
				3896	case ZCLOSE + 2:
				3897	case ZCLOSE + 3:
				3898	case ZCLOSE + 4:
				3899	case ZCLOSE + 5:
				3900	case ZCLOSE + 6:
				3901	case ZCLOSE + 7:
				3902	case ZCLOSE + 8:
				3903	case ZCLOSE + 9:
				3904	{
				3905	int no;
				3906	save_se_T save;
				3907
				3908	no = op - ZCLOSE;
				3909	cleanup_zsubexpr();
				3910	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3911
				3912	if (regmatch(next))
				3913	return TRUE;
				3914
				3915	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3916	return FALSE;
				3917	}
				3918	/* break; Not Reached */
				3919	#endif
				3920
				3921	case BACKREF + 1:
				3922	case BACKREF + 2:
				3923	case BACKREF + 3:
				3924	case BACKREF + 4:
				3925	case BACKREF + 5:
				3926	case BACKREF + 6:
				3927	case BACKREF + 7:
				3928	case BACKREF + 8:
				3929	case BACKREF + 9:
				3930	{
				3931	int no;
				3932	int len;
				3933	linenr_T clnum;
				3934	colnr_T ccol;
				3935	char_u *p;
				3936
				3937	no = op - BACKREF;
				3938	cleanup_subexpr();
				3939	if (!REG_MULTI) /* Single-line regexp */
				3940	{
				3941	if (reg_endp[no] == NULL)
				3942	{
				3943	/* Backref was not set: Match an empty string. */
				3944	len = 0;
				3945	}
				3946	else
				3947	{
				3948	/* Compare current input with back-ref in the same
				3949	* line. */
				3950	len = (int)(reg_endp[no] - reg_startp[no]);
				3951	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				3952	return FALSE;
				3953	}
				3954	}
				3955	else /* Multi-line regexp */
				3956	{
				3957	if (reg_endpos[no].lnum < 0)
				3958	{
				3959	/* Backref was not set: Match an empty string. */
				3960	len = 0;
				3961	}
				3962	else
				3963	{
				3964	if (reg_startpos[no].lnum == reglnum
				3965	&& reg_endpos[no].lnum == reglnum)
				3966	{
				3967	/* Compare back-ref within the current line. */
				3968	len = reg_endpos[no].col - reg_startpos[no].col;
				3969	if (cstrncmp(regline + reg_startpos[no].col,
				3970	reginput, &len) != 0)
				3971	return FALSE;
				3972	}
				3973	else
				3974	{
				3975	/* Messy situation: Need to compare between two
				3976	* lines. */
				3977	ccol = reg_startpos[no].col;
				3978	clnum = reg_startpos[no].lnum;
				3979	for (;;)
				3980	{
				3981	/* Since getting one line may invalidate
				3982	* the other, need to make copy. Slow! */
				3983	if (regline != reg_tofree)
				3984	{
				3985	len = (int)STRLEN(regline);
				3986	if (reg_tofree == NULL
				3987	\|\| len >= (int)reg_tofreelen)
				3988	{
				3989	len += 50; /* get some extra */
				3990	vim_free(reg_tofree);
				3991	reg_tofree = alloc(len);
				3992	if (reg_tofree == NULL)
				3993	return FALSE; /* out of memory! */
				3994	reg_tofreelen = len;
				3995	}
				3996	STRCPY(reg_tofree, regline);
				3997	reginput = reg_tofree
				3998	+ (reginput - regline);
				3999	regline = reg_tofree;
				4000	}
				4001
				4002	/* Get the line to compare with. */
				4003	p = reg_getline(clnum);
				4004	if (clnum == reg_endpos[no].lnum)
				4005	len = reg_endpos[no].col - ccol;
				4006	else
				4007	len = (int)STRLEN(p + ccol);
				4008
				4009	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4010	return FALSE; /* doesn't match */
				4011	if (clnum == reg_endpos[no].lnum)
				4012	break; /* match and at end! */
				4013	if (reglnum == reg_maxline)
				4014	return FALSE; /* text too short */
				4015
				4016	/* Advance to next line. */
				4017	reg_nextline();
				4018	++clnum;
				4019	ccol = 0;
				4020	if (got_int \|\| out_of_stack)
				4021	return FALSE;
				4022	}
				4023
				4024	/* found a match! Note that regline may now point
				4025	* to a copy of the line, that should not matter. */
				4026	}
				4027	}
				4028	}
				4029
				4030	/* Matched the backref, skip over it. */
				4031	reginput += len;
				4032	}
				4033	break;
				4034
				4035	#ifdef FEAT_SYN_HL
				4036	case ZREF + 1:
				4037	case ZREF + 2:
				4038	case ZREF + 3:
				4039	case ZREF + 4:
				4040	case ZREF + 5:
				4041	case ZREF + 6:
				4042	case ZREF + 7:
				4043	case ZREF + 8:
				4044	case ZREF + 9:
				4045	{
				4046	int no;
				4047	int len;
				4048
				4049	cleanup_zsubexpr();
				4050	no = op - ZREF;
				4051	if (re_extmatch_in != NULL
				4052	&& re_extmatch_in->matches[no] != NULL)
				4053	{
				4054	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4055	if (cstrncmp(re_extmatch_in->matches[no],
				4056	reginput, &len) != 0)
				4057	return FALSE;
				4058	reginput += len;
				4059	}
				4060	else
				4061	{
				4062	/* Backref was not set: Match an empty string. */
				4063	}
				4064	}
				4065	break;
				4066	#endif
				4067
				4068	case BRANCH:
				4069	{
				4070	if (OP(next) != BRANCH) /* No choice. */
				4071	next = OPERAND(scan); /* Avoid recursion. */
				4072	else
				4073	{
				4074	regsave_T save;
				4075
				4076	do
				4077	{
				4078	reg_save(&save);
				4079	if (regmatch(OPERAND(scan)))
				4080	return TRUE;
				4081	reg_restore(&save);
				4082	scan = regnext(scan);
				4083	} while (scan != NULL && OP(scan) == BRANCH);
				4084	return FALSE;
				4085	/* NOTREACHED */
				4086	}
				4087	}
				4088	break;
				4089
				4090	case BRACE_LIMITS:
				4091	{
				4092	int no;
				4093
				4094	if (OP(next) == BRACE_SIMPLE)
				4095	{
				4096	bl_minval = OPERAND_MIN(scan);
				4097	bl_maxval = OPERAND_MAX(scan);
				4098	}
				4099	else if (OP(next) >= BRACE_COMPLEX
				4100	&& OP(next) < BRACE_COMPLEX + 10)
				4101	{
				4102	no = OP(next) - BRACE_COMPLEX;
				4103	brace_min[no] = OPERAND_MIN(scan);
				4104	brace_max[no] = OPERAND_MAX(scan);
				4105	brace_count[no] = 0;
				4106	}
				4107	else
				4108	{
				4109	EMSG(_(e_internal)); /* Shouldn't happen */
				4110	return FALSE;
				4111	}
				4112	}
				4113	break;
				4114
				4115	case BRACE_COMPLEX + 0:
				4116	case BRACE_COMPLEX + 1:
				4117	case BRACE_COMPLEX + 2:
				4118	case BRACE_COMPLEX + 3:
				4119	case BRACE_COMPLEX + 4:
				4120	case BRACE_COMPLEX + 5:
				4121	case BRACE_COMPLEX + 6:
				4122	case BRACE_COMPLEX + 7:
				4123	case BRACE_COMPLEX + 8:
				4124	case BRACE_COMPLEX + 9:
				4125	{
				4126	int no;
				4127	regsave_T save;
				4128
				4129	no = op - BRACE_COMPLEX;
				4130	++brace_count[no];
				4131
				4132	/* If not matched enough times yet, try one more */
				4133	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4134	? brace_min[no] : brace_max[no]))
				4135	{
				4136	reg_save(&save);
				4137	if (regmatch(OPERAND(scan)))
				4138	return TRUE;
				4139	reg_restore(&save);
				4140	--brace_count[no]; /* failed, decrement match count */
				4141	return FALSE;
				4142	}
				4143
				4144	/* If matched enough times, may try matching some more */
				4145	if (brace_min[no] <= brace_max[no])
				4146	{
				4147	/* Range is the normal way around, use longest match */
				4148	if (brace_count[no] <= brace_max[no])
				4149	{
				4150	reg_save(&save);
				4151	if (regmatch(OPERAND(scan)))
				4152	return TRUE; /* matched some more times */
				4153	reg_restore(&save);
				4154	--brace_count[no]; /* matched just enough times */
				4155	/* continue with the items after \{} */
				4156	}
				4157	}
				4158	else
				4159	{
				4160	/* Range is backwards, use shortest match first */
				4161	if (brace_count[no] <= brace_min[no])
				4162	{
				4163	reg_save(&save);
				4164	if (regmatch(next))
				4165	return TRUE;
				4166	reg_restore(&save);
				4167	next = OPERAND(scan);
				4168	/* must try to match one more item */
				4169	}
				4170	}
				4171	}
				4172	break;
				4173
				4174	case BRACE_SIMPLE:
				4175	case STAR:
				4176	case PLUS:
				4177	{
				4178	int nextb; /* next byte */
				4179	int nextb_ic; /* next byte reverse case */
				4180	long count;
				4181	regsave_T save;
				4182	long minval;
				4183	long maxval;
				4184
				4185	/*
				4186	* Lookahead to avoid useless match attempts when we know
				4187	* what character comes next.
				4188	*/
				4189	if (OP(next) == EXACTLY)
				4190	{
				4191	nextb = *OPERAND(next);
				4192	if (ireg_ic)
				4193	{
				4194	if (isupper(nextb))
				4195	nextb_ic = TOLOWER_LOC(nextb);
				4196	else
				4197	nextb_ic = TOUPPER_LOC(nextb);
				4198	}
				4199	else
				4200	nextb_ic = nextb;
				4201	}
				4202	else
				4203	{
				4204	nextb = NUL;
				4205	nextb_ic = NUL;
				4206	}
				4207	if (op != BRACE_SIMPLE)
				4208	{
				4209	minval = (op == STAR) ? 0 : 1;
				4210	maxval = MAX_LIMIT;
				4211	}
				4212	else
				4213	{
				4214	minval = bl_minval;
				4215	maxval = bl_maxval;
				4216	}
				4217
				4218	/*
				4219	* When maxval > minval, try matching as much as possible, up
				4220	* to maxval. When maxval < minval, try matching at least the
				4221	* minimal number (since the range is backwards, that's also
				4222	* maxval!).
				4223	*/
				4224	count = regrepeat(OPERAND(scan), maxval);
				4225	if (got_int)
				4226	return FALSE;
				4227	if (minval <= maxval)
				4228	{
				4229	/* Range is the normal way around, use longest match */
				4230	while (count >= minval)
				4231	{
				4232	/* If it could match, try it. */
				4233	if (nextb == NUL \|\| *reginput == nextb
				4234	\|\| *reginput == nextb_ic)
				4235	{
				4236	reg_save(&save);
				4237	if (regmatch(next))
				4238	return TRUE;
				4239	reg_restore(&save);
				4240	}
				4241	/* Couldn't or didn't match -- back up one char. */
				4242	if (--count < minval)
				4243	break;
				4244	if (reginput == regline)
				4245	{
				4246	/* backup to last char of previous line */
				4247	--reglnum;
				4248	regline = reg_getline(reglnum);
				4249	/* Just in case regrepeat() didn't count right. */
				4250	if (regline == NULL)
				4251	return FALSE;
				4252	reginput = regline + STRLEN(regline);
				4253	fast_breakcheck();
				4254	if (got_int \|\| out_of_stack)
				4255	return FALSE;
				4256	}
				4257	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4258	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4259	}
				4260	}
				4261	else
				4262	{
				4263	/* Range is backwards, use shortest match first.
				4264	* Careful: maxval and minval are exchanged! */
				4265	if (count < maxval)
				4266	return FALSE;
				4267	for (;;)
				4268	{
				4269	/* If it could work, try it. */
				4270	if (nextb == NUL \|\| *reginput == nextb
				4271	\|\| *reginput == nextb_ic)
				4272	{
				4273	reg_save(&save);
				4274	if (regmatch(next))
				4275	return TRUE;
				4276	reg_restore(&save);
				4277	}
				4278	/* Couldn't or didn't match: try advancing one char. */
				4279	if (count == minval
				4280	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4281	break;
				4282	++count;
				4283	if (got_int \|\| out_of_stack)
				4284	return FALSE;
				4285	}
				4286	}
				4287	return FALSE;
				4288	}
				4289	/* break; Not Reached */
				4290
				4291	case NOMATCH:
				4292	{
				4293	regsave_T save;
				4294
				4295	/* If the operand matches, we fail. Otherwise backup and
				4296	* continue with the next item. */
				4297	reg_save(&save);
				4298	if (regmatch(OPERAND(scan)))
				4299	return FALSE;
				4300	reg_restore(&save);
				4301	}
				4302	break;
				4303
				4304	case MATCH:
				4305	case SUBPAT:
				4306	{
				4307	regsave_T save;
				4308
				4309	/* If the operand doesn't match, we fail. Otherwise backup
				4310	* and continue with the next item. */
				4311	reg_save(&save);
				4312	if (!regmatch(OPERAND(scan)))
				4313	return FALSE;
				4314	if (op == MATCH) /* zero-width */
				4315	reg_restore(&save);
				4316	}
				4317	break;
				4318
				4319	case BEHIND:
				4320	case NOBEHIND:
				4321	{
				4322	regsave_T save_after, save_start;
				4323	regsave_T save_behind_pos;
				4324	int needmatch = (op == BEHIND);
				4325
				4326	/*
				4327	* Look back in the input of the operand matches or not. This
				4328	* must be done at every position in the input and checking if
				4329	* the match ends at the current position.
				4330	* First check if the next item matches, that's probably
				4331	* faster.
				4332	*/
				4333	reg_save(&save_start);
				4334	if (regmatch(next))
				4335	{
				4336	/* save the position after the found match for next */
				4337	reg_save(&save_after);
				4338
				4339	/* start looking for a match with operand at the current
				4340	* postion. Go back one character until we find the
				4341	* result, hitting the start of the line or the previous
				4342	* line (for multi-line matching).
				4343	* Set behind_pos to where the match should end, BHPOS
				4344	* will match it. */
				4345	save_behind_pos = behind_pos;
				4346	behind_pos = save_start;
				4347	for (;;)
				4348	{
				4349	reg_restore(&save_start);
				4350	if (regmatch(OPERAND(scan))
				4351	&& reg_save_equal(&behind_pos))
				4352	{
				4353	behind_pos = save_behind_pos;
				4354	/* found a match that ends where "next" started */
				4355	if (needmatch)
				4356	{
				4357	reg_restore(&save_after);
				4358	return TRUE;
				4359	}
				4360	return FALSE;
				4361	}
				4362	/*
				4363	* No match: Go back one character. May go to
				4364	* previous line once.
				4365	*/
				4366	if (REG_MULTI)
				4367	{
				4368	if (save_start.rs_u.pos.col == 0)
				4369	{
				4370	if (save_start.rs_u.pos.lnum
				4371	< behind_pos.rs_u.pos.lnum
				4372	\|\| reg_getline(
				4373	--save_start.rs_u.pos.lnum) == NULL)
				4374	break;
				4375	reg_restore(&save_start);
				4376	save_start.rs_u.pos.col =
				4377	(colnr_T)STRLEN(regline);
				4378	}
				4379	else
				4380	--save_start.rs_u.pos.col;
				4381	}
				4382	else
				4383	{
				4384	if (save_start.rs_u.ptr == regline)
				4385	break;
				4386	--save_start.rs_u.ptr;
				4387	}
				4388	}
				4389
				4390	/* NOBEHIND succeeds when no match was found */
				4391	behind_pos = save_behind_pos;
				4392	if (!needmatch)
				4393	{
				4394	reg_restore(&save_after);
				4395	return TRUE;
				4396	}
				4397	}
				4398	return FALSE;
				4399	}
				4400
				4401	case BHPOS:
				4402	if (REG_MULTI)
				4403	{
				4404	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4405	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4406	return FALSE;
				4407	}
				4408	else if (behind_pos.rs_u.ptr != reginput)
				4409	return FALSE;
				4410	break;
				4411
				4412	case NEWL:
				4413	if ((c != NUL \|\| reglnum == reg_maxline)
				4414	&& (c != '\n' \|\| !reg_line_lbr))
				4415	return FALSE;
				4416	if (reg_line_lbr)
				4417	ADVANCE_REGINPUT();
				4418	else
				4419	reg_nextline();
				4420	break;
				4421
				4422	case END:
				4423	return TRUE; /* Success! */
				4424
				4425	default:
				4426	EMSG(_(e_re_corr));
				4427	#ifdef DEBUG
				4428	printf("Illegal op code %d\n", op);
				4429	#endif
				4430	return FALSE;
				4431	}
				4432	}
				4433
				4434	scan = next;
				4435	}
				4436
				4437	/*
				4438	* We get here only if there's trouble -- normally "case END" is the
				4439	* terminating point.
				4440	*/
				4441	EMSG(_(e_re_corr));
				4442	#ifdef DEBUG
				4443	printf("Premature EOL\n");
				4444	#endif
				4445	return FALSE;
				4446	}
				4447
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4448	/*
				4449	* regrepeat - repeatedly match something simple, return how many.
				4450	* Advances reginput (and reglnum) to just after the matched chars.
				4451	*/
				4452	static int
				4453	regrepeat(p, maxcount)
				4454	char_u *p;
				4455	long maxcount; /* maximum number of matches allowed */
				4456	{
				4457	long count = 0;
				4458	char_u *scan;
				4459	char_u *opnd;
				4460	int mask;
				4461	int testval = 0;
				4462
				4463	scan = reginput; /* Make local copy of reginput for speed. */
				4464	opnd = OPERAND(p);
				4465	switch (OP(p))
				4466	{
				4467	case ANY:
				4468	case ANY + ADD_NL:
				4469	while (count < maxcount)
				4470	{
				4471	/* Matching anything means we continue until end-of-line (or
				4472	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4473	while (*scan != NUL && count < maxcount)
				4474	{
				4475	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4476	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4477	}
				4478	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4479	break;
				4480	++count; /* count the line-break */
				4481	reg_nextline();
				4482	scan = reginput;
				4483	if (got_int)
				4484	break;
				4485	}
				4486	break;
				4487
				4488	case IDENT:
				4489	case IDENT + ADD_NL:
				4490	testval = TRUE;
				4491	/FALLTHROUGH/
				4492	case SIDENT:
				4493	case SIDENT + ADD_NL:
				4494	while (count < maxcount)
				4495	{
				4496	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4497	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4498	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4499	}
				4500	else if (*scan == NUL)
				4501	{
				4502	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4503	break;
				4504	reg_nextline();
				4505	scan = reginput;
				4506	if (got_int)
				4507	break;
				4508	}
				4509	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4510	++scan;
				4511	else
				4512	break;
				4513	++count;
				4514	}
				4515	break;
				4516
				4517	case KWORD:
				4518	case KWORD + ADD_NL:
				4519	testval = TRUE;
				4520	/FALLTHROUGH/
				4521	case SKWORD:
				4522	case SKWORD + ADD_NL:
				4523	while (count < maxcount)
				4524	{
				4525	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4526	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4527	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4528	}
				4529	else if (*scan == NUL)
				4530	{
				4531	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4532	break;
				4533	reg_nextline();
				4534	scan = reginput;
				4535	if (got_int)
				4536	break;
				4537	}
				4538	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4539	++scan;
				4540	else
				4541	break;
				4542	++count;
				4543	}
				4544	break;
				4545
				4546	case FNAME:
				4547	case FNAME + ADD_NL:
				4548	testval = TRUE;
				4549	/FALLTHROUGH/
				4550	case SFNAME:
				4551	case SFNAME + ADD_NL:
				4552	while (count < maxcount)
				4553	{
				4554	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4555	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4556	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4557	}
				4558	else if (*scan == NUL)
				4559	{
				4560	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4561	break;
				4562	reg_nextline();
				4563	scan = reginput;
				4564	if (got_int)
				4565	break;
				4566	}
				4567	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4568	++scan;
				4569	else
				4570	break;
				4571	++count;
				4572	}
				4573	break;
				4574
				4575	case PRINT:
				4576	case PRINT + ADD_NL:
				4577	testval = TRUE;
				4578	/FALLTHROUGH/
				4579	case SPRINT:
				4580	case SPRINT + ADD_NL:
				4581	while (count < maxcount)
				4582	{
				4583	if (*scan == NUL)
				4584	{
				4585	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4586	break;
				4587	reg_nextline();
				4588	scan = reginput;
				4589	if (got_int)
				4590	break;
				4591	}
				4592	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4593	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4594	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4595	}
				4596	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4597	++scan;
				4598	else
				4599	break;
				4600	++count;
				4601	}
				4602	break;
				4603
				4604	case WHITE:
				4605	case WHITE + ADD_NL:
				4606	testval = mask = RI_WHITE;
				4607	do_class:
				4608	while (count < maxcount)
				4609	{
				4610	#ifdef FEAT_MBYTE
				4611	int l;
				4612	#endif
				4613	if (*scan == NUL)
				4614	{
				4615	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4616	break;
				4617	reg_nextline();
				4618	scan = reginput;
				4619	if (got_int)
				4620	break;
				4621	}
				4622	#ifdef FEAT_MBYTE
				4623	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4624	{
				4625	if (testval != 0)
				4626	break;
				4627	scan += l;
				4628	}
				4629	#endif
				4630	else if ((class_tab[*scan] & mask) == testval)
				4631	++scan;
				4632	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4633	++scan;
				4634	else
				4635	break;
				4636	++count;
				4637	}
				4638	break;
				4639
				4640	case NWHITE:
				4641	case NWHITE + ADD_NL:
				4642	mask = RI_WHITE;
				4643	goto do_class;
				4644	case DIGIT:
				4645	case DIGIT + ADD_NL:
				4646	testval = mask = RI_DIGIT;
				4647	goto do_class;
				4648	case NDIGIT:
				4649	case NDIGIT + ADD_NL:
				4650	mask = RI_DIGIT;
				4651	goto do_class;
				4652	case HEX:
				4653	case HEX + ADD_NL:
				4654	testval = mask = RI_HEX;
				4655	goto do_class;
				4656	case NHEX:
				4657	case NHEX + ADD_NL:
				4658	mask = RI_HEX;
				4659	goto do_class;
				4660	case OCTAL:
				4661	case OCTAL + ADD_NL:
				4662	testval = mask = RI_OCTAL;
				4663	goto do_class;
				4664	case NOCTAL:
				4665	case NOCTAL + ADD_NL:
				4666	mask = RI_OCTAL;
				4667	goto do_class;
				4668	case WORD:
				4669	case WORD + ADD_NL:
				4670	testval = mask = RI_WORD;
				4671	goto do_class;
				4672	case NWORD:
				4673	case NWORD + ADD_NL:
				4674	mask = RI_WORD;
				4675	goto do_class;
				4676	case HEAD:
				4677	case HEAD + ADD_NL:
				4678	testval = mask = RI_HEAD;
				4679	goto do_class;
				4680	case NHEAD:
				4681	case NHEAD + ADD_NL:
				4682	mask = RI_HEAD;
				4683	goto do_class;
				4684	case ALPHA:
				4685	case ALPHA + ADD_NL:
				4686	testval = mask = RI_ALPHA;
				4687	goto do_class;
				4688	case NALPHA:
				4689	case NALPHA + ADD_NL:
				4690	mask = RI_ALPHA;
				4691	goto do_class;
				4692	case LOWER:
				4693	case LOWER + ADD_NL:
				4694	testval = mask = RI_LOWER;
				4695	goto do_class;
				4696	case NLOWER:
				4697	case NLOWER + ADD_NL:
				4698	mask = RI_LOWER;
				4699	goto do_class;
				4700	case UPPER:
				4701	case UPPER + ADD_NL:
				4702	testval = mask = RI_UPPER;
				4703	goto do_class;
				4704	case NUPPER:
				4705	case NUPPER + ADD_NL:
				4706	mask = RI_UPPER;
				4707	goto do_class;
				4708
				4709	case EXACTLY:
				4710	{
				4711	int cu, cl;
				4712
				4713	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4714	* would have been used for it. */
				4715	if (ireg_ic)
				4716	{
				4717	cu = TOUPPER_LOC(*opnd);
				4718	cl = TOLOWER_LOC(*opnd);
				4719	while (count < maxcount && (scan == cu \|\| scan == cl))
				4720	{
				4721	count++;
				4722	scan++;
				4723	}
				4724	}
				4725	else
				4726	{
				4727	cu = *opnd;
				4728	while (count < maxcount && *scan == cu)
				4729	{
				4730	count++;
				4731	scan++;
				4732	}
				4733	}
				4734	break;
				4735	}
				4736
				4737	#ifdef FEAT_MBYTE
				4738	case MULTIBYTECODE:
				4739	{
				4740	int i, len, cf = 0;
				4741
				4742	/* Safety check (just in case 'encoding' was changed since
				4743	* compiling the program). */
				4744	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4745	{
				4746	if (ireg_ic && enc_utf8)
				4747	cf = utf_fold(utf_ptr2char(opnd));
				4748	while (count < maxcount)
				4749	{
				4750	for (i = 0; i < len; ++i)
				4751	if (opnd[i] != scan[i])
				4752	break;
				4753	if (i < len && (!ireg_ic \|\| !enc_utf8
				4754	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4755	break;
				4756	scan += len;
				4757	++count;
				4758	}
				4759	}
				4760	}
				4761	break;
				4762	#endif
				4763
				4764	case ANYOF:
				4765	case ANYOF + ADD_NL:
				4766	testval = TRUE;
				4767	/FALLTHROUGH/
				4768
				4769	case ANYBUT:
				4770	case ANYBUT + ADD_NL:
				4771	while (count < maxcount)
				4772	{
				4773	#ifdef FEAT_MBYTE
				4774	int len;
				4775	#endif
				4776	if (*scan == NUL)
				4777	{
				4778	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4779	break;
				4780	reg_nextline();
				4781	scan = reginput;
				4782	if (got_int)
				4783	break;
				4784	}
				4785	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4786	++scan;
				4787	#ifdef FEAT_MBYTE
				4788	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4789	{
				4790	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4791	break;
				4792	scan += len;
				4793	}
				4794	#endif
				4795	else
				4796	{
				4797	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4798	break;
				4799	++scan;
				4800	}
				4801	++count;
				4802	}
				4803	break;
				4804
				4805	case NEWL:
				4806	while (count < maxcount
				4807	&& ((*scan == NUL && reglnum < reg_maxline)
				4808	\|\| (*scan == '\n' && reg_line_lbr)))
				4809	{
				4810	count++;
				4811	if (reg_line_lbr)
				4812	ADVANCE_REGINPUT();
				4813	else
				4814	reg_nextline();
				4815	scan = reginput;
				4816	if (got_int)
				4817	break;
				4818	}
				4819	break;
				4820
				4821	default: /* Oh dear. Called inappropriately. */
				4822	EMSG(_(e_re_corr));
				4823	#ifdef DEBUG
				4824	printf("Called regrepeat with op code %d\n", OP(p));
				4825	#endif
				4826	break;
				4827	}
				4828
				4829	reginput = scan;
				4830
				4831	return (int)count;
				4832	}
				4833
				4834	/*
				4835	* regnext - dig the "next" pointer out of a node
				4836	*/
				4837	static char_u *
				4838	regnext(p)
				4839	char_u *p;
				4840	{
				4841	int offset;
				4842
				4843	if (p == JUST_CALC_SIZE)
				4844	return NULL;
				4845
				4846	offset = NEXT(p);
				4847	if (offset == 0)
				4848	return NULL;
				4849
				4850	if (OP(p) == BACK)
				4851	return p - offset;
				4852	else
				4853	return p + offset;
				4854	}
				4855
				4856	/*
				4857	* Check the regexp program for its magic number.
				4858	* Return TRUE if it's wrong.
				4859	*/
				4860	static int
				4861	prog_magic_wrong()
				4862	{
				4863	if (UCHARAT(REG_MULTI
				4864	? reg_mmatch->regprog->program
				4865	: reg_match->regprog->program) != REGMAGIC)
				4866	{
				4867	EMSG(_(e_re_corr));
				4868	return TRUE;
				4869	}
				4870	return FALSE;
				4871	}
				4872
				4873	/*
				4874	* Cleanup the subexpressions, if this wasn't done yet.
				4875	* This construction is used to clear the subexpressions only when they are
				4876	* used (to increase speed).
				4877	*/
				4878	static void
				4879	cleanup_subexpr()
				4880	{
				4881	if (need_clear_subexpr)
				4882	{
				4883	if (REG_MULTI)
				4884	{
				4885	/* Use 0xff to set lnum to -1 */
				4886	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4887	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4888	}
				4889	else
				4890	{
				4891	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				4892	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				4893	}
				4894	need_clear_subexpr = FALSE;
				4895	}
				4896	}
				4897
				4898	#ifdef FEAT_SYN_HL
				4899	static void
				4900	cleanup_zsubexpr()
				4901	{
				4902	if (need_clear_zsubexpr)
				4903	{
				4904	if (REG_MULTI)
				4905	{
				4906	/* Use 0xff to set lnum to -1 */
				4907	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4908	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4909	}
				4910	else
				4911	{
				4912	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				4913	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				4914	}
				4915	need_clear_zsubexpr = FALSE;
				4916	}
				4917	}
				4918	#endif
				4919
				4920	/*
				4921	* Advance reglnum, regline and reginput to the next line.
				4922	*/
				4923	static void
				4924	reg_nextline()
				4925	{
				4926	regline = reg_getline(++reglnum);
				4927	reginput = regline;
				4928	fast_breakcheck();
				4929	}
				4930
				4931	/*
				4932	* Save the input line and position in a regsave_T.
				4933	*/
				4934	static void
				4935	reg_save(save)
				4936	regsave_T *save;
				4937	{
				4938	if (REG_MULTI)
				4939	{
				4940	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				4941	save->rs_u.pos.lnum = reglnum;
				4942	}
				4943	else
				4944	save->rs_u.ptr = reginput;
				4945	}
				4946
				4947	/*
				4948	* Restore the input line and position from a regsave_T.
				4949	*/
				4950	static void
				4951	reg_restore(save)
				4952	regsave_T *save;
				4953	{
				4954	if (REG_MULTI)
				4955	{
				4956	if (reglnum != save->rs_u.pos.lnum)
				4957	{
				4958	/* only call reg_getline() when the line number changed to save
				4959	* a bit of time */
				4960	reglnum = save->rs_u.pos.lnum;
				4961	regline = reg_getline(reglnum);
				4962	}
				4963	reginput = regline + save->rs_u.pos.col;
				4964	}
				4965	else
				4966	reginput = save->rs_u.ptr;
				4967	}
				4968
				4969	/*
				4970	* Return TRUE if current position is equal to saved position.
				4971	*/
				4972	static int
				4973	reg_save_equal(save)
				4974	regsave_T *save;
				4975	{
				4976	if (REG_MULTI)
				4977	return reglnum == save->rs_u.pos.lnum
				4978	&& reginput == regline + save->rs_u.pos.col;
				4979	return reginput == save->rs_u.ptr;
				4980	}
				4981
				4982	/*
				4983	* Tentatively set the sub-expression start to the current position (after
				4984	* calling regmatch() they will have changed). Need to save the existing
				4985	* values for when there is no match.
				4986	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				4987	* depending on REG_MULTI.
				4988	*/
				4989	static void
				4990	save_se_multi(savep, posp)
				4991	save_se_T *savep;
				4992	lpos_T *posp;
				4993	{
				4994	savep->se_u.pos = *posp;
				4995	posp->lnum = reglnum;
				4996	posp->col = (colnr_T)(reginput - regline);
				4997	}
				4998
				4999	static void
				5000	save_se_one(savep, pp)
				5001	save_se_T *savep;
				5002	char_u **pp;
				5003	{
				5004	savep->se_u.ptr = *pp;
				5005	*pp = reginput;
				5006	}
				5007
				5008	/*
				5009	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5010	*/
				5011	static int
				5012	re_num_cmp(val, scan)
				5013	long_u val;
				5014	char_u *scan;
				5015	{
				5016	long_u n = OPERAND_MIN(scan);
				5017
				5018	if (OPERAND_CMP(scan) == '>')
				5019	return val > n;
				5020	if (OPERAND_CMP(scan) == '<')
				5021	return val < n;
				5022	return val == n;
				5023	}
				5024
				5025
				5026	#ifdef DEBUG
				5027
				5028	/*
				5029	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5030	*/
				5031	static void
				5032	regdump(pattern, r)
				5033	char_u *pattern;
				5034	regprog_T *r;
				5035	{
				5036	char_u *s;
				5037	int op = EXACTLY; /* Arbitrary non-END op. */
				5038	char_u *next;
				5039	char_u *end = NULL;
				5040
				5041	printf("\r\nregcomp(%s):\r\n", pattern);
				5042
				5043	s = r->program + 1;
				5044	/*
				5045	* Loop until we find the END that isn't before a referred next (an END
				5046	* can also appear in a NOMATCH operand).
				5047	*/
				5048	while (op != END \|\| s <= end)
				5049	{
				5050	op = OP(s);
				5051	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5052	next = regnext(s);
				5053	if (next == NULL) /* Next ptr. */
				5054	printf("(0)");
				5055	else
				5056	printf("(%d)", (int)((s - r->program) + (next - s)));
				5057	if (end < next)
				5058	end = next;
				5059	if (op == BRACE_LIMITS)
				5060	{
				5061	/* Two short ints */
				5062	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5063	s += 8;
				5064	}
				5065	s += 3;
				5066	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5067	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5068	\|\| op == EXACTLY)
				5069	{
				5070	/* Literal string, where present. */
				5071	while (*s != NUL)
				5072	printf("%c", *s++);
				5073	s++;
				5074	}
				5075	printf("\r\n");
				5076	}
				5077
				5078	/* Header fields of interest. */
				5079	if (r->regstart != NUL)
				5080	printf("start `%s' 0x%x; ", r->regstart < 256
				5081	? (char *)transchar(r->regstart)
				5082	: "multibyte", r->regstart);
				5083	if (r->reganch)
				5084	printf("anchored; ");
				5085	if (r->regmust != NULL)
				5086	printf("must have \"%s\"", r->regmust);
				5087	printf("\r\n");
				5088	}
				5089
				5090	/*
				5091	* regprop - printable representation of opcode
				5092	*/
				5093	static char_u *
				5094	regprop(op)
				5095	char_u *op;
				5096	{
				5097	char_u *p;
				5098	static char_u buf[50];
				5099
				5100	(void) strcpy(buf, ":");
				5101
				5102	switch (OP(op))
				5103	{
				5104	case BOL:
				5105	p = "BOL";
				5106	break;
				5107	case EOL:
				5108	p = "EOL";
				5109	break;
				5110	case RE_BOF:
				5111	p = "BOF";
				5112	break;
				5113	case RE_EOF:
				5114	p = "EOF";
				5115	break;
				5116	case CURSOR:
				5117	p = "CURSOR";
				5118	break;
				5119	case RE_LNUM:
				5120	p = "RE_LNUM";
				5121	break;
				5122	case RE_COL:
				5123	p = "RE_COL";
				5124	break;
				5125	case RE_VCOL:
				5126	p = "RE_VCOL";
				5127	break;
				5128	case BOW:
				5129	p = "BOW";
				5130	break;
				5131	case EOW:
				5132	p = "EOW";
				5133	break;
				5134	case ANY:
				5135	p = "ANY";
				5136	break;
				5137	case ANY + ADD_NL:
				5138	p = "ANY+NL";
				5139	break;
				5140	case ANYOF:
				5141	p = "ANYOF";
				5142	break;
				5143	case ANYOF + ADD_NL:
				5144	p = "ANYOF+NL";
				5145	break;
				5146	case ANYBUT:
				5147	p = "ANYBUT";
				5148	break;
				5149	case ANYBUT + ADD_NL:
				5150	p = "ANYBUT+NL";
				5151	break;
				5152	case IDENT:
				5153	p = "IDENT";
				5154	break;
				5155	case IDENT + ADD_NL:
				5156	p = "IDENT+NL";
				5157	break;
				5158	case SIDENT:
				5159	p = "SIDENT";
				5160	break;
				5161	case SIDENT + ADD_NL:
				5162	p = "SIDENT+NL";
				5163	break;
				5164	case KWORD:
				5165	p = "KWORD";
				5166	break;
				5167	case KWORD + ADD_NL:
				5168	p = "KWORD+NL";
				5169	break;
				5170	case SKWORD:
				5171	p = "SKWORD";
				5172	break;
				5173	case SKWORD + ADD_NL:
				5174	p = "SKWORD+NL";
				5175	break;
				5176	case FNAME:
				5177	p = "FNAME";
				5178	break;
				5179	case FNAME + ADD_NL:
				5180	p = "FNAME+NL";
				5181	break;
				5182	case SFNAME:
				5183	p = "SFNAME";
				5184	break;
				5185	case SFNAME + ADD_NL:
				5186	p = "SFNAME+NL";
				5187	break;
				5188	case PRINT:
				5189	p = "PRINT";
				5190	break;
				5191	case PRINT + ADD_NL:
				5192	p = "PRINT+NL";
				5193	break;
				5194	case SPRINT:
				5195	p = "SPRINT";
				5196	break;
				5197	case SPRINT + ADD_NL:
				5198	p = "SPRINT+NL";
				5199	break;
				5200	case WHITE:
				5201	p = "WHITE";
				5202	break;
				5203	case WHITE + ADD_NL:
				5204	p = "WHITE+NL";
				5205	break;
				5206	case NWHITE:
				5207	p = "NWHITE";
				5208	break;
				5209	case NWHITE + ADD_NL:
				5210	p = "NWHITE+NL";
				5211	break;
				5212	case DIGIT:
				5213	p = "DIGIT";
				5214	break;
				5215	case DIGIT + ADD_NL:
				5216	p = "DIGIT+NL";
				5217	break;
				5218	case NDIGIT:
				5219	p = "NDIGIT";
				5220	break;
				5221	case NDIGIT + ADD_NL:
				5222	p = "NDIGIT+NL";
				5223	break;
				5224	case HEX:
				5225	p = "HEX";
				5226	break;
				5227	case HEX + ADD_NL:
				5228	p = "HEX+NL";
				5229	break;
				5230	case NHEX:
				5231	p = "NHEX";
				5232	break;
				5233	case NHEX + ADD_NL:
				5234	p = "NHEX+NL";
				5235	break;
				5236	case OCTAL:
				5237	p = "OCTAL";
				5238	break;
				5239	case OCTAL + ADD_NL:
				5240	p = "OCTAL+NL";
				5241	break;
				5242	case NOCTAL:
				5243	p = "NOCTAL";
				5244	break;
				5245	case NOCTAL + ADD_NL:
				5246	p = "NOCTAL+NL";
				5247	break;
				5248	case WORD:
				5249	p = "WORD";
				5250	break;
				5251	case WORD + ADD_NL:
				5252	p = "WORD+NL";
				5253	break;
				5254	case NWORD:
				5255	p = "NWORD";
				5256	break;
				5257	case NWORD + ADD_NL:
				5258	p = "NWORD+NL";
				5259	break;
				5260	case HEAD:
				5261	p = "HEAD";
				5262	break;
				5263	case HEAD + ADD_NL:
				5264	p = "HEAD+NL";
				5265	break;
				5266	case NHEAD:
				5267	p = "NHEAD";
				5268	break;
				5269	case NHEAD + ADD_NL:
				5270	p = "NHEAD+NL";
				5271	break;
				5272	case ALPHA:
				5273	p = "ALPHA";
				5274	break;
				5275	case ALPHA + ADD_NL:
				5276	p = "ALPHA+NL";
				5277	break;
				5278	case NALPHA:
				5279	p = "NALPHA";
				5280	break;
				5281	case NALPHA + ADD_NL:
				5282	p = "NALPHA+NL";
				5283	break;
				5284	case LOWER:
				5285	p = "LOWER";
				5286	break;
				5287	case LOWER + ADD_NL:
				5288	p = "LOWER+NL";
				5289	break;
				5290	case NLOWER:
				5291	p = "NLOWER";
				5292	break;
				5293	case NLOWER + ADD_NL:
				5294	p = "NLOWER+NL";
				5295	break;
				5296	case UPPER:
				5297	p = "UPPER";
				5298	break;
				5299	case UPPER + ADD_NL:
				5300	p = "UPPER+NL";
				5301	break;
				5302	case NUPPER:
				5303	p = "NUPPER";
				5304	break;
				5305	case NUPPER + ADD_NL:
				5306	p = "NUPPER+NL";
				5307	break;
				5308	case BRANCH:
				5309	p = "BRANCH";
				5310	break;
				5311	case EXACTLY:
				5312	p = "EXACTLY";
				5313	break;
				5314	case NOTHING:
				5315	p = "NOTHING";
				5316	break;
				5317	case BACK:
				5318	p = "BACK";
				5319	break;
				5320	case END:
				5321	p = "END";
				5322	break;
				5323	case MOPEN + 0:
				5324	p = "MATCH START";
				5325	break;
				5326	case MOPEN + 1:
				5327	case MOPEN + 2:
				5328	case MOPEN + 3:
				5329	case MOPEN + 4:
				5330	case MOPEN + 5:
				5331	case MOPEN + 6:
				5332	case MOPEN + 7:
				5333	case MOPEN + 8:
				5334	case MOPEN + 9:
				5335	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5336	p = NULL;
				5337	break;
				5338	case MCLOSE + 0:
				5339	p = "MATCH END";
				5340	break;
				5341	case MCLOSE + 1:
				5342	case MCLOSE + 2:
				5343	case MCLOSE + 3:
				5344	case MCLOSE + 4:
				5345	case MCLOSE + 5:
				5346	case MCLOSE + 6:
				5347	case MCLOSE + 7:
				5348	case MCLOSE + 8:
				5349	case MCLOSE + 9:
				5350	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5351	p = NULL;
				5352	break;
				5353	case BACKREF + 1:
				5354	case BACKREF + 2:
				5355	case BACKREF + 3:
				5356	case BACKREF + 4:
				5357	case BACKREF + 5:
				5358	case BACKREF + 6:
				5359	case BACKREF + 7:
				5360	case BACKREF + 8:
				5361	case BACKREF + 9:
				5362	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5363	p = NULL;
				5364	break;
				5365	case NOPEN:
				5366	p = "NOPEN";
				5367	break;
				5368	case NCLOSE:
				5369	p = "NCLOSE";
				5370	break;
				5371	#ifdef FEAT_SYN_HL
				5372	case ZOPEN + 1:
				5373	case ZOPEN + 2:
				5374	case ZOPEN + 3:
				5375	case ZOPEN + 4:
				5376	case ZOPEN + 5:
				5377	case ZOPEN + 6:
				5378	case ZOPEN + 7:
				5379	case ZOPEN + 8:
				5380	case ZOPEN + 9:
				5381	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5382	p = NULL;
				5383	break;
				5384	case ZCLOSE + 1:
				5385	case ZCLOSE + 2:
				5386	case ZCLOSE + 3:
				5387	case ZCLOSE + 4:
				5388	case ZCLOSE + 5:
				5389	case ZCLOSE + 6:
				5390	case ZCLOSE + 7:
				5391	case ZCLOSE + 8:
				5392	case ZCLOSE + 9:
				5393	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5394	p = NULL;
				5395	break;
				5396	case ZREF + 1:
				5397	case ZREF + 2:
				5398	case ZREF + 3:
				5399	case ZREF + 4:
				5400	case ZREF + 5:
				5401	case ZREF + 6:
				5402	case ZREF + 7:
				5403	case ZREF + 8:
				5404	case ZREF + 9:
				5405	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5406	p = NULL;
				5407	break;
				5408	#endif
				5409	case STAR:
				5410	p = "STAR";
				5411	break;
				5412	case PLUS:
				5413	p = "PLUS";
				5414	break;
				5415	case NOMATCH:
				5416	p = "NOMATCH";
				5417	break;
				5418	case MATCH:
				5419	p = "MATCH";
				5420	break;
				5421	case BEHIND:
				5422	p = "BEHIND";
				5423	break;
				5424	case NOBEHIND:
				5425	p = "NOBEHIND";
				5426	break;
				5427	case SUBPAT:
				5428	p = "SUBPAT";
				5429	break;
				5430	case BRACE_LIMITS:
				5431	p = "BRACE_LIMITS";
				5432	break;
				5433	case BRACE_SIMPLE:
				5434	p = "BRACE_SIMPLE";
				5435	break;
				5436	case BRACE_COMPLEX + 0:
				5437	case BRACE_COMPLEX + 1:
				5438	case BRACE_COMPLEX + 2:
				5439	case BRACE_COMPLEX + 3:
				5440	case BRACE_COMPLEX + 4:
				5441	case BRACE_COMPLEX + 5:
				5442	case BRACE_COMPLEX + 6:
				5443	case BRACE_COMPLEX + 7:
				5444	case BRACE_COMPLEX + 8:
				5445	case BRACE_COMPLEX + 9:
				5446	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5447	p = NULL;
				5448	break;
				5449	#ifdef FEAT_MBYTE
				5450	case MULTIBYTECODE:
				5451	p = "MULTIBYTECODE";
				5452	break;
				5453	#endif
				5454	case NEWL:
				5455	p = "NEWL";
				5456	break;
				5457	default:
				5458	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5459	p = NULL;
				5460	break;
				5461	}
				5462	if (p != NULL)
				5463	(void) strcat(buf, p);
				5464	return buf;
				5465	}
				5466	#endif
				5467
				5468	#ifdef FEAT_MBYTE
				5469	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5470
				5471	typedef struct
				5472	{
				5473	int a, b, c;
				5474	} decomp_T;
				5475
				5476
				5477	/* 0xfb20 - 0xfb4f */
				5478	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5479	{
				5480	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5481	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5482	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5483	{0x5d4,0,0}, /* 0xfb23 alt he */
				5484	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5485	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5486	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5487	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5488	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5489	{'+', 0, 0}, /* 0xfb29 alt plus */
				5490	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5491	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5492	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5493	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5494	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5495	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5496	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5497	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5498	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5499	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5500	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5501	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5502	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5503	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5504	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5505	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5506	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5507	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5508	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5509	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5510	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5511	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5512	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5513	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5514	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5515	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5516	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5517	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5518	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5519	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5520	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5521	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5522	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5523	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5524	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5525	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5526	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5527	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5528	};
				5529
				5530	static void
				5531	mb_decompose(c, c1, c2, c3)
				5532	int c, c1, c2, *c3;
				5533	{
				5534	decomp_T d;
				5535
				5536	if (c >= 0x4b20 && c <= 0xfb4f)
				5537	{
				5538	d = decomp_table[c - 0xfb20];
				5539	*c1 = d.a;
				5540	*c2 = d.b;
				5541	*c3 = d.c;
				5542	}
				5543	else
				5544	{
				5545	*c1 = c;
				5546	c2 = c3 = 0;
				5547	}
				5548	}
				5549	#endif
				5550
				5551	/*
				5552	* Compare two strings, ignore case if ireg_ic set.
				5553	* Return 0 if strings match, non-zero otherwise.
				5554	* Correct the length "*n" when composing characters are ignored.
				5555	*/
				5556	static int
				5557	cstrncmp(s1, s2, n)
				5558	char_u s1, s2;
				5559	int *n;
				5560	{
				5561	int result;
				5562
				5563	if (!ireg_ic)
				5564	result = STRNCMP(s1, s2, *n);
				5565	else
				5566	result = MB_STRNICMP(s1, s2, *n);
				5567
				5568	#ifdef FEAT_MBYTE
				5569	/* if it failed and it's utf8 and we want to combineignore: */
				5570	if (result != 0 && enc_utf8 && ireg_icombine)
				5571	{
				5572	char_u str1, str2;
				5573	int c1, c2, c11, c12;
				5574	int ix;
				5575	int junk;
				5576
				5577	/* we have to handle the strcmp ourselves, since it is necessary to
				5578	* deal with the composing characters by ignoring them: */
				5579	str1 = s1;
				5580	str2 = s2;
				5581	c1 = c2 = 0;
				5582	for (ix = 0; ix < *n; )
				5583	{
				5584	c1 = mb_ptr2char_adv(&str1);
				5585	c2 = mb_ptr2char_adv(&str2);
				5586	ix += utf_char2len(c1);
				5587
				5588	/* decompose the character if necessary, into 'base' characters
				5589	* because I don't care about Arabic, I will hard-code the Hebrew
				5590	* which I do care about! So sue me... */
				5591	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5592	{
				5593	/* decomposition necessary? */
				5594	mb_decompose(c1, &c11, &junk, &junk);
				5595	mb_decompose(c2, &c12, &junk, &junk);
				5596	c1 = c11;
				5597	c2 = c12;
				5598	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5599	break;
				5600	}
				5601	}
				5602	result = c2 - c1;
				5603	if (result == 0)
				5604	*n = (int)(str2 - s2);
				5605	}
				5606	#endif
				5607
				5608	return result;
				5609	}
				5610
				5611	/*
				5612	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5613	*/
				5614	static char_u *
				5615	cstrchr(s, c)
				5616	char_u *s;
				5617	int c;
				5618	{
				5619	char_u *p;
				5620	int cc;
				5621
				5622	if (!ireg_ic
				5623	#ifdef FEAT_MBYTE
				5624	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5625	#endif
				5626	)
				5627	return vim_strchr(s, c);
				5628
				5629	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5630	* faster (esp. when using MS Visual C++!).
				5631	* For UTF-8 need to use folded case. */
				5632	#ifdef FEAT_MBYTE
				5633	if (enc_utf8 && c > 0x80)
				5634	cc = utf_fold(c);
				5635	else
				5636	#endif
				5637	if (isupper(c))
				5638	cc = TOLOWER_LOC(c);
				5639	else if (islower(c))
				5640	cc = TOUPPER_LOC(c);
				5641	else
				5642	return vim_strchr(s, c);
				5643
				5644	#ifdef FEAT_MBYTE
				5645	if (has_mbyte)
				5646	{
				5647	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5648	{
				5649	if (enc_utf8 && c > 0x80)
				5650	{
				5651	if (utf_fold(utf_ptr2char(p)) == cc)
				5652	return p;
				5653	}
				5654	else if (p == c \|\| p == cc)
				5655	return p;
				5656	}
				5657	}
				5658	else
				5659	#endif
				5660	/* Faster version for when there are no multi-byte characters. */
				5661	for (p = s; *p != NUL; ++p)
				5662	if (p == c \|\| p == cc)
				5663	return p;
				5664
				5665	return NULL;
				5666	}
				5667
				5668	/***************************************************************
				5669	* regsub stuff *
				5670	***************************************************************/
				5671
				5672	/* This stuff below really confuses cc on an SGI -- webb */
				5673	#ifdef __sgi
				5674	# undef __ARGS
				5675	# define __ARGS(x) ()
				5676	#endif
				5677
				5678	/*
				5679	* We should define ftpr as a pointer to a function returning a pointer to
				5680	* a function returning a pointer to a function ...
				5681	* This is impossible, so we declare a pointer to a function returning a
				5682	* pointer to a function returning void. This should work for all compilers.
				5683	*/
				5684	typedef void ((fptr) __ARGS((char_u *, int)))();
				5685
				5686	static fptr do_upper __ARGS((char_u *, int));
				5687	static fptr do_Upper __ARGS((char_u *, int));
				5688	static fptr do_lower __ARGS((char_u *, int));
				5689	static fptr do_Lower __ARGS((char_u *, int));
				5690
				5691	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5692
				5693	static fptr
				5694	do_upper(d, c)
				5695	char_u *d;
				5696	int c;
				5697	{
				5698	*d = TOUPPER_LOC(c);
				5699
				5700	return (fptr)NULL;
				5701	}
				5702
				5703	static fptr
				5704	do_Upper(d, c)
				5705	char_u *d;
				5706	int c;
				5707	{
				5708	*d = TOUPPER_LOC(c);
				5709
				5710	return (fptr)do_Upper;
				5711	}
				5712
				5713	static fptr
				5714	do_lower(d, c)
				5715	char_u *d;
				5716	int c;
				5717	{
				5718	*d = TOLOWER_LOC(c);
				5719
				5720	return (fptr)NULL;
				5721	}
				5722
				5723	static fptr
				5724	do_Lower(d, c)
				5725	char_u *d;
				5726	int c;
				5727	{
				5728	*d = TOLOWER_LOC(c);
				5729
				5730	return (fptr)do_Lower;
				5731	}
				5732
				5733	/*
				5734	* regtilde(): Replace tildes in the pattern by the old pattern.
				5735	*
				5736	* Short explanation of the tilde: It stands for the previous replacement
				5737	* pattern. If that previous pattern also contains a ~ we should go back a
				5738	* step further... But we insert the previous pattern into the current one
				5739	* and remember that.
				5740	* This still does not handle the case where "magic" changes. TODO?
				5741	*
				5742	* The tildes are parsed once before the first call to vim_regsub().
				5743	*/
				5744	char_u *
				5745	regtilde(source, magic)
				5746	char_u *source;
				5747	int magic;
				5748	{
				5749	char_u *newsub = source;
				5750	char_u *tmpsub;
				5751	char_u *p;
				5752	int len;
				5753	int prevlen;
				5754
				5755	for (p = newsub; *p; ++p)
				5756	{
				5757	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5758	{
				5759	if (reg_prev_sub != NULL)
				5760	{
				5761	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5762	prevlen = (int)STRLEN(reg_prev_sub);
				5763	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5764	if (tmpsub != NULL)
				5765	{
				5766	/* copy prefix */
				5767	len = (int)(p - newsub); /* not including ~ */
				5768	mch_memmove(tmpsub, newsub, (size_t)len);
				5769	/* interpretate tilde */
				5770	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5771	/* copy postfix */
				5772	if (!magic)
				5773	++p; /* back off \ */
				5774	STRCPY(tmpsub + len + prevlen, p + 1);
				5775
				5776	if (newsub != source) /* already allocated newsub */
				5777	vim_free(newsub);
				5778	newsub = tmpsub;
				5779	p = newsub + len + prevlen;
				5780	}
				5781	}
				5782	else if (magic)
				5783	STRCPY(p, p + 1); /* remove '~' */
				5784	else
				5785	STRCPY(p, p + 2); /* remove '\~' */
				5786	--p;
				5787	}
				5788	else
				5789	{
				5790	if (p == '\\' && p[1]) / skip escaped characters */
				5791	++p;
				5792	#ifdef FEAT_MBYTE
				5793	if (has_mbyte)
				5794	p += (*mb_ptr2len_check)(p) - 1;
				5795	#endif
				5796	}
				5797	}
				5798
				5799	vim_free(reg_prev_sub);
				5800	if (newsub != source) /* newsub was allocated, just keep it */
				5801	reg_prev_sub = newsub;
				5802	else /* no ~ found, need to save newsub */
				5803	reg_prev_sub = vim_strsave(newsub);
				5804	return newsub;
				5805	}
				5806
				5807	#ifdef FEAT_EVAL
				5808	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				5809
				5810	/* These pointers are used instead of reg_match and reg_mmatch for
				5811	* reg_submatch(). Needed for when the substitution string is an expression
				5812	* that contains a call to substitute() and submatch(). */
				5813	static regmatch_T *submatch_match;
				5814	static regmmatch_T *submatch_mmatch;
				5815	#endif
				5816
				5817	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				5818	/*
				5819	* vim_regsub() - perform substitutions after a vim_regexec() or
				5820	* vim_regexec_multi() match.
				5821	*
				5822	* If "copy" is TRUE really copy into "dest".
				5823	* If "copy" is FALSE nothing is copied, this is just to find out the length
				5824	* of the result.
				5825	*
				5826	* If "backslash" is TRUE, a backslash will be removed later, need to double
				5827	* them to keep them, and insert a backslash before a CR to avoid it being
				5828	* replaced with a line break later.
				5829	*
				5830	* Note: The matched text must not change between the call of
				5831	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				5832	* references invalid!
				5833	*
				5834	* Returns the size of the replacement, including terminating NUL.
				5835	*/
				5836	int
				5837	vim_regsub(rmp, source, dest, copy, magic, backslash)
				5838	regmatch_T *rmp;
				5839	char_u *source;
				5840	char_u *dest;
				5841	int copy;
				5842	int magic;
				5843	int backslash;
				5844	{
				5845	reg_match = rmp;
				5846	reg_mmatch = NULL;
				5847	reg_maxline = 0;
				5848	return vim_regsub_both(source, dest, copy, magic, backslash);
				5849	}
				5850	#endif
				5851
				5852	int
				5853	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				5854	regmmatch_T *rmp;
				5855	linenr_T lnum;
				5856	char_u *source;
				5857	char_u *dest;
				5858	int copy;
				5859	int magic;
				5860	int backslash;
				5861	{
				5862	reg_match = NULL;
				5863	reg_mmatch = rmp;
				5864	reg_buf = curbuf; /* always works on the current buffer! */
				5865	reg_firstlnum = lnum;
				5866	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				5867	return vim_regsub_both(source, dest, copy, magic, backslash);
				5868	}
				5869
				5870	static int
				5871	vim_regsub_both(source, dest, copy, magic, backslash)
				5872	char_u *source;
				5873	char_u *dest;
				5874	int copy;
				5875	int magic;
				5876	int backslash;
				5877	{
				5878	char_u *src;
				5879	char_u *dst;
				5880	char_u *s;
				5881	int c;
				5882	int no = -1;
				5883	fptr func = (fptr)NULL;
				5884	linenr_T clnum = 0; /* init for GCC */
				5885	int len = 0; /* init for GCC */
				5886	#ifdef FEAT_EVAL
				5887	static char_u *eval_result = NULL;
				5888	#endif
				5889	#ifdef FEAT_MBYTE
				5890	int l;
				5891	#endif
				5892
				5893
				5894	/* Be paranoid... */
				5895	if (source == NULL \|\| dest == NULL)
				5896	{
				5897	EMSG(_(e_null));
				5898	return 0;
				5899	}
				5900	if (prog_magic_wrong())
				5901	return 0;
				5902	src = source;
				5903	dst = dest;
				5904
				5905	/*
				5906	* When the substitute part starts with "\=" evaluate it as an expression.
				5907	*/
				5908	if (source[0] == '\\' && source[1] == '='
				5909	#ifdef FEAT_EVAL
				5910	&& !can_f_submatch /* can't do this recursively */
				5911	#endif
				5912	)
				5913	{
				5914	#ifdef FEAT_EVAL
				5915	/* To make sure that the length doesn't change between checking the
				5916	* length and copying the string, and to speed up things, the
				5917	* resulting string is saved from the call with "copy" == FALSE to the
				5918	* call with "copy" == TRUE. */
				5919	if (copy)
				5920	{
				5921	if (eval_result != NULL)
				5922	{
				5923	STRCPY(dest, eval_result);
				5924	dst += STRLEN(eval_result);
				5925	vim_free(eval_result);
				5926	eval_result = NULL;
				5927	}
				5928	}
				5929	else
				5930	{
				5931	linenr_T save_reg_maxline;
				5932	win_T *save_reg_win;
				5933	int save_ireg_ic;
				5934
				5935	vim_free(eval_result);
				5936
				5937	/* The expression may contain substitute(), which calls us
				5938	* recursively. Make sure submatch() gets the text from the first
				5939	* level. Don't need to save "reg_buf", because
				5940	* vim_regexec_multi() can't be called recursively. */
				5941	submatch_match = reg_match;
				5942	submatch_mmatch = reg_mmatch;
				5943	save_reg_maxline = reg_maxline;
				5944	save_reg_win = reg_win;
				5945	save_ireg_ic = ireg_ic;
				5946	can_f_submatch = TRUE;
				5947
				5948	eval_result = eval_to_string(source + 2, NULL);
				5949	if (eval_result != NULL)
				5950	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	5951	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5952	{
				5953	/* Change NL to CR, so that it becomes a line break.
				5954	* Skip over a backslashed character. */
				5955	if (*s == NL)
				5956	*s = CAR;
				5957	else if (*s == '\\' && s[1] != NUL)
				5958	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5959	}
				5960
				5961	dst += STRLEN(eval_result);
				5962	}
				5963
				5964	reg_match = submatch_match;
				5965	reg_mmatch = submatch_mmatch;
				5966	reg_maxline = save_reg_maxline;
				5967	reg_win = save_reg_win;
				5968	ireg_ic = save_ireg_ic;
				5969	can_f_submatch = FALSE;
				5970	}
				5971	#endif
				5972	}
				5973	else
				5974	while ((c = *src++) != NUL)
				5975	{
				5976	if (c == '&' && magic)
				5977	no = 0;
				5978	else if (c == '\\' && *src != NUL)
				5979	{
				5980	if (*src == '&' && !magic)
				5981	{
				5982	++src;
				5983	no = 0;
				5984	}
				5985	else if ('0' <= src && src <= '9')
				5986	{
				5987	no = *src++ - '0';
				5988	}
				5989	else if (vim_strchr((char_u )"uUlLeE", src))
				5990	{
				5991	switch (*src++)
				5992	{
				5993	case 'u': func = (fptr)do_upper;
				5994	continue;
				5995	case 'U': func = (fptr)do_Upper;
				5996	continue;
				5997	case 'l': func = (fptr)do_lower;
				5998	continue;
				5999	case 'L': func = (fptr)do_Lower;
				6000	continue;
				6001	case 'e':
				6002	case 'E': func = (fptr)NULL;
				6003	continue;
				6004	}
				6005	}
				6006	}
				6007	if (no < 0) /* Ordinary character. */
				6008	{
				6009	if (c == '\\' && *src != NUL)
				6010	{
				6011	/* Check for abbreviations -- webb */
				6012	switch (*src)
				6013	{
				6014	case 'r': c = CAR; ++src; break;
				6015	case 'n': c = NL; ++src; break;
				6016	case 't': c = TAB; ++src; break;
				6017	/* Oh no! \e already has meaning in subst pat :-( */
				6018	/* case 'e': c = ESC; ++src; break; */
				6019	case 'b': c = Ctrl_H; ++src; break;
				6020
				6021	/* If "backslash" is TRUE the backslash will be removed
				6022	* later. Used to insert a literal CR. */
				6023	default: if (backslash)
				6024	{
				6025	if (copy)
				6026	*dst = '\\';
				6027	++dst;
				6028	}
				6029	c = *src++;
				6030	}
				6031	}
				6032
				6033	/* Write to buffer, if copy is set. */
				6034	#ifdef FEAT_MBYTE
				6035	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6036	{
				6037	/* TODO: should use "func" here. */
				6038	if (copy)
				6039	mch_memmove(dst, src - 1, l);
				6040	dst += l - 1;
				6041	src += l - 1;
				6042	}
				6043	else
				6044	{
				6045	#endif
				6046	if (copy)
				6047	{
				6048	if (func == (fptr)NULL) /* just copy */
				6049	*dst = c;
				6050	else /* change case */
				6051	func = (fptr)(func(dst, c));
				6052	/* Turbo C complains without the typecast */
				6053	}
				6054	#ifdef FEAT_MBYTE
				6055	}
				6056	#endif
				6057	dst++;
				6058	}
				6059	else
				6060	{
				6061	if (REG_MULTI)
				6062	{
				6063	clnum = reg_mmatch->startpos[no].lnum;
				6064	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6065	s = NULL;
				6066	else
				6067	{
				6068	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6069	if (reg_mmatch->endpos[no].lnum == clnum)
				6070	len = reg_mmatch->endpos[no].col
				6071	- reg_mmatch->startpos[no].col;
				6072	else
				6073	len = (int)STRLEN(s);
				6074	}
				6075	}
				6076	else
				6077	{
				6078	s = reg_match->startp[no];
				6079	if (reg_match->endp[no] == NULL)
				6080	s = NULL;
				6081	else
				6082	len = (int)(reg_match->endp[no] - s);
				6083	}
				6084	if (s != NULL)
				6085	{
				6086	for (;;)
				6087	{
				6088	if (len == 0)
				6089	{
				6090	if (REG_MULTI)
				6091	{
				6092	if (reg_mmatch->endpos[no].lnum == clnum)
				6093	break;
				6094	if (copy)
				6095	*dst = CAR;
				6096	++dst;
				6097	s = reg_getline(++clnum);
				6098	if (reg_mmatch->endpos[no].lnum == clnum)
				6099	len = reg_mmatch->endpos[no].col;
				6100	else
				6101	len = (int)STRLEN(s);
				6102	}
				6103	else
				6104	break;
				6105	}
				6106	else if (s == NUL) / we hit NUL. */
				6107	{
				6108	if (copy)
				6109	EMSG(_(e_re_damg));
				6110	goto exit;
				6111	}
				6112	else
				6113	{
				6114	if (backslash && (s == CAR \|\| s == '\\'))
				6115	{
				6116	/*
				6117	* Insert a backslash in front of a CR, otherwise
				6118	* it will be replaced by a line break.
				6119	* Number of backslashes will be halved later,
				6120	* double them here.
				6121	*/
				6122	if (copy)
				6123	{
				6124	dst[0] = '\\';
				6125	dst[1] = *s;
				6126	}
				6127	dst += 2;
				6128	}
				6129	#ifdef FEAT_MBYTE
				6130	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6131	{
				6132	/* TODO: should use "func" here. */
				6133	if (copy)
				6134	mch_memmove(dst, s, l);
				6135	dst += l;
				6136	s += l - 1;
				6137	len -= l - 1;
				6138	}
				6139	#endif
				6140	else
				6141	{
				6142	if (copy)
				6143	{
				6144	if (func == (fptr)NULL) /* just copy */
				6145	dst = s;
				6146	else /* change case */
				6147	func = (fptr)(func(dst, *s));
				6148	/* Turbo C complains without the typecast */
				6149	}
				6150	++dst;
				6151	}
				6152	++s;
				6153	--len;
				6154	}
				6155	}
				6156	}
				6157	no = -1;
				6158	}
				6159	}
				6160	if (copy)
				6161	*dst = NUL;
				6162
				6163	exit:
				6164	return (int)((dst - dest) + 1);
				6165	}
				6166
				6167	#ifdef FEAT_EVAL
				6168	/*
				6169	* Used for the submatch() function: get the string from tne n'th submatch in
				6170	* allocated memory.
				6171	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6172	*/
				6173	char_u *
				6174	reg_submatch(no)
				6175	int no;
				6176	{
				6177	char_u *retval = NULL;
				6178	char_u *s;
				6179	int len;
				6180	int round;
				6181	linenr_T lnum;
				6182
				6183	if (!can_f_submatch)
				6184	return NULL;
				6185
				6186	if (submatch_match == NULL)
				6187	{
				6188	/*
				6189	* First round: compute the length and allocate memory.
				6190	* Second round: copy the text.
				6191	*/
				6192	for (round = 1; round <= 2; ++round)
				6193	{
				6194	lnum = submatch_mmatch->startpos[no].lnum;
				6195	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6196	return NULL;
				6197
				6198	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6199	if (s == NULL) /* anti-crash check, cannot happen? */
				6200	break;
				6201	if (submatch_mmatch->endpos[no].lnum == lnum)
				6202	{
				6203	/* Within one line: take form start to end col. */
				6204	len = submatch_mmatch->endpos[no].col
				6205	- submatch_mmatch->startpos[no].col;
				6206	if (round == 2)
				6207	{
				6208	STRNCPY(retval, s, len);
				6209	retval[len] = NUL;
				6210	}
				6211	++len;
				6212	}
				6213	else
				6214	{
				6215	/* Multiple lines: take start line from start col, middle
				6216	* lines completely and end line up to end col. */
				6217	len = (int)STRLEN(s);
				6218	if (round == 2)
				6219	{
				6220	STRCPY(retval, s);
				6221	retval[len] = '\n';
				6222	}
				6223	++len;
				6224	++lnum;
				6225	while (lnum < submatch_mmatch->endpos[no].lnum)
				6226	{
				6227	s = reg_getline(lnum++);
				6228	if (round == 2)
				6229	STRCPY(retval + len, s);
				6230	len += (int)STRLEN(s);
				6231	if (round == 2)
				6232	retval[len] = '\n';
				6233	++len;
				6234	}
				6235	if (round == 2)
				6236	STRNCPY(retval + len, reg_getline(lnum),
				6237	submatch_mmatch->endpos[no].col);
				6238	len += submatch_mmatch->endpos[no].col;
				6239	if (round == 2)
				6240	retval[len] = NUL;
				6241	++len;
				6242	}
				6243
				6244	if (round == 1)
				6245	{
				6246	retval = lalloc((long_u)len, TRUE);
				6247	if (s == NULL)
				6248	return NULL;
				6249	}
				6250	}
				6251	}
				6252	else
				6253	{
				6254	if (submatch_match->endp[no] == NULL)
				6255	retval = NULL;
				6256	else
				6257	{
				6258	s = submatch_match->startp[no];
				6259	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6260	}
				6261	}
				6262
				6263	return retval;
				6264	}
				6265	#endif