Blame - src/regexp.c - android_external_vim

blob: c4f8920789ac17e1e6e16b37870cc68bcb0a117b [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACE_COMPLEX
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	77	* node points to the node after the stuff to be repeated.
				78	* The operand of some types of node is a literal string; for others, it is a
				79	* node leading into a sub-FSM. In particular, the operand of a BRANCH node
				80	* is the first node of the branch.
				81	* (NB this is not a tree structure: the tail of the branch connects to the
				82	* thing following the set of BRANCHes.)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	83	*
				84	* pattern is coded like:
				85	*
				86	* +-----------------+
				87	* \| V
				88	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				89	* \| ^ \| ^
				90	* +------+ +----------+
				91	*
				92	*
				93	* +------------------+
				94	* V \|
				95	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				96	* \| \| ^ ^
				97	* \| +---------------+ \|
				98	* +---------------------------------------------+
				99	*
				100	*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	101	* +----------------------+
				102	* V \|
				103	* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
				104	* \| \| ^ ^
				105	* \| +----------+ \|
				106	* +-------------------------------------------------+
				107	*
				108	*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	109	* +-------------------------+
				110	* V \|
				111	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				112	* \| \| ^
				113	* \| +----------------+
				114	* +-----------------------------------------------+
				115	*
				116	*
				117	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				118	* \| \| ^ ^
				119	* \| +----------------+ \|
				120	* +--------------------------------+
				121	*
				122	* +---------+
				123	* \| V
				124	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				125	* \| \| \| \| ^ ^
				126	* \| \| \| +-----+ \|
				127	* \| \| +----------------+ \|
				128	* \| +---------------------------+ \|
				129	* +------------------------------------------------------+
				130	*
				131	* They all start with a BRANCH for "\\|" alternatives, even when there is only
				132	* one alternative.
				133	*/
				134
				135	/*
				136	* The opcodes are:
				137	*/
				138
				139	/* definition number opnd? meaning */
				140	#define END 0 /* End of program or NOMATCH operand. */
				141	#define BOL 1 /* Match "" at beginning of line. */
				142	#define EOL 2 /* Match "" at end of line. */
				143	#define BRANCH 3 /* node Match this alternative, or the
				144	* next... */
				145	#define BACK 4 /* Match "", "next" ptr points backward. */
				146	#define EXACTLY 5 /* str Match this string. */
				147	#define NOTHING 6 /* Match empty string. */
				148	#define STAR 7 /* node Match this (simple) thing 0 or more
				149	* times. */
				150	#define PLUS 8 /* node Match this (simple) thing 1 or more
				151	* times. */
				152	#define MATCH 9 /* node match the operand zero-width */
				153	#define NOMATCH 10 /* node check for no match with operand */
				154	#define BEHIND 11 /* node look behind for a match with operand */
				155	#define NOBEHIND 12 /* node look behind for no match with operand */
				156	#define SUBPAT 13 /* node match the operand here */
				157	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				158	* n times (\{m,n\}). */
				159	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				160	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				161	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				162	* and BRACE_COMPLEX. */
				163	#define NEWL 18 /* Match line-break */
				164	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				165
				166
				167	/* character classes: 20-48 normal, 50-78 include a line-break */
				168	#define ADD_NL 30
				169	#define FIRST_NL ANY + ADD_NL
				170	#define ANY 20 /* Match any one character. */
				171	#define ANYOF 21 /* str Match any character in this string. */
				172	#define ANYBUT 22 /* str Match any character not in this
				173	* string. */
				174	#define IDENT 23 /* Match identifier char */
				175	#define SIDENT 24 /* Match identifier char but no digit */
				176	#define KWORD 25 /* Match keyword char */
				177	#define SKWORD 26 /* Match word char but no digit */
				178	#define FNAME 27 /* Match file name char */
				179	#define SFNAME 28 /* Match file name char but no digit */
				180	#define PRINT 29 /* Match printable char */
				181	#define SPRINT 30 /* Match printable char but no digit */
				182	#define WHITE 31 /* Match whitespace char */
				183	#define NWHITE 32 /* Match non-whitespace char */
				184	#define DIGIT 33 /* Match digit char */
				185	#define NDIGIT 34 /* Match non-digit char */
				186	#define HEX 35 /* Match hex char */
				187	#define NHEX 36 /* Match non-hex char */
				188	#define OCTAL 37 /* Match octal char */
				189	#define NOCTAL 38 /* Match non-octal char */
				190	#define WORD 39 /* Match word char */
				191	#define NWORD 40 /* Match non-word char */
				192	#define HEAD 41 /* Match head char */
				193	#define NHEAD 42 /* Match non-head char */
				194	#define ALPHA 43 /* Match alpha char */
				195	#define NALPHA 44 /* Match non-alpha char */
				196	#define LOWER 45 /* Match lowercase char */
				197	#define NLOWER 46 /* Match non-lowercase char */
				198	#define UPPER 47 /* Match uppercase char */
				199	#define NUPPER 48 /* Match non-uppercase char */
				200	#define LAST_NL NUPPER + ADD_NL
				201	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				202
				203	#define MOPEN 80 /* -89 Mark this point in input as start of
				204	* \( subexpr. MOPEN + 0 marks start of
				205	* match. */
				206	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				207	* end of match. */
				208	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				209
				210	#ifdef FEAT_SYN_HL
				211	# define ZOPEN 110 /* -119 Mark this point in input as start of
				212	* \z( subexpr. */
				213	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				214	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				215	#endif
				216
				217	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				218
				219	#define NOPEN 150 /* Mark this point in input as start of
				220	\%( subexpr. */
				221	#define NCLOSE 151 /* Analogous to NOPEN. */
				222
				223	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				224	#define RE_BOF 201 /* Match "" at beginning of file. */
				225	#define RE_EOF 202 /* Match "" at end of file. */
				226	#define CURSOR 203 /* Match location of cursor. */
				227
				228	#define RE_LNUM 204 /* nr cmp Match line number */
				229	#define RE_COL 205 /* nr cmp Match column number */
				230	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				231
				232	/*
				233	* Magic characters have a special meaning, they don't match literally.
				234	* Magic characters are negative. This separates them from literal characters
				235	* (possibly multi-byte). Only ASCII characters can be Magic.
				236	*/
				237	#define Magic(x) ((int)(x) - 256)
				238	#define un_Magic(x) ((x) + 256)
				239	#define is_Magic(x) ((x) < 0)
				240
				241	static int no_Magic __ARGS((int x));
				242	static int toggle_Magic __ARGS((int x));
				243
				244	static int
				245	no_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return x;
				251	}
				252
				253	static int
				254	toggle_Magic(x)
				255	int x;
				256	{
				257	if (is_Magic(x))
				258	return un_Magic(x);
				259	return Magic(x);
				260	}
				261
				262	/*
				263	* The first byte of the regexp internal "program" is actually this magic
				264	* number; the start node begins in the second byte. It's used to catch the
				265	* most severe mutilation of the program by the caller.
				266	*/
				267
				268	#define REGMAGIC 0234
				269
				270	/*
				271	* Opcode notes:
				272	*
				273	* BRANCH The set of branches constituting a single choice are hooked
				274	* together with their "next" pointers, since precedence prevents
				275	* anything being concatenated to any individual branch. The
				276	* "next" pointer of the last BRANCH in a choice points to the
				277	* thing following the whole choice. This is also where the
				278	* final "next" pointer of each individual branch points; each
				279	* branch starts with the operand node of a BRANCH node.
				280	*
				281	* BACK Normal "next" pointers all implicitly point forward; BACK
				282	* exists to make loop structures possible.
				283	*
				284	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				285	* BRANCH structures using BACK. Simple cases (one character
				286	* per match) are implemented with STAR and PLUS for speed
				287	* and to minimize recursive plunges.
				288	*
				289	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				290	* node, and defines the min and max limits to be used for that
				291	* node.
				292	*
				293	* MOPEN,MCLOSE ...are numbered at compile time.
				294	* ZOPEN,ZCLOSE ...ditto
				295	*/
				296
				297	/*
				298	* A node is one char of opcode followed by two chars of "next" pointer.
				299	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				300	* value is a positive offset from the opcode of the node containing it.
				301	* An operand, if any, simply follows the node. (Note that much of the
				302	* code generation knows about this implicit relationship.)
				303	*
				304	* Using two bytes for the "next" pointer is vast overkill for most things,
				305	* but allows patterns to get big without disasters.
				306	*/
				307	#define OP(p) ((int)*(p))
				308	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				309	#define OPERAND(p) ((p) + 3)
				310	/* Obtain an operand that was stored as four bytes, MSB first. */
				311	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				312	+ ((long)(p)[5] << 8) + (long)(p)[6])
				313	/* Obtain a second operand stored as four bytes. */
				314	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				315	/* Obtain a second single-byte operand stored after a four bytes operand. */
				316	#define OPERAND_CMP(p) (p)[7]
				317
				318	/*
				319	* Utility definitions.
				320	*/
				321	#define UCHARAT(p) ((int)(char_u )(p))
				322
				323	/* Used for an error (down from) vim_regcomp(): give the error message, set
				324	* rc_did_emsg and return NULL */
				325	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				326	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				327	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				328	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				329
				330	#define MAX_LIMIT (32767L << 16L)
				331
				332	static int re_multi_type __ARGS((int));
				333	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				334	static char_u cstrchr __ARGS((char_u , int));
				335
				336	#ifdef DEBUG
				337	static void regdump __ARGS((char_u , regprog_T ));
				338	static char_u regprop __ARGS((char_u ));
				339	#endif
				340
				341	#define NOT_MULTI 0
				342	#define MULTI_ONE 1
				343	#define MULTI_MULT 2
				344	/*
				345	* Return NOT_MULTI if c is not a "multi" operator.
				346	* Return MULTI_ONE if c is a single "multi" operator.
				347	* Return MULTI_MULT if c is a multi "multi" operator.
				348	*/
				349	static int
				350	re_multi_type(c)
				351	int c;
				352	{
				353	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				354	return MULTI_ONE;
				355	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				356	return MULTI_MULT;
				357	return NOT_MULTI;
				358	}
				359
				360	/*
				361	* Flags to be passed up and down.
				362	*/
				363	#define HASWIDTH 0x1 /* Known never to match null string. */
				364	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				365	#define SPSTART 0x4 /* Starts with * or +. */
				366	#define HASNL 0x8 /* Contains some \n. */
				367	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				368	#define WORST 0 /* Worst case. */
				369
				370	/*
				371	* When regcode is set to this value, code is not emitted and size is computed
				372	* instead.
				373	*/
				374	#define JUST_CALC_SIZE ((char_u *) -1)
				375
				376	static char_u *reg_prev_sub;
				377
				378	/*
				379	* REGEXP_INRANGE contains all characters which are always special in a []
				380	* range after '\'.
				381	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				382	* These are:
				383	* \n - New line (NL).
				384	* \r - Carriage Return (CR).
				385	* \t - Tab (TAB).
				386	* \e - Escape (ESC).
				387	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	388	* \d - Character code in decimal, eg \d123
				389	* \o - Character code in octal, eg \o40
				390	* \x - Character code in hex, eg \x4a
				391	* \u - Multibyte character code, eg \u20ac
				392	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	393	*/
				394	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	395	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	396
				397	static int backslash_trans __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	398	static int get_char_class __ARGS((char_u **pp));
				399	static int get_equi_class __ARGS((char_u **pp));
				400	static void reg_equi_class __ARGS((int c));
				401	static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	static char_u skip_anyof __ARGS((char_u p));
				403	static void init_class_tab __ARGS((void));
				404
				405	/*
				406	* Translate '\x' to its control character, except "\n", which is Magic.
				407	*/
				408	static int
				409	backslash_trans(c)
				410	int c;
				411	{
				412	switch (c)
				413	{
				414	case 'r': return CAR;
				415	case 't': return TAB;
				416	case 'e': return ESC;
				417	case 'b': return BS;
				418	}
				419	return c;
				420	}
				421
				422	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	423	* Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	424	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				425	* recognized. Otherwise "pp" is advanced to after the item.
				426	*/
				427	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	428	get_char_class(pp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	429	char_u **pp;
				430	{
				431	static const char *(class_names[]) =
				432	{
				433	"alnum:]",
				434	#define CLASS_ALNUM 0
				435	"alpha:]",
				436	#define CLASS_ALPHA 1
				437	"blank:]",
				438	#define CLASS_BLANK 2
				439	"cntrl:]",
				440	#define CLASS_CNTRL 3
				441	"digit:]",
				442	#define CLASS_DIGIT 4
				443	"graph:]",
				444	#define CLASS_GRAPH 5
				445	"lower:]",
				446	#define CLASS_LOWER 6
				447	"print:]",
				448	#define CLASS_PRINT 7
				449	"punct:]",
				450	#define CLASS_PUNCT 8
				451	"space:]",
				452	#define CLASS_SPACE 9
				453	"upper:]",
				454	#define CLASS_UPPER 10
				455	"xdigit:]",
				456	#define CLASS_XDIGIT 11
				457	"tab:]",
				458	#define CLASS_TAB 12
				459	"return:]",
				460	#define CLASS_RETURN 13
				461	"backspace:]",
				462	#define CLASS_BACKSPACE 14
				463	"escape:]",
				464	#define CLASS_ESCAPE 15
				465	};
				466	#define CLASS_NONE 99
				467	int i;
				468
				469	if ((*pp)[1] == ':')
				470	{
				471	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				472	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				473	{
				474	*pp += STRLEN(class_names[i]) + 2;
				475	return i;
				476	}
				477	}
				478	return CLASS_NONE;
				479	}
				480
				481	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	482	* Specific version of character class functions.
				483	* Using a table to keep this fast.
				484	*/
				485	static short class_tab[256];
				486
				487	#define RI_DIGIT 0x01
				488	#define RI_HEX 0x02
				489	#define RI_OCTAL 0x04
				490	#define RI_WORD 0x08
				491	#define RI_HEAD 0x10
				492	#define RI_ALPHA 0x20
				493	#define RI_LOWER 0x40
				494	#define RI_UPPER 0x80
				495	#define RI_WHITE 0x100
				496
				497	static void
				498	init_class_tab()
				499	{
				500	int i;
				501	static int done = FALSE;
				502
				503	if (done)
				504	return;
				505
				506	for (i = 0; i < 256; ++i)
				507	{
				508	if (i >= '0' && i <= '7')
				509	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				510	else if (i >= '8' && i <= '9')
				511	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				512	else if (i >= 'a' && i <= 'f')
				513	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				514	#ifdef EBCDIC
				515	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				516	\|\| (i >= 's' && i <= 'z'))
				517	#else
				518	else if (i >= 'g' && i <= 'z')
				519	#endif
				520	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				521	else if (i >= 'A' && i <= 'F')
				522	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				523	#ifdef EBCDIC
				524	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				525	\|\| (i >= 'S' && i <= 'Z'))
				526	#else
				527	else if (i >= 'G' && i <= 'Z')
				528	#endif
				529	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				530	else if (i == '_')
				531	class_tab[i] = RI_WORD + RI_HEAD;
				532	else
				533	class_tab[i] = 0;
				534	}
				535	class_tab[' '] \|= RI_WHITE;
				536	class_tab['\t'] \|= RI_WHITE;
				537	done = TRUE;
				538	}
				539
				540	#ifdef FEAT_MBYTE
				541	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				542	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				543	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				544	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				545	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				546	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				547	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				548	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				549	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				550	#else
				551	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				552	# define ri_hex(c) (class_tab[c] & RI_HEX)
				553	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				554	# define ri_word(c) (class_tab[c] & RI_WORD)
				555	# define ri_head(c) (class_tab[c] & RI_HEAD)
				556	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				557	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				558	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				559	# define ri_white(c) (class_tab[c] & RI_WHITE)
				560	#endif
				561
				562	/* flags for regflags */
				563	#define RF_ICASE 1 /* ignore case */
				564	#define RF_NOICASE 2 /* don't ignore case */
				565	#define RF_HASNL 4 /* can match a NL */
				566	#define RF_ICOMBINE 8 /* ignore combining characters */
				567	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				568
				569	/*
				570	* Global work variables for vim_regcomp().
				571	*/
				572
				573	static char_u regparse; / Input-scan pointer. */
				574	static int prevchr_len; /* byte length of previous char */
				575	static int num_complex_braces; /* Complex \{...} count */
				576	static int regnpar; /* () count. */
				577	#ifdef FEAT_SYN_HL
				578	static int regnzpar; /* \z() count. */
				579	static int re_has_z; /* \z item detected */
				580	#endif
				581	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				582	static long regsize; /* Code size. */
				583	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				584	static unsigned regflags; /* RF_ flags for prog */
				585	static long brace_min[10]; /* Minimums for complex brace repeats */
				586	static long brace_max[10]; /* Maximums for complex brace repeats */
				587	static int brace_count[10]; /* Current counts for complex brace repeats */
				588	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				589	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				590	#endif
				591	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				592
				593	static int reg_magic; /* magicness of the pattern: */
				594	#define MAGIC_NONE 1 /* "\V" very unmagic */
				595	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				596	#define MAGIC_ON 3 /* "\m" or 'magic' */
				597	#define MAGIC_ALL 4 /* "\v" very magic */
				598
				599	static int reg_string; /* matching with a string instead of a buffer
				600	line */
				601
				602	/*
				603	* META contains all characters that may be magic, except '^' and '$'.
				604	*/
				605
				606	#ifdef EBCDIC
				607	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				608	#else
				609	/* META[] is used often enough to justify turning it into a table. */
				610	static char_u META_flags[] = {
				611	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				612	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				613	/* % & ( ) * + . */
				614	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				615	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				616	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				617	/* @ A C D F H I K L M O */
				618	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				619	/* P S U V W X Z [ _ */
				620	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				621	/* a c d f h i k l m n o */
				622	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				623	/* p s u v w x z { \| ~ */
				624	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				625	};
				626	#endif
				627
				628	static int curchr;
				629
				630	/* arguments for reg() */
				631	#define REG_NOPAREN 0 /* toplevel reg() */
				632	#define REG_PAREN 1 /* */
				633	#define REG_ZPAREN 2 /* \z(\) */
				634	#define REG_NPAREN 3 /* \%(\) */
				635
				636	/*
				637	* Forward declarations for vim_regcomp()'s friends.
				638	*/
				639	static void initchr __ARGS((char_u *));
				640	static int getchr __ARGS((void));
				641	static void skipchr_keepstart __ARGS((void));
				642	static int peekchr __ARGS((void));
				643	static void skipchr __ARGS((void));
				644	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	645	static int gethexchrs __ARGS((int maxinputlen));
				646	static int getoctchrs __ARGS((void));
				647	static int getdecchrs __ARGS((void));
				648	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	649	static void regcomp_start __ARGS((char_u *expr, int flags));
				650	static char_u reg __ARGS((int, int ));
				651	static char_u regbranch __ARGS((int flagp));
				652	static char_u regconcat __ARGS((int flagp));
				653	static char_u regpiece __ARGS((int ));
				654	static char_u regatom __ARGS((int ));
				655	static char_u *regnode __ARGS((int));
				656	static int prog_magic_wrong __ARGS((void));
				657	static char_u regnext __ARGS((char_u ));
				658	static void regc __ARGS((int b));
				659	#ifdef FEAT_MBYTE
				660	static void regmbc __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	661	#else
				662	# define regmbc(c) regc(c)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	663	#endif
				664	static void reginsert __ARGS((int, char_u *));
				665	static void reginsert_limits __ARGS((int, long, long, char_u *));
				666	static char_u re_put_long __ARGS((char_u pr, long_u val));
				667	static int read_limits __ARGS((long , long ));
				668	static void regtail __ARGS((char_u , char_u ));
				669	static void regoptail __ARGS((char_u , char_u ));
				670
				671	/*
				672	* Return TRUE if compiled regular expression "prog" can match a line break.
				673	*/
				674	int
				675	re_multiline(prog)
				676	regprog_T *prog;
				677	{
				678	return (prog->regflags & RF_HASNL);
				679	}
				680
				681	/*
				682	* Return TRUE if compiled regular expression "prog" looks before the start
				683	* position (pattern contains "\@<=" or "\@<!").
				684	*/
				685	int
				686	re_lookbehind(prog)
				687	regprog_T *prog;
				688	{
				689	return (prog->regflags & RF_LOOKBH);
				690	}
				691
				692	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	693	* Check for an equivalence class name "[=a=]". "pp" points to the '['.
				694	* Returns a character representing the class. Zero means that no item was
				695	* recognized. Otherwise "pp" is advanced to after the item.
				696	*/
				697	static int
				698	get_equi_class(pp)
				699	char_u **pp;
				700	{
				701	int c;
				702	int l = 1;
				703	char_u p = pp;
				704
				705	if (p[1] == '=')
				706	{
				707	#ifdef FEAT_MBYTE
				708	if (has_mbyte)
				709	l = mb_ptr2len_check(p + 2);
				710	#endif
				711	if (p[l + 2] == '=' && p[l + 3] == ']')
				712	{
				713	#ifdef FEAT_MBYTE
				714	if (has_mbyte)
				715	c = mb_ptr2char(p + 2);
				716	else
				717	#endif
				718	c = p[2];
				719	*pp += l + 4;
				720	return c;
				721	}
				722	}
				723	return 0;
				724	}
				725
				726	/*
				727	* Produce the bytes for equivalence class "c".
				728	* Currently only handles latin1, latin9 and utf-8.
				729	*/
				730	static void
				731	reg_equi_class(c)
				732	int c;
				733	{
				734	#ifdef FEAT_MBYTE
				735	if (enc_utf8 \|\| STRCMP(p_enc, "latin1") == 0
				736	\|\| STRCMP(p_enc, "latin9") == 0)
				737	#endif
				738	{
				739	switch (c)
				740	{
				741	case 'A': case 'À': case 'Á': case 'Â':
				742	case 'Ã': case 'Ä': case 'Å':
				743	regmbc('A'); regmbc('À'); regmbc('Á'); regmbc('Â');
				744	regmbc('Ã'); regmbc('Ä'); regmbc('Å');
				745	return;
				746	case 'C': case 'Ç':
				747	regmbc('C'); regmbc('Ç');
				748	return;
				749	case 'E': case 'È': case 'É': case 'Ê': case 'Ë':
				750	regmbc('E'); regmbc('È'); regmbc('É'); regmbc('Ê');
				751	regmbc('Ë');
				752	return;
				753	case 'I': case 'Ì': case 'Í': case 'Î': case 'Ï':
				754	regmbc('I'); regmbc('Ì'); regmbc('Í'); regmbc('Î');
				755	regmbc('Ï');
				756	return;
				757	case 'N': case 'Ñ':
				758	regmbc('N'); regmbc('Ñ');
				759	return;
				760	case 'O': case 'Ò': case 'Ó': case 'Ô': case 'Õ': case 'Ö':
				761	regmbc('O'); regmbc('Ò'); regmbc('Ó'); regmbc('Ô');
				762	regmbc('Õ'); regmbc('Ö');
				763	return;
				764	case 'U': case 'Ù': case 'Ú': case 'Û': case 'Ü':
				765	regmbc('U'); regmbc('Ù'); regmbc('Ú'); regmbc('Û');
				766	regmbc('Ü');
				767	return;
				768	case 'Y': case 'Ý':
				769	regmbc('Y'); regmbc('Ý');
				770	return;
				771	case 'a': case 'à': case 'á': case 'â':
				772	case 'ã': case 'ä': case 'å':
				773	regmbc('a'); regmbc('à'); regmbc('á'); regmbc('â');
				774	regmbc('ã'); regmbc('ä'); regmbc('å');
				775	return;
				776	case 'c': case 'ç':
				777	regmbc('c'); regmbc('ç');
				778	return;
				779	case 'e': case 'è': case 'é': case 'ê': case 'ë':
				780	regmbc('e'); regmbc('è'); regmbc('é'); regmbc('ê');
				781	regmbc('ë');
				782	return;
				783	case 'i': case 'ì': case 'í': case 'î': case 'ï':
				784	regmbc('i'); regmbc('ì'); regmbc('í'); regmbc('î');
				785	regmbc('ï');
				786	return;
				787	case 'n': case 'ñ':
				788	regmbc('n'); regmbc('ñ');
				789	return;
				790	case 'o': case 'ò': case 'ó': case 'ô': case 'õ': case 'ö':
				791	regmbc('o'); regmbc('ò'); regmbc('ó'); regmbc('ô');
				792	regmbc('õ'); regmbc('ö');
				793	return;
				794	case 'u': case 'ù': case 'ú': case 'û': case 'ü':
				795	regmbc('u'); regmbc('ù'); regmbc('ú'); regmbc('û');
				796	regmbc('ü');
				797	return;
				798	case 'y': case 'ý': case 'ÿ':
				799	regmbc('y'); regmbc('ý'); regmbc('ÿ');
				800	return;
				801	}
				802	}
				803	regmbc(c);
				804	}
				805
				806	/*
				807	* Check for a collating element "[.a.]". "pp" points to the '['.
				808	* Returns a character. Zero means that no item was recognized. Otherwise
				809	* "pp" is advanced to after the item.
				810	* Currently only single characters are recognized!
				811	*/
				812	static int
				813	get_coll_element(pp)
				814	char_u **pp;
				815	{
				816	int c;
				817	int l = 1;
				818	char_u p = pp;
				819
				820	if (p[1] == '.')
				821	{
				822	#ifdef FEAT_MBYTE
				823	if (has_mbyte)
				824	l = mb_ptr2len_check(p + 2);
				825	#endif
				826	if (p[l + 2] == '.' && p[l + 3] == ']')
				827	{
				828	#ifdef FEAT_MBYTE
				829	if (has_mbyte)
				830	c = mb_ptr2char(p + 2);
				831	else
				832	#endif
				833	c = p[2];
				834	*pp += l + 4;
				835	return c;
				836	}
				837	}
				838	return 0;
				839	}
				840
				841
				842	/*
				843	* Skip over a "[]" range.
				844	* "p" must point to the character after the '['.
				845	* The returned pointer is on the matching ']', or the terminating NUL.
				846	*/
				847	static char_u *
				848	skip_anyof(p)
				849	char_u *p;
				850	{
				851	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				852	int cpo_bsl; /* 'cpoptions' contains '\' flag */
				853	#ifdef FEAT_MBYTE
				854	int l;
				855	#endif
				856
				857	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				858	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
				859
				860	if (p == '^') / Complement of range. */
				861	++p;
				862	if (p == ']' \|\| p == '-')
				863	++p;
				864	while (p != NUL && p != ']')
				865	{
				866	#ifdef FEAT_MBYTE
				867	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				868	p += l;
				869	else
				870	#endif
				871	if (*p == '-')
				872	{
				873	++p;
				874	if (p != ']' && p != NUL)
				875	mb_ptr_adv(p);
				876	}
				877	else if (*p == '\\'
				878	&& !cpo_bsl
				879	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				880	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				881	p += 2;
				882	else if (*p == '[')
				883	{
				884	if (get_char_class(&p) == CLASS_NONE
				885	&& get_equi_class(&p) == 0
				886	&& get_coll_element(&p) == 0)
				887	++p; /* It was not a class name */
				888	}
				889	else
				890	++p;
				891	}
				892
				893	return p;
				894	}
				895
				896	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	897	* Skip past regular expression.
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	898	* Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	899	* Take care of characters with a backslash in front of it.
				900	* Skip strings inside [ and ].
				901	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				902	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				903	* is changed in-place.
				904	*/
				905	char_u *
				906	skip_regexp(startp, dirc, magic, newp)
				907	char_u *startp;
				908	int dirc;
				909	int magic;
				910	char_u **newp;
				911	{
				912	int mymagic;
				913	char_u *p = startp;
				914
				915	if (magic)
				916	mymagic = MAGIC_ON;
				917	else
				918	mymagic = MAGIC_OFF;
				919
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	920	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	921	{
				922	if (p[0] == dirc) /* found end of regexp */
				923	break;
				924	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				925	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				926	{
				927	p = skip_anyof(p + 1);
				928	if (p[0] == NUL)
				929	break;
				930	}
				931	else if (p[0] == '\\' && p[1] != NUL)
				932	{
				933	if (dirc == '?' && newp != NULL && p[1] == '?')
				934	{
				935	/* change "\?" to "?", make a copy first. */
				936	if (*newp == NULL)
				937	{
				938	*newp = vim_strsave(startp);
				939	if (*newp != NULL)
				940	p = *newp + (p - startp);
				941	}
				942	if (*newp != NULL)
				943	mch_memmove(p, p + 1, STRLEN(p));
				944	else
				945	++p;
				946	}
				947	else
				948	++p; /* skip next character */
				949	if (*p == 'v')
				950	mymagic = MAGIC_ALL;
				951	else if (*p == 'V')
				952	mymagic = MAGIC_NONE;
				953	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	954	}
				955	return p;
				956	}
				957
				958	/*
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	959	* vim_regcomp() - compile a regular expression into internal code
				960	* Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	961	*
				962	* We can't allocate space until we know how big the compiled form will be,
				963	* but we can't compile it (and thus know how big it is) until we've got a
				964	* place to put the code. So we cheat: we compile it twice, once with code
				965	* generation turned off and size counting turned on, and once "for real".
				966	* This also means that we don't allocate space until we are sure that the
				967	* thing really will compile successfully, and we never have to move the
				968	* code and thus invalidate pointers into it. (Note that it has to be in
				969	* one piece because vim_free() must be able to free it all.)
				970	*
				971	* Whether upper/lower case is to be ignored is decided when executing the
				972	* program, it does not matter here.
				973	*
				974	* Beware that the optimization-preparation code in here knows about some
				975	* of the structure of the compiled regexp.
				976	* "re_flags": RE_MAGIC and/or RE_STRING.
				977	*/
				978	regprog_T *
				979	vim_regcomp(expr, re_flags)
				980	char_u *expr;
				981	int re_flags;
				982	{
				983	regprog_T *r;
				984	char_u *scan;
				985	char_u *longest;
				986	int len;
				987	int flags;
				988
				989	if (expr == NULL)
				990	EMSG_RET_NULL(_(e_null));
				991
				992	init_class_tab();
				993
				994	/*
				995	* First pass: determine size, legality.
				996	*/
				997	regcomp_start(expr, re_flags);
				998	regcode = JUST_CALC_SIZE;
				999	regc(REGMAGIC);
				1000	if (reg(REG_NOPAREN, &flags) == NULL)
				1001	return NULL;
				1002
				1003	/* Small enough for pointer-storage convention? */
				1004	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				1005	if (regsize >= 65536L - 256L)
				1006	EMSG_RET_NULL(_("E339: Pattern too long"));
				1007	#endif
				1008
				1009	/* Allocate space. */
				1010	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				1011	if (r == NULL)
				1012	return NULL;
				1013
				1014	/*
				1015	* Second pass: emit code.
				1016	*/
				1017	regcomp_start(expr, re_flags);
				1018	regcode = r->program;
				1019	regc(REGMAGIC);
				1020	if (reg(REG_NOPAREN, &flags) == NULL)
				1021	{
				1022	vim_free(r);
				1023	return NULL;
				1024	}
				1025
				1026	/* Dig out information for optimizations. */
				1027	r->regstart = NUL; /* Worst-case defaults. */
				1028	r->reganch = 0;
				1029	r->regmust = NULL;
				1030	r->regmlen = 0;
				1031	r->regflags = regflags;
				1032	if (flags & HASNL)
				1033	r->regflags \|= RF_HASNL;
				1034	if (flags & HASLOOKBH)
				1035	r->regflags \|= RF_LOOKBH;
				1036	#ifdef FEAT_SYN_HL
				1037	/* Remember whether this pattern has any \z specials in it. */
				1038	r->reghasz = re_has_z;
				1039	#endif
				1040	scan = r->program + 1; /* First BRANCH. */
				1041	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				1042	{
				1043	scan = OPERAND(scan);
				1044
				1045	/* Starting-point info. */
				1046	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				1047	{
				1048	r->reganch++;
				1049	scan = regnext(scan);
				1050	}
				1051
				1052	if (OP(scan) == EXACTLY)
				1053	{
				1054	#ifdef FEAT_MBYTE
				1055	if (has_mbyte)
				1056	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				1057	else
				1058	#endif
				1059	r->regstart = *OPERAND(scan);
				1060	}
				1061	else if ((OP(scan) == BOW
				1062	\|\| OP(scan) == EOW
				1063	\|\| OP(scan) == NOTHING
				1064	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				1065	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				1066	&& OP(regnext(scan)) == EXACTLY)
				1067	{
				1068	#ifdef FEAT_MBYTE
				1069	if (has_mbyte)
				1070	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				1071	else
				1072	#endif
				1073	r->regstart = *OPERAND(regnext(scan));
				1074	}
				1075
				1076	/*
				1077	* If there's something expensive in the r.e., find the longest
				1078	* literal string that must appear and make it the regmust. Resolve
				1079	* ties in favor of later strings, since the regstart check works
				1080	* with the beginning of the r.e. and avoiding duplication
				1081	* strengthens checking. Not a strong reason, but sufficient in the
				1082	* absence of others.
				1083	*/
				1084	/*
				1085	* When the r.e. starts with BOW, it is faster to look for a regmust
				1086	* first. Used a lot for "#" and "*" commands. (Added by mool).
				1087	*/
				1088	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				1089	&& !(flags & HASNL))
				1090	{
				1091	longest = NULL;
				1092	len = 0;
				1093	for (; scan != NULL; scan = regnext(scan))
				1094	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				1095	{
				1096	longest = OPERAND(scan);
				1097	len = (int)STRLEN(OPERAND(scan));
				1098	}
				1099	r->regmust = longest;
				1100	r->regmlen = len;
				1101	}
				1102	}
				1103	#ifdef DEBUG
				1104	regdump(expr, r);
				1105	#endif
				1106	return r;
				1107	}
				1108
				1109	/*
				1110	* Setup to parse the regexp. Used once to get the length and once to do it.
				1111	*/
				1112	static void
				1113	regcomp_start(expr, re_flags)
				1114	char_u *expr;
				1115	int re_flags; /* see vim_regcomp() */
				1116	{
				1117	initchr(expr);
				1118	if (re_flags & RE_MAGIC)
				1119	reg_magic = MAGIC_ON;
				1120	else
				1121	reg_magic = MAGIC_OFF;
				1122	reg_string = (re_flags & RE_STRING);
				1123
				1124	num_complex_braces = 0;
				1125	regnpar = 1;
				1126	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				1127	#ifdef FEAT_SYN_HL
				1128	regnzpar = 1;
				1129	re_has_z = 0;
				1130	#endif
				1131	regsize = 0L;
				1132	regflags = 0;
				1133	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1134	had_eol = FALSE;
				1135	#endif
				1136	}
				1137
				1138	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1139	/*
				1140	* Check if during the previous call to vim_regcomp the EOL item "$" has been
				1141	* found. This is messy, but it works fine.
				1142	*/
				1143	int
				1144	vim_regcomp_had_eol()
				1145	{
				1146	return had_eol;
				1147	}
				1148	#endif
				1149
				1150	/*
				1151	* reg - regular expression, i.e. main body or parenthesized thing
				1152	*
				1153	* Caller must absorb opening parenthesis.
				1154	*
				1155	* Combining parenthesis handling with the base level of regular expression
				1156	* is a trifle forced, but the need to tie the tails of the branches to what
				1157	* follows makes it hard to avoid.
				1158	*/
				1159	static char_u *
				1160	reg(paren, flagp)
				1161	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1162	int *flagp;
				1163	{
				1164	char_u *ret;
				1165	char_u *br;
				1166	char_u *ender;
				1167	int parno = 0;
				1168	int flags;
				1169
				1170	flagp = HASWIDTH; / Tentatively. */
				1171
				1172	#ifdef FEAT_SYN_HL
				1173	if (paren == REG_ZPAREN)
				1174	{
				1175	/* Make a ZOPEN node. */
				1176	if (regnzpar >= NSUBEXP)
				1177	EMSG_RET_NULL(_("E50: Too many \\z("));
				1178	parno = regnzpar;
				1179	regnzpar++;
				1180	ret = regnode(ZOPEN + parno);
				1181	}
				1182	else
				1183	#endif
				1184	if (paren == REG_PAREN)
				1185	{
				1186	/* Make a MOPEN node. */
				1187	if (regnpar >= NSUBEXP)
				1188	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1189	parno = regnpar;
				1190	++regnpar;
				1191	ret = regnode(MOPEN + parno);
				1192	}
				1193	else if (paren == REG_NPAREN)
				1194	{
				1195	/* Make a NOPEN node. */
				1196	ret = regnode(NOPEN);
				1197	}
				1198	else
				1199	ret = NULL;
				1200
				1201	/* Pick up the branches, linking them together. */
				1202	br = regbranch(&flags);
				1203	if (br == NULL)
				1204	return NULL;
				1205	if (ret != NULL)
				1206	regtail(ret, br); /* [MZ]OPEN -> first. */
				1207	else
				1208	ret = br;
				1209	/* If one of the branches can be zero-width, the whole thing can.
				1210	* If one of the branches has * at start or matches a line-break, the
				1211	* whole thing can. */
				1212	if (!(flags & HASWIDTH))
				1213	*flagp &= ~HASWIDTH;
				1214	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1215	while (peekchr() == Magic('\|'))
				1216	{
				1217	skipchr();
				1218	br = regbranch(&flags);
				1219	if (br == NULL)
				1220	return NULL;
				1221	regtail(ret, br); /* BRANCH -> BRANCH. */
				1222	if (!(flags & HASWIDTH))
				1223	*flagp &= ~HASWIDTH;
				1224	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1225	}
				1226
				1227	/* Make a closing node, and hook it on the end. */
				1228	ender = regnode(
				1229	#ifdef FEAT_SYN_HL
				1230	paren == REG_ZPAREN ? ZCLOSE + parno :
				1231	#endif
				1232	paren == REG_PAREN ? MCLOSE + parno :
				1233	paren == REG_NPAREN ? NCLOSE : END);
				1234	regtail(ret, ender);
				1235
				1236	/* Hook the tails of the branches to the closing node. */
				1237	for (br = ret; br != NULL; br = regnext(br))
				1238	regoptail(br, ender);
				1239
				1240	/* Check for proper termination. */
				1241	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1242	{
				1243	#ifdef FEAT_SYN_HL
				1244	if (paren == REG_ZPAREN)
				1245	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1246	else
				1247	#endif
				1248	if (paren == REG_NPAREN)
				1249	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1250	else
				1251	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1252	}
				1253	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1254	{
				1255	if (curchr == Magic(')'))
				1256	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1257	else
				1258	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1259	/* NOTREACHED */
				1260	}
				1261	/*
				1262	* Here we set the flag allowing back references to this set of
				1263	* parentheses.
				1264	*/
				1265	if (paren == REG_PAREN)
				1266	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1267	return ret;
				1268	}
				1269
				1270	/*
				1271	* regbranch - one alternative of an \| operator
				1272	*
				1273	* Implements the & operator.
				1274	*/
				1275	static char_u *
				1276	regbranch(flagp)
				1277	int *flagp;
				1278	{
				1279	char_u *ret;
				1280	char_u *chain = NULL;
				1281	char_u *latest;
				1282	int flags;
				1283
				1284	flagp = WORST \| HASNL; / Tentatively. */
				1285
				1286	ret = regnode(BRANCH);
				1287	for (;;)
				1288	{
				1289	latest = regconcat(&flags);
				1290	if (latest == NULL)
				1291	return NULL;
				1292	/* If one of the branches has width, the whole thing has. If one of
				1293	* the branches anchors at start-of-line, the whole thing does.
				1294	* If one of the branches uses look-behind, the whole thing does. */
				1295	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1296	/* If one of the branches doesn't match a line-break, the whole thing
				1297	* doesn't. */
				1298	*flagp &= ~HASNL \| (flags & HASNL);
				1299	if (chain != NULL)
				1300	regtail(chain, latest);
				1301	if (peekchr() != Magic('&'))
				1302	break;
				1303	skipchr();
				1304	regtail(latest, regnode(END)); /* operand ends */
				1305	reginsert(MATCH, latest);
				1306	chain = latest;
				1307	}
				1308
				1309	return ret;
				1310	}
				1311
				1312	/*
				1313	* regbranch - one alternative of an \| or & operator
				1314	*
				1315	* Implements the concatenation operator.
				1316	*/
				1317	static char_u *
				1318	regconcat(flagp)
				1319	int *flagp;
				1320	{
				1321	char_u *first = NULL;
				1322	char_u *chain = NULL;
				1323	char_u *latest;
				1324	int flags;
				1325	int cont = TRUE;
				1326
				1327	flagp = WORST; / Tentatively. */
				1328
				1329	while (cont)
				1330	{
				1331	switch (peekchr())
				1332	{
				1333	case NUL:
				1334	case Magic('\|'):
				1335	case Magic('&'):
				1336	case Magic(')'):
				1337	cont = FALSE;
				1338	break;
				1339	case Magic('Z'):
				1340	#ifdef FEAT_MBYTE
				1341	regflags \|= RF_ICOMBINE;
				1342	#endif
				1343	skipchr_keepstart();
				1344	break;
				1345	case Magic('c'):
				1346	regflags \|= RF_ICASE;
				1347	skipchr_keepstart();
				1348	break;
				1349	case Magic('C'):
				1350	regflags \|= RF_NOICASE;
				1351	skipchr_keepstart();
				1352	break;
				1353	case Magic('v'):
				1354	reg_magic = MAGIC_ALL;
				1355	skipchr_keepstart();
				1356	curchr = -1;
				1357	break;
				1358	case Magic('m'):
				1359	reg_magic = MAGIC_ON;
				1360	skipchr_keepstart();
				1361	curchr = -1;
				1362	break;
				1363	case Magic('M'):
				1364	reg_magic = MAGIC_OFF;
				1365	skipchr_keepstart();
				1366	curchr = -1;
				1367	break;
				1368	case Magic('V'):
				1369	reg_magic = MAGIC_NONE;
				1370	skipchr_keepstart();
				1371	curchr = -1;
				1372	break;
				1373	default:
				1374	latest = regpiece(&flags);
				1375	if (latest == NULL)
				1376	return NULL;
				1377	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1378	if (chain == NULL) /* First piece. */
				1379	*flagp \|= flags & SPSTART;
				1380	else
				1381	regtail(chain, latest);
				1382	chain = latest;
				1383	if (first == NULL)
				1384	first = latest;
				1385	break;
				1386	}
				1387	}
				1388	if (first == NULL) /* Loop ran zero times. */
				1389	first = regnode(NOTHING);
				1390	return first;
				1391	}
				1392
				1393	/*
				1394	* regpiece - something followed by possible [*+=]
				1395	*
				1396	* Note that the branching code sequences used for = and the general cases
				1397	* of * and + are somewhat optimized: they use the same NOTHING node as
				1398	* both the endmarker for their branch list and the body of the last branch.
				1399	* It might seem that this node could be dispensed with entirely, but the
				1400	* endmarker role is not redundant.
				1401	*/
				1402	static char_u *
				1403	regpiece(flagp)
				1404	int *flagp;
				1405	{
				1406	char_u *ret;
				1407	int op;
				1408	char_u *next;
				1409	int flags;
				1410	long minval;
				1411	long maxval;
				1412
				1413	ret = regatom(&flags);
				1414	if (ret == NULL)
				1415	return NULL;
				1416
				1417	op = peekchr();
				1418	if (re_multi_type(op) == NOT_MULTI)
				1419	{
				1420	*flagp = flags;
				1421	return ret;
				1422	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1423	/* default flags */
				1424	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1425
				1426	skipchr();
				1427	switch (op)
				1428	{
				1429	case Magic('*'):
				1430	if (flags & SIMPLE)
				1431	reginsert(STAR, ret);
				1432	else
				1433	{
				1434	/* Emit x* as (x&\|), where & means "self". */
				1435	reginsert(BRANCH, ret); /* Either x */
				1436	regoptail(ret, regnode(BACK)); /* and loop */
				1437	regoptail(ret, ret); /* back */
				1438	regtail(ret, regnode(BRANCH)); /* or */
				1439	regtail(ret, regnode(NOTHING)); /* null. */
				1440	}
				1441	break;
				1442
				1443	case Magic('+'):
				1444	if (flags & SIMPLE)
				1445	reginsert(PLUS, ret);
				1446	else
				1447	{
				1448	/* Emit x+ as x(&\|), where & means "self". */
				1449	next = regnode(BRANCH); /* Either */
				1450	regtail(ret, next);
				1451	regtail(regnode(BACK), ret); /* loop back */
				1452	regtail(next, regnode(BRANCH)); /* or */
				1453	regtail(ret, regnode(NOTHING)); /* null. */
				1454	}
				1455	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1456	break;
				1457
				1458	case Magic('@'):
				1459	{
				1460	int lop = END;
				1461
				1462	switch (no_Magic(getchr()))
				1463	{
				1464	case '=': lop = MATCH; break; /* \@= */
				1465	case '!': lop = NOMATCH; break; /* \@! */
				1466	case '>': lop = SUBPAT; break; /* \@> */
				1467	case '<': switch (no_Magic(getchr()))
				1468	{
				1469	case '=': lop = BEHIND; break; /* \@<= */
				1470	case '!': lop = NOBEHIND; break; /* \@<! */
				1471	}
				1472	}
				1473	if (lop == END)
				1474	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1475	reg_magic == MAGIC_ALL);
				1476	/* Look behind must match with behind_pos. */
				1477	if (lop == BEHIND \|\| lop == NOBEHIND)
				1478	{
				1479	regtail(ret, regnode(BHPOS));
				1480	*flagp \|= HASLOOKBH;
				1481	}
				1482	regtail(ret, regnode(END)); /* operand ends */
				1483	reginsert(lop, ret);
				1484	break;
				1485	}
				1486
				1487	case Magic('?'):
				1488	case Magic('='):
				1489	/* Emit x= as (x\|) */
				1490	reginsert(BRANCH, ret); /* Either x */
				1491	regtail(ret, regnode(BRANCH)); /* or */
				1492	next = regnode(NOTHING); /* null. */
				1493	regtail(ret, next);
				1494	regoptail(ret, next);
				1495	break;
				1496
				1497	case Magic('{'):
				1498	if (!read_limits(&minval, &maxval))
				1499	return NULL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1500	if (flags & SIMPLE)
				1501	{
				1502	reginsert(BRACE_SIMPLE, ret);
				1503	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1504	}
				1505	else
				1506	{
				1507	if (num_complex_braces >= 10)
				1508	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1509	reg_magic == MAGIC_ALL);
				1510	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1511	regoptail(ret, regnode(BACK));
				1512	regoptail(ret, ret);
				1513	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1514	++num_complex_braces;
				1515	}
				1516	if (minval > 0 && maxval > 0)
				1517	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1518	break;
				1519	}
				1520	if (re_multi_type(peekchr()) != NOT_MULTI)
				1521	{
				1522	/* Can't have a multi follow a multi. */
				1523	if (peekchr() == Magic('*'))
				1524	sprintf((char )IObuff, _("E61: Nested %s"),
				1525	reg_magic >= MAGIC_ON ? "" : "\\");
				1526	else
				1527	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1528	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1529	EMSG_RET_NULL(IObuff);
				1530	}
				1531
				1532	return ret;
				1533	}
				1534
				1535	/*
				1536	* regatom - the lowest level
				1537	*
				1538	* Optimization: gobbles an entire sequence of ordinary characters so that
				1539	* it can turn them into a single node, which is smaller to store and
				1540	* faster to run. Don't do this when one_exactly is set.
				1541	*/
				1542	static char_u *
				1543	regatom(flagp)
				1544	int *flagp;
				1545	{
				1546	char_u *ret;
				1547	int flags;
				1548	int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1549	int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1550	int c;
				1551	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1552	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1553	FNAME, SFNAME, PRINT, SPRINT,
				1554	WHITE, NWHITE, DIGIT, NDIGIT,
				1555	HEX, NHEX, OCTAL, NOCTAL,
				1556	WORD, NWORD, HEAD, NHEAD,
				1557	ALPHA, NALPHA, LOWER, NLOWER,
				1558	UPPER, NUPPER
				1559	};
				1560	char_u *p;
				1561	int extra = 0;
				1562
				1563	flagp = WORST; / Tentatively. */
				1564	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1565	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1566
				1567	c = getchr();
				1568	switch (c)
				1569	{
				1570	case Magic('^'):
				1571	ret = regnode(BOL);
				1572	break;
				1573
				1574	case Magic('$'):
				1575	ret = regnode(EOL);
				1576	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1577	had_eol = TRUE;
				1578	#endif
				1579	break;
				1580
				1581	case Magic('<'):
				1582	ret = regnode(BOW);
				1583	break;
				1584
				1585	case Magic('>'):
				1586	ret = regnode(EOW);
				1587	break;
				1588
				1589	case Magic('_'):
				1590	c = no_Magic(getchr());
				1591	if (c == '^') /* "\_^" is start-of-line */
				1592	{
				1593	ret = regnode(BOL);
				1594	break;
				1595	}
				1596	if (c == '$') /* "\_$" is end-of-line */
				1597	{
				1598	ret = regnode(EOL);
				1599	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1600	had_eol = TRUE;
				1601	#endif
				1602	break;
				1603	}
				1604
				1605	extra = ADD_NL;
				1606	*flagp \|= HASNL;
				1607
				1608	/* "\_[" is character range plus newline */
				1609	if (c == '[')
				1610	goto collection;
				1611
				1612	/* "\_x" is character class plus newline */
				1613	/FALLTHROUGH/
				1614
				1615	/*
				1616	* Character classes.
				1617	*/
				1618	case Magic('.'):
				1619	case Magic('i'):
				1620	case Magic('I'):
				1621	case Magic('k'):
				1622	case Magic('K'):
				1623	case Magic('f'):
				1624	case Magic('F'):
				1625	case Magic('p'):
				1626	case Magic('P'):
				1627	case Magic('s'):
				1628	case Magic('S'):
				1629	case Magic('d'):
				1630	case Magic('D'):
				1631	case Magic('x'):
				1632	case Magic('X'):
				1633	case Magic('o'):
				1634	case Magic('O'):
				1635	case Magic('w'):
				1636	case Magic('W'):
				1637	case Magic('h'):
				1638	case Magic('H'):
				1639	case Magic('a'):
				1640	case Magic('A'):
				1641	case Magic('l'):
				1642	case Magic('L'):
				1643	case Magic('u'):
				1644	case Magic('U'):
				1645	p = vim_strchr(classchars, no_Magic(c));
				1646	if (p == NULL)
				1647	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1648	ret = regnode(classcodes[p - classchars] + extra);
				1649	*flagp \|= HASWIDTH \| SIMPLE;
				1650	break;
				1651
				1652	case Magic('n'):
				1653	if (reg_string)
				1654	{
				1655	/* In a string "\n" matches a newline character. */
				1656	ret = regnode(EXACTLY);
				1657	regc(NL);
				1658	regc(NUL);
				1659	*flagp \|= HASWIDTH \| SIMPLE;
				1660	}
				1661	else
				1662	{
				1663	/* In buffer text "\n" matches the end of a line. */
				1664	ret = regnode(NEWL);
				1665	*flagp \|= HASWIDTH \| HASNL;
				1666	}
				1667	break;
				1668
				1669	case Magic('('):
				1670	if (one_exactly)
				1671	EMSG_ONE_RET_NULL;
				1672	ret = reg(REG_PAREN, &flags);
				1673	if (ret == NULL)
				1674	return NULL;
				1675	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1676	break;
				1677
				1678	case NUL:
				1679	case Magic('\|'):
				1680	case Magic('&'):
				1681	case Magic(')'):
				1682	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1683	/* NOTREACHED */
				1684
				1685	case Magic('='):
				1686	case Magic('?'):
				1687	case Magic('+'):
				1688	case Magic('@'):
				1689	case Magic('{'):
				1690	case Magic('*'):
				1691	c = no_Magic(c);
				1692	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1693	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1694	? "" : "\\", c);
				1695	EMSG_RET_NULL(IObuff);
				1696	/* NOTREACHED */
				1697
				1698	case Magic('~'): /* previous substitute pattern */
				1699	if (reg_prev_sub)
				1700	{
				1701	char_u *lp;
				1702
				1703	ret = regnode(EXACTLY);
				1704	lp = reg_prev_sub;
				1705	while (*lp != NUL)
				1706	regc(*lp++);
				1707	regc(NUL);
				1708	if (*reg_prev_sub != NUL)
				1709	{
				1710	*flagp \|= HASWIDTH;
				1711	if ((lp - reg_prev_sub) == 1)
				1712	*flagp \|= SIMPLE;
				1713	}
				1714	}
				1715	else
				1716	EMSG_RET_NULL(_(e_nopresub));
				1717	break;
				1718
				1719	case Magic('1'):
				1720	case Magic('2'):
				1721	case Magic('3'):
				1722	case Magic('4'):
				1723	case Magic('5'):
				1724	case Magic('6'):
				1725	case Magic('7'):
				1726	case Magic('8'):
				1727	case Magic('9'):
				1728	{
				1729	int refnum;
				1730
				1731	refnum = c - Magic('0');
				1732	/*
				1733	* Check if the back reference is legal. We must have seen the
				1734	* close brace.
				1735	* TODO: Should also check that we don't refer to something
				1736	* that is repeated (+*=): what instance of the repetition
				1737	* should we match?
				1738	*/
				1739	if (!had_endbrace[refnum])
				1740	{
				1741	/* Trick: check if "@<=" or "@<!" follows, in which case
				1742	* the \1 can appear before the referenced match. */
				1743	for (p = regparse; *p != NUL; ++p)
				1744	if (p[0] == '@' && p[1] == '<'
				1745	&& (p[2] == '!' \|\| p[2] == '='))
				1746	break;
				1747	if (*p == NUL)
				1748	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1749	}
				1750	ret = regnode(BACKREF + refnum);
				1751	}
				1752	break;
				1753
				1754	#ifdef FEAT_SYN_HL
				1755	case Magic('z'):
				1756	{
				1757	c = no_Magic(getchr());
				1758	switch (c)
				1759	{
				1760	case '(': if (reg_do_extmatch != REX_SET)
				1761	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1762	if (one_exactly)
				1763	EMSG_ONE_RET_NULL;
				1764	ret = reg(REG_ZPAREN, &flags);
				1765	if (ret == NULL)
				1766	return NULL;
				1767	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1768	re_has_z = REX_SET;
				1769	break;
				1770
				1771	case '1':
				1772	case '2':
				1773	case '3':
				1774	case '4':
				1775	case '5':
				1776	case '6':
				1777	case '7':
				1778	case '8':
				1779	case '9': if (reg_do_extmatch != REX_USE)
				1780	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1781	ret = regnode(ZREF + c - '0');
				1782	re_has_z = REX_USE;
				1783	break;
				1784
				1785	case 's': ret = regnode(MOPEN + 0);
				1786	break;
				1787
				1788	case 'e': ret = regnode(MCLOSE + 0);
				1789	break;
				1790
				1791	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1792	}
				1793	}
				1794	break;
				1795	#endif
				1796
				1797	case Magic('%'):
				1798	{
				1799	c = no_Magic(getchr());
				1800	switch (c)
				1801	{
				1802	/* () without a back reference */
				1803	case '(':
				1804	if (one_exactly)
				1805	EMSG_ONE_RET_NULL;
				1806	ret = reg(REG_NPAREN, &flags);
				1807	if (ret == NULL)
				1808	return NULL;
				1809	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1810	break;
				1811
				1812	/* Catch \%^ and \%$ regardless of where they appear in the
				1813	* pattern -- regardless of whether or not it makes sense. */
				1814	case '^':
				1815	ret = regnode(RE_BOF);
				1816	break;
				1817
				1818	case '$':
				1819	ret = regnode(RE_EOF);
				1820	break;
				1821
				1822	case '#':
				1823	ret = regnode(CURSOR);
				1824	break;
				1825
				1826	/* \%[abc]: Emit as a list of branches, all ending at the last
				1827	* branch which matches nothing. */
				1828	case '[':
				1829	if (one_exactly) /* doesn't nest */
				1830	EMSG_ONE_RET_NULL;
				1831	{
				1832	char_u *lastbranch;
				1833	char_u *lastnode = NULL;
				1834	char_u *br;
				1835
				1836	ret = NULL;
				1837	while ((c = getchr()) != ']')
				1838	{
				1839	if (c == NUL)
				1840	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1841	reg_magic == MAGIC_ALL);
				1842	br = regnode(BRANCH);
				1843	if (ret == NULL)
				1844	ret = br;
				1845	else
				1846	regtail(lastnode, br);
				1847
				1848	ungetchr();
				1849	one_exactly = TRUE;
				1850	lastnode = regatom(flagp);
				1851	one_exactly = FALSE;
				1852	if (lastnode == NULL)
				1853	return NULL;
				1854	}
				1855	if (ret == NULL)
				1856	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1857	reg_magic == MAGIC_ALL);
				1858	lastbranch = regnode(BRANCH);
				1859	br = regnode(NOTHING);
				1860	if (ret != JUST_CALC_SIZE)
				1861	{
				1862	regtail(lastnode, br);
				1863	regtail(lastbranch, br);
				1864	/* connect all branches to the NOTHING
				1865	* branch at the end */
				1866	for (br = ret; br != lastnode; )
				1867	{
				1868	if (OP(br) == BRANCH)
				1869	{
				1870	regtail(br, lastbranch);
				1871	br = OPERAND(br);
				1872	}
				1873	else
				1874	br = regnext(br);
				1875	}
				1876	}
				1877	*flagp &= ~HASWIDTH;
				1878	break;
				1879	}
				1880
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1881	case 'd': /* %d123 decimal */
				1882	case 'o': /* %o123 octal */
				1883	case 'x': /* %xab hex 2 */
				1884	case 'u': /* %uabcd hex 4 */
				1885	case 'U': /* %U1234abcd hex 8 */
				1886	{
				1887	int i;
				1888
				1889	switch (c)
				1890	{
				1891	case 'd': i = getdecchrs(); break;
				1892	case 'o': i = getoctchrs(); break;
				1893	case 'x': i = gethexchrs(2); break;
				1894	case 'u': i = gethexchrs(4); break;
				1895	case 'U': i = gethexchrs(8); break;
				1896	default: i = -1; break;
				1897	}
				1898
				1899	if (i < 0)
				1900	EMSG_M_RET_NULL(
				1901	_("E678: Invalid character after %s%%[dxouU]"),
				1902	reg_magic == MAGIC_ALL);
				1903	ret = regnode(EXACTLY);
				1904	if (i == 0)
				1905	regc(0x0a);
				1906	else
				1907	#ifdef FEAT_MBYTE
				1908	regmbc(i);
				1909	#else
				1910	regc(i);
				1911	#endif
				1912	regc(NUL);
				1913	*flagp \|= HASWIDTH;
				1914	break;
				1915	}
				1916
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1917	default:
				1918	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1919	{
				1920	long_u n = 0;
				1921	int cmp;
				1922
				1923	cmp = c;
				1924	if (cmp == '<' \|\| cmp == '>')
				1925	c = getchr();
				1926	while (VIM_ISDIGIT(c))
				1927	{
				1928	n = n * 10 + (c - '0');
				1929	c = getchr();
				1930	}
				1931	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1932	{
				1933	if (c == 'l')
				1934	ret = regnode(RE_LNUM);
				1935	else if (c == 'c')
				1936	ret = regnode(RE_COL);
				1937	else
				1938	ret = regnode(RE_VCOL);
				1939	if (ret == JUST_CALC_SIZE)
				1940	regsize += 5;
				1941	else
				1942	{
				1943	/* put the number and the optional
				1944	* comparator after the opcode */
				1945	regcode = re_put_long(regcode, n);
				1946	*regcode++ = cmp;
				1947	}
				1948	break;
				1949	}
				1950	}
				1951
				1952	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1953	reg_magic == MAGIC_ALL);
				1954	}
				1955	}
				1956	break;
				1957
				1958	case Magic('['):
				1959	collection:
				1960	{
				1961	char_u *lp;
				1962
				1963	/*
				1964	* If there is no matching ']', we assume the '[' is a normal
				1965	* character. This makes 'incsearch' and ":help [" work.
				1966	*/
				1967	lp = skip_anyof(regparse);
				1968	if (lp == ']') / there is a matching ']' */
				1969	{
				1970	int startc = -1; /* > 0 when next '-' is a range */
				1971	int endc;
				1972
				1973	/*
				1974	* In a character class, different parsing rules apply.
				1975	* Not even \ is special anymore, nothing is.
				1976	*/
				1977	if (regparse == '^') / Complement of range. */
				1978	{
				1979	ret = regnode(ANYBUT + extra);
				1980	regparse++;
				1981	}
				1982	else
				1983	ret = regnode(ANYOF + extra);
				1984
				1985	/* At the start ']' and '-' mean the literal character. */
				1986	if (regparse == ']' \|\| regparse == '-')
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1987	{
				1988	startc = *regparse;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1989	regc(*regparse++);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1990	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1991
				1992	while (regparse != NUL && regparse != ']')
				1993	{
				1994	if (*regparse == '-')
				1995	{
				1996	++regparse;
				1997	/* The '-' is not used for a range at the end and
				1998	* after or before a '\n'. */
				1999	if (regparse == ']' \|\| regparse == NUL
				2000	\|\| startc == -1
				2001	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				2002	{
				2003	regc('-');
				2004	startc = '-'; /* [--x] is a range */
				2005	}
				2006	else
				2007	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2008	/* Also accept "a-[.z.]" */
				2009	endc = 0;
				2010	if (*regparse == '[')
				2011	endc = get_coll_element(&regparse);
				2012	if (endc == 0)
				2013	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2014	#ifdef FEAT_MBYTE
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2015	if (has_mbyte)
				2016	endc = mb_ptr2char_adv(&regparse);
				2017	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2018	#endif
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2019	endc = *regparse++;
				2020	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2021
				2022	/* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2023	if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2024	endc = coll_get_char();
				2025
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2026	if (startc > endc)
				2027	EMSG_RET_NULL(_(e_invrange));
				2028	#ifdef FEAT_MBYTE
				2029	if (has_mbyte && ((*mb_char2len)(startc) > 1
				2030	\|\| (*mb_char2len)(endc) > 1))
				2031	{
				2032	/* Limit to a range of 256 chars */
				2033	if (endc > startc + 256)
				2034	EMSG_RET_NULL(_(e_invrange));
				2035	while (++startc <= endc)
				2036	regmbc(startc);
				2037	}
				2038	else
				2039	#endif
				2040	{
				2041	#ifdef EBCDIC
				2042	int alpha_only = FALSE;
				2043
				2044	/* for alphabetical range skip the gaps
				2045	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				2046	if (isalpha(startc) && isalpha(endc))
				2047	alpha_only = TRUE;
				2048	#endif
				2049	while (++startc <= endc)
				2050	#ifdef EBCDIC
				2051	if (!alpha_only \|\| isalpha(startc))
				2052	#endif
				2053	regc(startc);
				2054	}
				2055	startc = -1;
				2056	}
				2057	}
				2058	/*
				2059	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				2060	* accepts "\t", "\e", etc., but only when the 'l' flag in
				2061	* 'cpoptions' is not included.
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2062	* Posix doesn't recognize backslash at all.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2063	*/
				2064	else if (*regparse == '\\'
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2065	&& !cpo_bsl
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2066	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				2067	\|\| (!cpo_lit
				2068	&& vim_strchr(REGEXP_ABBR,
				2069	regparse[1]) != NULL)))
				2070	{
				2071	regparse++;
				2072	if (*regparse == 'n')
				2073	{
				2074	/* '\n' in range: also match NL */
				2075	if (ret != JUST_CALC_SIZE)
				2076	{
				2077	if (*ret == ANYBUT)
				2078	*ret = ANYBUT + ADD_NL;
				2079	else if (*ret == ANYOF)
				2080	*ret = ANYOF + ADD_NL;
				2081	/* else: must have had a \n already */
				2082	}
				2083	*flagp \|= HASNL;
				2084	regparse++;
				2085	startc = -1;
				2086	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2087	else if (*regparse == 'd'
				2088	\|\| *regparse == 'o'
				2089	\|\| *regparse == 'x'
				2090	\|\| *regparse == 'u'
				2091	\|\| *regparse == 'U')
				2092	{
				2093	startc = coll_get_char();
				2094	if (startc == 0)
				2095	regc(0x0a);
				2096	else
				2097	#ifdef FEAT_MBYTE
				2098	regmbc(startc);
				2099	#else
				2100	regc(startc);
				2101	#endif
				2102	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2103	else
				2104	{
				2105	startc = backslash_trans(*regparse++);
				2106	regc(startc);
				2107	}
				2108	}
				2109	else if (*regparse == '[')
				2110	{
				2111	int c_class;
				2112	int cu;
				2113
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2114	c_class = get_char_class(&regparse);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2115	startc = -1;
				2116	/* Characters assumed to be 8 bits! */
				2117	switch (c_class)
				2118	{
				2119	case CLASS_NONE:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2120	c_class = get_equi_class(&regparse);
				2121	if (c_class != 0)
				2122	{
				2123	/* produce equivalence class */
				2124	reg_equi_class(c_class);
				2125	}
				2126	else if ((c_class =
				2127	get_coll_element(&regparse)) != 0)
				2128	{
				2129	/* produce a collating element */
				2130	regmbc(c_class);
				2131	}
				2132	else
				2133	{
				2134	/* literal '[', allow [[-x] as a range */
				2135	startc = *regparse++;
				2136	regc(startc);
				2137	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2138	break;
				2139	case CLASS_ALNUM:
				2140	for (cu = 1; cu <= 255; cu++)
				2141	if (isalnum(cu))
				2142	regc(cu);
				2143	break;
				2144	case CLASS_ALPHA:
				2145	for (cu = 1; cu <= 255; cu++)
				2146	if (isalpha(cu))
				2147	regc(cu);
				2148	break;
				2149	case CLASS_BLANK:
				2150	regc(' ');
				2151	regc('\t');
				2152	break;
				2153	case CLASS_CNTRL:
				2154	for (cu = 1; cu <= 255; cu++)
				2155	if (iscntrl(cu))
				2156	regc(cu);
				2157	break;
				2158	case CLASS_DIGIT:
				2159	for (cu = 1; cu <= 255; cu++)
				2160	if (VIM_ISDIGIT(cu))
				2161	regc(cu);
				2162	break;
				2163	case CLASS_GRAPH:
				2164	for (cu = 1; cu <= 255; cu++)
				2165	if (isgraph(cu))
				2166	regc(cu);
				2167	break;
				2168	case CLASS_LOWER:
				2169	for (cu = 1; cu <= 255; cu++)
				2170	if (islower(cu))
				2171	regc(cu);
				2172	break;
				2173	case CLASS_PRINT:
				2174	for (cu = 1; cu <= 255; cu++)
				2175	if (vim_isprintc(cu))
				2176	regc(cu);
				2177	break;
				2178	case CLASS_PUNCT:
				2179	for (cu = 1; cu <= 255; cu++)
				2180	if (ispunct(cu))
				2181	regc(cu);
				2182	break;
				2183	case CLASS_SPACE:
				2184	for (cu = 9; cu <= 13; cu++)
				2185	regc(cu);
				2186	regc(' ');
				2187	break;
				2188	case CLASS_UPPER:
				2189	for (cu = 1; cu <= 255; cu++)
				2190	if (isupper(cu))
				2191	regc(cu);
				2192	break;
				2193	case CLASS_XDIGIT:
				2194	for (cu = 1; cu <= 255; cu++)
				2195	if (vim_isxdigit(cu))
				2196	regc(cu);
				2197	break;
				2198	case CLASS_TAB:
				2199	regc('\t');
				2200	break;
				2201	case CLASS_RETURN:
				2202	regc('\r');
				2203	break;
				2204	case CLASS_BACKSPACE:
				2205	regc('\b');
				2206	break;
				2207	case CLASS_ESCAPE:
				2208	regc('\033');
				2209	break;
				2210	}
				2211	}
				2212	else
				2213	{
				2214	#ifdef FEAT_MBYTE
				2215	if (has_mbyte)
				2216	{
				2217	int len;
				2218
				2219	/* produce a multibyte character, including any
				2220	* following composing characters */
				2221	startc = mb_ptr2char(regparse);
				2222	len = (*mb_ptr2len_check)(regparse);
				2223	if (enc_utf8 && utf_char2len(startc) != len)
				2224	startc = -1; /* composing chars */
				2225	while (--len >= 0)
				2226	regc(*regparse++);
				2227	}
				2228	else
				2229	#endif
				2230	{
				2231	startc = *regparse++;
				2232	regc(startc);
				2233	}
				2234	}
				2235	}
				2236	regc(NUL);
				2237	prevchr_len = 1; /* last char was the ']' */
				2238	if (*regparse != ']')
				2239	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2240	skipchr(); /* let's be friends with the lexer again */
				2241	*flagp \|= HASWIDTH \| SIMPLE;
				2242	break;
				2243	}
				2244	}
				2245	/* FALLTHROUGH */
				2246
				2247	default:
				2248	{
				2249	int len;
				2250
				2251	#ifdef FEAT_MBYTE
				2252	/* A multi-byte character is handled as a separate atom if it's
				2253	* before a multi. */
				2254	if (has_mbyte && (*mb_char2len)(c) > 1
				2255	&& re_multi_type(peekchr()) != NOT_MULTI)
				2256	{
				2257	ret = regnode(MULTIBYTECODE);
				2258	regmbc(c);
				2259	*flagp \|= HASWIDTH \| SIMPLE;
				2260	break;
				2261	}
				2262	#endif
				2263
				2264	ret = regnode(EXACTLY);
				2265
				2266	/*
				2267	* Append characters as long as:
				2268	* - there is no following multi, we then need the character in
				2269	* front of it as a single character operand
				2270	* - not running into a Magic character
				2271	* - "one_exactly" is not set
				2272	* But always emit at least one character. Might be a Multi,
				2273	* e.g., a "[" without matching "]".
				2274	*/
				2275	for (len = 0; c != NUL && (len == 0
				2276	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2277	&& !one_exactly
				2278	&& !is_Magic(c))); ++len)
				2279	{
				2280	c = no_Magic(c);
				2281	#ifdef FEAT_MBYTE
				2282	if (has_mbyte)
				2283	{
				2284	regmbc(c);
				2285	if (enc_utf8)
				2286	{
				2287	int off;
				2288	int l;
				2289
				2290	/* Need to get composing character too, directly
				2291	* access regparse for that, because skipchr() skips
				2292	* over composing chars. */
				2293	ungetchr();
				2294	if (*regparse == '\\' && regparse[1] != NUL)
				2295	off = 1;
				2296	else
				2297	off = 0;
				2298	for (;;)
				2299	{
				2300	l = utf_ptr2len_check(regparse + off);
				2301	if (!UTF_COMPOSINGLIKE(regparse + off,
				2302	regparse + off + l))
				2303	break;
				2304	off += l;
				2305	regmbc(utf_ptr2char(regparse + off));
				2306	}
				2307	skipchr();
				2308	}
				2309	}
				2310	else
				2311	#endif
				2312	regc(c);
				2313	c = getchr();
				2314	}
				2315	ungetchr();
				2316
				2317	regc(NUL);
				2318	*flagp \|= HASWIDTH;
				2319	if (len == 1)
				2320	*flagp \|= SIMPLE;
				2321	}
				2322	break;
				2323	}
				2324
				2325	return ret;
				2326	}
				2327
				2328	/*
				2329	* emit a node
				2330	* Return pointer to generated code.
				2331	*/
				2332	static char_u *
				2333	regnode(op)
				2334	int op;
				2335	{
				2336	char_u *ret;
				2337
				2338	ret = regcode;
				2339	if (ret == JUST_CALC_SIZE)
				2340	regsize += 3;
				2341	else
				2342	{
				2343	*regcode++ = op;
				2344	regcode++ = NUL; / Null "next" pointer. */
				2345	*regcode++ = NUL;
				2346	}
				2347	return ret;
				2348	}
				2349
				2350	/*
				2351	* Emit (if appropriate) a byte of code
				2352	*/
				2353	static void
				2354	regc(b)
				2355	int b;
				2356	{
				2357	if (regcode == JUST_CALC_SIZE)
				2358	regsize++;
				2359	else
				2360	*regcode++ = b;
				2361	}
				2362
				2363	#ifdef FEAT_MBYTE
				2364	/*
				2365	* Emit (if appropriate) a multi-byte character of code
				2366	*/
				2367	static void
				2368	regmbc(c)
				2369	int c;
				2370	{
				2371	if (regcode == JUST_CALC_SIZE)
				2372	regsize += (*mb_char2len)(c);
				2373	else
				2374	regcode += (*mb_char2bytes)(c, regcode);
				2375	}
				2376	#endif
				2377
				2378	/*
				2379	* reginsert - insert an operator in front of already-emitted operand
				2380	*
				2381	* Means relocating the operand.
				2382	*/
				2383	static void
				2384	reginsert(op, opnd)
				2385	int op;
				2386	char_u *opnd;
				2387	{
				2388	char_u *src;
				2389	char_u *dst;
				2390	char_u *place;
				2391
				2392	if (regcode == JUST_CALC_SIZE)
				2393	{
				2394	regsize += 3;
				2395	return;
				2396	}
				2397	src = regcode;
				2398	regcode += 3;
				2399	dst = regcode;
				2400	while (src > opnd)
				2401	--dst = --src;
				2402
				2403	place = opnd; /* Op node, where operand used to be. */
				2404	*place++ = op;
				2405	*place++ = NUL;
				2406	*place = NUL;
				2407	}
				2408
				2409	/*
				2410	* reginsert_limits - insert an operator in front of already-emitted operand.
				2411	* The operator has the given limit values as operands. Also set next pointer.
				2412	*
				2413	* Means relocating the operand.
				2414	*/
				2415	static void
				2416	reginsert_limits(op, minval, maxval, opnd)
				2417	int op;
				2418	long minval;
				2419	long maxval;
				2420	char_u *opnd;
				2421	{
				2422	char_u *src;
				2423	char_u *dst;
				2424	char_u *place;
				2425
				2426	if (regcode == JUST_CALC_SIZE)
				2427	{
				2428	regsize += 11;
				2429	return;
				2430	}
				2431	src = regcode;
				2432	regcode += 11;
				2433	dst = regcode;
				2434	while (src > opnd)
				2435	--dst = --src;
				2436
				2437	place = opnd; /* Op node, where operand used to be. */
				2438	*place++ = op;
				2439	*place++ = NUL;
				2440	*place++ = NUL;
				2441	place = re_put_long(place, (long_u)minval);
				2442	place = re_put_long(place, (long_u)maxval);
				2443	regtail(opnd, place);
				2444	}
				2445
				2446	/*
				2447	* Write a long as four bytes at "p" and return pointer to the next char.
				2448	*/
				2449	static char_u *
				2450	re_put_long(p, val)
				2451	char_u *p;
				2452	long_u val;
				2453	{
				2454	*p++ = (char_u) ((val >> 24) & 0377);
				2455	*p++ = (char_u) ((val >> 16) & 0377);
				2456	*p++ = (char_u) ((val >> 8) & 0377);
				2457	*p++ = (char_u) (val & 0377);
				2458	return p;
				2459	}
				2460
				2461	/*
				2462	* regtail - set the next-pointer at the end of a node chain
				2463	*/
				2464	static void
				2465	regtail(p, val)
				2466	char_u *p;
				2467	char_u *val;
				2468	{
				2469	char_u *scan;
				2470	char_u *temp;
				2471	int offset;
				2472
				2473	if (p == JUST_CALC_SIZE)
				2474	return;
				2475
				2476	/* Find last node. */
				2477	scan = p;
				2478	for (;;)
				2479	{
				2480	temp = regnext(scan);
				2481	if (temp == NULL)
				2482	break;
				2483	scan = temp;
				2484	}
				2485
				2486	if (OP(scan) == BACK)
				2487	offset = (int)(scan - val);
				2488	else
				2489	offset = (int)(val - scan);
				2490	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2491	*(scan + 2) = (char_u) (offset & 0377);
				2492	}
				2493
				2494	/*
				2495	* regoptail - regtail on item after a BRANCH; nop if none
				2496	*/
				2497	static void
				2498	regoptail(p, val)
				2499	char_u *p;
				2500	char_u *val;
				2501	{
				2502	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2503	if (p == NULL \|\| p == JUST_CALC_SIZE
				2504	\|\| (OP(p) != BRANCH
				2505	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2506	return;
				2507	regtail(OPERAND(p), val);
				2508	}
				2509
				2510	/*
				2511	* getchr() - get the next character from the pattern. We know about
				2512	* magic and such, so therefore we need a lexical analyzer.
				2513	*/
				2514
				2515	/* static int curchr; */
				2516	static int prevprevchr;
				2517	static int prevchr;
				2518	static int nextchr; /* used for ungetchr() */
				2519	/*
				2520	* Note: prevchr is sometimes -1 when we are not at the start,
				2521	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2522	* taken to be magic -- webb
				2523	*/
				2524	static int at_start; /* True when on the first character */
				2525	static int prev_at_start; /* True when on the second character */
				2526
				2527	static void
				2528	initchr(str)
				2529	char_u *str;
				2530	{
				2531	regparse = str;
				2532	prevchr_len = 0;
				2533	curchr = prevprevchr = prevchr = nextchr = -1;
				2534	at_start = TRUE;
				2535	prev_at_start = FALSE;
				2536	}
				2537
				2538	static int
				2539	peekchr()
				2540	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2541	static int after_slash = FALSE;
				2542
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2543	if (curchr == -1)
				2544	{
				2545	switch (curchr = regparse[0])
				2546	{
				2547	case '.':
				2548	case '[':
				2549	case '~':
				2550	/* magic when 'magic' is on */
				2551	if (reg_magic >= MAGIC_ON)
				2552	curchr = Magic(curchr);
				2553	break;
				2554	case '(':
				2555	case ')':
				2556	case '{':
				2557	case '%':
				2558	case '+':
				2559	case '=':
				2560	case '?':
				2561	case '@':
				2562	case '!':
				2563	case '&':
				2564	case '\|':
				2565	case '<':
				2566	case '>':
				2567	case '#': /* future ext. */
				2568	case '"': /* future ext. */
				2569	case '\'': /* future ext. */
				2570	case ',': /* future ext. */
				2571	case '-': /* future ext. */
				2572	case ':': /* future ext. */
				2573	case ';': /* future ext. */
				2574	case '`': /* future ext. */
				2575	case '/': /* Can't be used in / command */
				2576	/* magic only after "\v" */
				2577	if (reg_magic == MAGIC_ALL)
				2578	curchr = Magic(curchr);
				2579	break;
				2580	case '*':
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2581	/* * is not magic as the very first character, eg "?*ptr", when
				2582	* after '^', eg "/^*ptr" and when after "\(", "\\|", "\&". But
				2583	* "\(\" is not magic, thus must be magic if "after_slash" /
				2584	if (reg_magic >= MAGIC_ON
				2585	&& !at_start
				2586	&& !(prev_at_start && prevchr == Magic('^'))
				2587	&& (after_slash
				2588	\|\| (prevchr != Magic('(')
				2589	&& prevchr != Magic('&')
				2590	&& prevchr != Magic('\|'))))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2591	curchr = Magic('*');
				2592	break;
				2593	case '^':
				2594	/* '^' is only magic as the very first character and if it's after
				2595	* "\(", "\\|", "\&' or "\n" */
				2596	if (reg_magic >= MAGIC_OFF
				2597	&& (at_start
				2598	\|\| reg_magic == MAGIC_ALL
				2599	\|\| prevchr == Magic('(')
				2600	\|\| prevchr == Magic('\|')
				2601	\|\| prevchr == Magic('&')
				2602	\|\| prevchr == Magic('n')
				2603	\|\| (no_Magic(prevchr) == '('
				2604	&& prevprevchr == Magic('%'))))
				2605	{
				2606	curchr = Magic('^');
				2607	at_start = TRUE;
				2608	prev_at_start = FALSE;
				2609	}
				2610	break;
				2611	case '$':
				2612	/* '$' is only magic as the very last char and if it's in front of
				2613	* either "\\|", "\)", "\&", or "\n" */
				2614	if (reg_magic >= MAGIC_OFF)
				2615	{
				2616	char_u *p = regparse + 1;
				2617
				2618	/* ignore \c \C \m and \M after '$' */
				2619	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2620	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2621	p += 2;
				2622	if (p[0] == NUL
				2623	\|\| (p[0] == '\\'
				2624	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2625	\|\| p[1] == 'n'))
				2626	\|\| reg_magic == MAGIC_ALL)
				2627	curchr = Magic('$');
				2628	}
				2629	break;
				2630	case '\\':
				2631	{
				2632	int c = regparse[1];
				2633
				2634	if (c == NUL)
				2635	curchr = '\\'; /* trailing '\' */
				2636	else if (
				2637	#ifdef EBCDIC
				2638	vim_strchr(META, c)
				2639	#else
				2640	c <= '~' && META_flags[c]
				2641	#endif
				2642	)
				2643	{
				2644	/*
				2645	* META contains everything that may be magic sometimes,
				2646	* except ^ and $ ("\^" and "\$" are only magic after
				2647	* "\v"). We now fetch the next character and toggle its
				2648	* magicness. Therefore, \ is so meta-magic that it is
				2649	* not in META.
				2650	*/
				2651	curchr = -1;
				2652	prev_at_start = at_start;
				2653	at_start = FALSE; /* be able to say "/\ptr" /
				2654	++regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2655	++after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2656	peekchr();
				2657	--regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2658	--after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2659	curchr = toggle_Magic(curchr);
				2660	}
				2661	else if (vim_strchr(REGEXP_ABBR, c))
				2662	{
				2663	/*
				2664	* Handle abbreviations, like "\t" for TAB -- webb
				2665	*/
				2666	curchr = backslash_trans(c);
				2667	}
				2668	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2669	curchr = toggle_Magic(c);
				2670	else
				2671	{
				2672	/*
				2673	* Next character can never be (made) magic?
				2674	* Then backslashing it won't do anything.
				2675	*/
				2676	#ifdef FEAT_MBYTE
				2677	if (has_mbyte)
				2678	curchr = (*mb_ptr2char)(regparse + 1);
				2679	else
				2680	#endif
				2681	curchr = c;
				2682	}
				2683	break;
				2684	}
				2685
				2686	#ifdef FEAT_MBYTE
				2687	default:
				2688	if (has_mbyte)
				2689	curchr = (*mb_ptr2char)(regparse);
				2690	#endif
				2691	}
				2692	}
				2693
				2694	return curchr;
				2695	}
				2696
				2697	/*
				2698	* Eat one lexed character. Do this in a way that we can undo it.
				2699	*/
				2700	static void
				2701	skipchr()
				2702	{
				2703	/* peekchr() eats a backslash, do the same here */
				2704	if (*regparse == '\\')
				2705	prevchr_len = 1;
				2706	else
				2707	prevchr_len = 0;
				2708	if (regparse[prevchr_len] != NUL)
				2709	{
				2710	#ifdef FEAT_MBYTE
				2711	if (has_mbyte)
				2712	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2713	else
				2714	#endif
				2715	++prevchr_len;
				2716	}
				2717	regparse += prevchr_len;
				2718	prev_at_start = at_start;
				2719	at_start = FALSE;
				2720	prevprevchr = prevchr;
				2721	prevchr = curchr;
				2722	curchr = nextchr; /* use previously unget char, or -1 */
				2723	nextchr = -1;
				2724	}
				2725
				2726	/*
				2727	* Skip a character while keeping the value of prev_at_start for at_start.
				2728	* prevchr and prevprevchr are also kept.
				2729	*/
				2730	static void
				2731	skipchr_keepstart()
				2732	{
				2733	int as = prev_at_start;
				2734	int pr = prevchr;
				2735	int prpr = prevprevchr;
				2736
				2737	skipchr();
				2738	at_start = as;
				2739	prevchr = pr;
				2740	prevprevchr = prpr;
				2741	}
				2742
				2743	static int
				2744	getchr()
				2745	{
				2746	int chr = peekchr();
				2747
				2748	skipchr();
				2749	return chr;
				2750	}
				2751
				2752	/*
				2753	* put character back. Works only once!
				2754	*/
				2755	static void
				2756	ungetchr()
				2757	{
				2758	nextchr = curchr;
				2759	curchr = prevchr;
				2760	prevchr = prevprevchr;
				2761	at_start = prev_at_start;
				2762	prev_at_start = FALSE;
				2763
				2764	/* Backup regparse, so that it's at the same position as before the
				2765	* getchr(). */
				2766	regparse -= prevchr_len;
				2767	}
				2768
				2769	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2770	* Get and return the value of the hex string at the current position.
				2771	* Return -1 if there is no valid hex number.
				2772	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2773	* blahblah\%x20asdf
				2774	* before-^ ^-after
				2775	* The parameter controls the maximum number of input characters. This will be
				2776	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2777	*/
				2778	static int
				2779	gethexchrs(maxinputlen)
				2780	int maxinputlen;
				2781	{
				2782	int nr = 0;
				2783	int c;
				2784	int i;
				2785
				2786	for (i = 0; i < maxinputlen; ++i)
				2787	{
				2788	c = regparse[0];
				2789	if (!vim_isxdigit(c))
				2790	break;
				2791	nr <<= 4;
				2792	nr \|= hex2nr(c);
				2793	++regparse;
				2794	}
				2795
				2796	if (i == 0)
				2797	return -1;
				2798	return nr;
				2799	}
				2800
				2801	/*
				2802	* get and return the value of the decimal string immediately after the
				2803	* current position. Return -1 for invalid. Consumes all digits.
				2804	*/
				2805	static int
				2806	getdecchrs()
				2807	{
				2808	int nr = 0;
				2809	int c;
				2810	int i;
				2811
				2812	for (i = 0; ; ++i)
				2813	{
				2814	c = regparse[0];
				2815	if (c < '0' \|\| c > '9')
				2816	break;
				2817	nr *= 10;
				2818	nr += c - '0';
				2819	++regparse;
				2820	}
				2821
				2822	if (i == 0)
				2823	return -1;
				2824	return nr;
				2825	}
				2826
				2827	/*
				2828	* get and return the value of the octal string immediately after the current
				2829	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2830	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2831	* treat 8 or 9 as recognised characters. Position is updated:
				2832	* blahblah\%o210asdf
				2833	* before-^ ^-after
				2834	*/
				2835	static int
				2836	getoctchrs()
				2837	{
				2838	int nr = 0;
				2839	int c;
				2840	int i;
				2841
				2842	for (i = 0; i < 3 && nr < 040; ++i)
				2843	{
				2844	c = regparse[0];
				2845	if (c < '0' \|\| c > '7')
				2846	break;
				2847	nr <<= 3;
				2848	nr \|= hex2nr(c);
				2849	++regparse;
				2850	}
				2851
				2852	if (i == 0)
				2853	return -1;
				2854	return nr;
				2855	}
				2856
				2857	/*
				2858	* Get a number after a backslash that is inside [].
				2859	* When nothing is recognized return a backslash.
				2860	*/
				2861	static int
				2862	coll_get_char()
				2863	{
				2864	int nr = -1;
				2865
				2866	switch (*regparse++)
				2867	{
				2868	case 'd': nr = getdecchrs(); break;
				2869	case 'o': nr = getoctchrs(); break;
				2870	case 'x': nr = gethexchrs(2); break;
				2871	case 'u': nr = gethexchrs(4); break;
				2872	case 'U': nr = gethexchrs(8); break;
				2873	}
				2874	if (nr < 0)
				2875	{
				2876	/* If getting the number fails be backwards compatible: the character
				2877	* is a backslash. */
				2878	--regparse;
				2879	nr = '\\';
				2880	}
				2881	return nr;
				2882	}
				2883
				2884	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2885	* read_limits - Read two integers to be taken as a minimum and maximum.
				2886	* If the first character is '-', then the range is reversed.
				2887	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2888	* missing, a very big number is the default.
				2889	*/
				2890	static int
				2891	read_limits(minval, maxval)
				2892	long *minval;
				2893	long *maxval;
				2894	{
				2895	int reverse = FALSE;
				2896	char_u *first_char;
				2897	long tmp;
				2898
				2899	if (*regparse == '-')
				2900	{
				2901	/* Starts with '-', so reverse the range later */
				2902	regparse++;
				2903	reverse = TRUE;
				2904	}
				2905	first_char = regparse;
				2906	*minval = getdigits(&regparse);
				2907	if (regparse == ',') / There is a comma */
				2908	{
				2909	if (vim_isdigit(*++regparse))
				2910	*maxval = getdigits(&regparse);
				2911	else
				2912	*maxval = MAX_LIMIT;
				2913	}
				2914	else if (VIM_ISDIGIT(*first_char))
				2915	maxval = minval; /* It was \{n} or \{-n} */
				2916	else
				2917	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2918	if (*regparse == '\\')
				2919	regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2920	if (*regparse != '}')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2921	{
				2922	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2923	reg_magic == MAGIC_ALL ? "" : "\\");
				2924	EMSG_RET_FAIL(IObuff);
				2925	}
				2926
				2927	/*
				2928	* Reverse the range if there was a '-', or make sure it is in the right
				2929	* order otherwise.
				2930	*/
				2931	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2932	{
				2933	tmp = *minval;
				2934	minval = maxval;
				2935	*maxval = tmp;
				2936	}
				2937	skipchr(); /* let's be friends with the lexer again */
				2938	return OK;
				2939	}
				2940
				2941	/*
				2942	* vim_regexec and friends
				2943	*/
				2944
				2945	/*
				2946	* Global work variables for vim_regexec().
				2947	*/
				2948
				2949	/* The current match-position is remembered with these variables: */
				2950	static linenr_T reglnum; /* line number, relative to first line */
				2951	static char_u regline; / start of current line */
				2952	static char_u reginput; / current input, points into "regline" */
				2953
				2954	static int need_clear_subexpr; /* subexpressions still need to be
				2955	* cleared */
				2956	#ifdef FEAT_SYN_HL
				2957	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2958	* still need to be cleared */
				2959	#endif
				2960
				2961	static int out_of_stack; /* TRUE when ran out of stack space */
				2962
				2963	/*
				2964	* Structure used to save the current input state, when it needs to be
				2965	* restored after trying a match. Used by reg_save() and reg_restore().
				2966	*/
				2967	typedef struct
				2968	{
				2969	union
				2970	{
				2971	char_u ptr; / reginput pointer, for single-line regexp */
				2972	lpos_T pos; /* reginput pos, for multi-line regexp */
				2973	} rs_u;
				2974	} regsave_T;
				2975
				2976	/* struct to save start/end pointer/position in for */
				2977	typedef struct
				2978	{
				2979	union
				2980	{
				2981	char_u *ptr;
				2982	lpos_T pos;
				2983	} se_u;
				2984	} save_se_T;
				2985
				2986	static char_u *reg_getline __ARGS((linenr_T lnum));
				2987	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2988	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2989	static void cleanup_subexpr __ARGS((void));
				2990	#ifdef FEAT_SYN_HL
				2991	static void cleanup_zsubexpr __ARGS((void));
				2992	#endif
				2993	static void reg_nextline __ARGS((void));
				2994	static void reg_save __ARGS((regsave_T *save));
				2995	static void reg_restore __ARGS((regsave_T *save));
				2996	static int reg_save_equal __ARGS((regsave_T *save));
				2997	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2998	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2999
				3000	/* Save the sub-expressions before attempting a match. */
				3001	#define save_se(savep, posp, pp) \
				3002	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				3003
				3004	/* After a failed match restore the sub-expressions. */
				3005	#define restore_se(savep, posp, pp) { \
				3006	if (REG_MULTI) \
				3007	*(posp) = (savep)->se_u.pos; \
				3008	else \
				3009	*(pp) = (savep)->se_u.ptr; }
				3010
				3011	static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3012	static int regmatch __ARGS((char_u prog, regsave_T startp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3013	static int regrepeat __ARGS((char_u *p, long maxcount));
				3014
				3015	#ifdef DEBUG
				3016	int regnarrate = 0;
				3017	#endif
				3018
				3019	/*
				3020	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				3021	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				3022	* contains '\c' or '\C' the value is overruled.
				3023	*/
				3024	static int ireg_ic;
				3025
				3026	#ifdef FEAT_MBYTE
				3027	/*
				3028	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				3029	* in the regexp. Defaults to false, always.
				3030	*/
				3031	static int ireg_icombine;
				3032	#endif
				3033
				3034	/*
				3035	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				3036	* slow, we keep one allocated piece of memory and only re-allocate it when
				3037	* it's too small. It's freed in vim_regexec_both() when finished.
				3038	*/
				3039	static char_u *reg_tofree;
				3040	static unsigned reg_tofreelen;
				3041
				3042	/*
				3043	* These variables are set when executing a regexp to speed up the execution.
				3044	* Which ones are set depends on whethere a single-line or multi-line match is
				3045	* done:
				3046	* single-line multi-line
				3047	* reg_match &regmatch_T NULL
				3048	* reg_mmatch NULL &regmmatch_T
				3049	* reg_startp reg_match->startp <invalid>
				3050	* reg_endp reg_match->endp <invalid>
				3051	* reg_startpos <invalid> reg_mmatch->startpos
				3052	* reg_endpos <invalid> reg_mmatch->endpos
				3053	* reg_win NULL window in which to search
				3054	* reg_buf <invalid> buffer in which to search
				3055	* reg_firstlnum <invalid> first line in which to search
				3056	* reg_maxline 0 last line nr
				3057	* reg_line_lbr FALSE or TRUE FALSE
				3058	*/
				3059	static regmatch_T *reg_match;
				3060	static regmmatch_T *reg_mmatch;
				3061	static char_u **reg_startp = NULL;
				3062	static char_u **reg_endp = NULL;
				3063	static lpos_T *reg_startpos = NULL;
				3064	static lpos_T *reg_endpos = NULL;
				3065	static win_T *reg_win;
				3066	static buf_T *reg_buf;
				3067	static linenr_T reg_firstlnum;
				3068	static linenr_T reg_maxline;
				3069	static int reg_line_lbr; /* "\n" in string is line break */
				3070
				3071	/*
				3072	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				3073	*/
				3074	static char_u *
				3075	reg_getline(lnum)
				3076	linenr_T lnum;
				3077	{
				3078	/* when looking behind for a match/no-match lnum is negative. But we
				3079	* can't go before line 1 */
				3080	if (reg_firstlnum + lnum < 1)
				3081	return NULL;
				3082	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				3083	}
				3084
				3085	static regsave_T behind_pos;
				3086
				3087	#ifdef FEAT_SYN_HL
				3088	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				3089	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				3090	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				3091	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				3092	#endif
				3093
				3094	/* TRUE if using multi-line regexp. */
				3095	#define REG_MULTI (reg_match == NULL)
				3096
				3097	/*
				3098	* Match a regexp against a string.
				3099	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3100	* Uses curbuf for line count and 'iskeyword'.
				3101	*
				3102	* Return TRUE if there is a match, FALSE if not.
				3103	*/
				3104	int
				3105	vim_regexec(rmp, line, col)
				3106	regmatch_T *rmp;
				3107	char_u line; / string to match against */
				3108	colnr_T col; /* column to start looking for match */
				3109	{
				3110	reg_match = rmp;
				3111	reg_mmatch = NULL;
				3112	reg_maxline = 0;
				3113	reg_line_lbr = FALSE;
				3114	reg_win = NULL;
				3115	ireg_ic = rmp->rm_ic;
				3116	#ifdef FEAT_MBYTE
				3117	ireg_icombine = FALSE;
				3118	#endif
				3119	return (vim_regexec_both(line, col) != 0);
				3120	}
				3121
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The only difference in the set-up is that reg_line_lbr is TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    reg_win = NULL;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    return (vim_regexec_both(line, col) != 0);
}
#endif
				3145
				3146	/*
				3147	* Match a regexp against multiple lines.
				3148	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3149	* Uses curbuf for line count and 'iskeyword'.
				3150	*
				3151	* Return zero if there is no match. Return number of lines contained in the
				3152	* match otherwise.
				3153	*/
				3154	long
				3155	vim_regexec_multi(rmp, win, buf, lnum, col)
				3156	regmmatch_T *rmp;
				3157	win_T win; / window in which to search or NULL */
				3158	buf_T buf; / buffer in which to search */
				3159	linenr_T lnum; /* nr of line to start looking for match */
				3160	colnr_T col; /* column to start looking for match */
				3161	{
				3162	long r;
				3163	buf_T *save_curbuf = curbuf;
				3164
				3165	reg_match = NULL;
				3166	reg_mmatch = rmp;
				3167	reg_buf = buf;
				3168	reg_win = win;
				3169	reg_firstlnum = lnum;
				3170	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				3171	reg_line_lbr = FALSE;
				3172	ireg_ic = rmp->rmm_ic;
				3173	#ifdef FEAT_MBYTE
				3174	ireg_icombine = FALSE;
				3175	#endif
				3176
				3177	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				3178	curbuf = buf;
				3179	r = vim_regexec_both(NULL, col);
				3180	curbuf = save_curbuf;
				3181
				3182	return r;
				3183	}
				3184
				3185	/*
				3186	* Match a regexp against a string ("line" points to the string) or multiple
				3187	* lines ("line" is NULL, use reg_getline()).
				3188	*/
				3189	#ifdef HAVE_SETJMP_H
				3190	static long
				3191	vim_regexec_both(line_arg, col_arg)
				3192	char_u *line_arg;
				3193	colnr_T col_arg; /* column to start looking for match */
				3194	#else
				3195	static long
				3196	vim_regexec_both(line, col)
				3197	char_u *line;
				3198	colnr_T col; /* column to start looking for match */
				3199	#endif
				3200	{
				3201	regprog_T *prog;
				3202	char_u *s;
				3203	long retval;
				3204	#ifdef HAVE_SETJMP_H
				3205	char_u *line;
				3206	colnr_T col;
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3207	int did_mch_startjmp = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3208	#endif
				3209
				3210	reg_tofree = NULL;
				3211
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3212	#ifdef HAVE_SETJMP_H
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3213	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3214	line = line_arg;
				3215	col = col_arg;
				3216	#endif
				3217	retval = 0L;
				3218
				3219	if (REG_MULTI)
				3220	{
				3221	prog = reg_mmatch->regprog;
				3222	line = reg_getline((linenr_T)0);
				3223	reg_startpos = reg_mmatch->startpos;
				3224	reg_endpos = reg_mmatch->endpos;
				3225	}
				3226	else
				3227	{
				3228	prog = reg_match->regprog;
				3229	reg_startp = reg_match->startp;
				3230	reg_endp = reg_match->endp;
				3231	}
				3232
				3233	/* Be paranoid... */
				3234	if (prog == NULL \|\| line == NULL)
				3235	{
				3236	EMSG(_(e_null));
				3237	goto theend;
				3238	}
				3239
				3240	/* Check validity of program. */
				3241	if (prog_magic_wrong())
				3242	goto theend;
				3243
				3244	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3245	if (prog->regflags & RF_ICASE)
				3246	ireg_ic = TRUE;
				3247	else if (prog->regflags & RF_NOICASE)
				3248	ireg_ic = FALSE;
				3249
				3250	#ifdef FEAT_MBYTE
				3251	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3252	if (prog->regflags & RF_ICOMBINE)
				3253	ireg_icombine = TRUE;
				3254	#endif
				3255
				3256	/* If there is a "must appear" string, look for it. */
				3257	if (prog->regmust != NULL)
				3258	{
				3259	int c;
				3260
				3261	#ifdef FEAT_MBYTE
				3262	if (has_mbyte)
				3263	c = (*mb_ptr2char)(prog->regmust);
				3264	else
				3265	#endif
				3266	c = *prog->regmust;
				3267	s = line + col;
				3268	while ((s = cstrchr(s, c)) != NULL)
				3269	{
				3270	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3271	break; /* Found it. */
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3272	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3273	}
				3274	if (s == NULL) /* Not present. */
				3275	goto theend;
				3276	}
				3277
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3278	#ifdef HAVE_TRY_EXCEPT
				3279	__try
				3280	{
				3281	#endif
				3282
				3283	#ifdef HAVE_SETJMP_H
				3284	/*
				3285	* Matching with a regexp may cause a very deep recursive call of
				3286	* regmatch(). Vim will crash when running out of stack space. Catch
				3287	* this here if the system supports it.
				3288	* It's a bit slow, do it after the check for "regmust".
				3289	* Don't do it if the caller already set it up.
				3290	*/
				3291	if (!lc_active)
				3292	{
				3293	did_mch_startjmp = TRUE;
				3294	mch_startjmp();
				3295	if (SETJMP(lc_jump_env) != 0)
				3296	{
				3297	mch_didjmp();
				3298	# ifdef SIGHASARG
				3299	if (lc_signal != SIGINT)
				3300	# endif
				3301	EMSG(_(e_complex));
				3302	retval = 0L;
				3303	goto inner_end;
				3304	}
				3305	}
				3306	#endif
				3307
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3308	regline = line;
				3309	reglnum = 0;
				3310	out_of_stack = FALSE;
				3311
				3312	/* Simplest case: Anchored match need be tried only once. */
				3313	if (prog->reganch)
				3314	{
				3315	int c;
				3316
				3317	#ifdef FEAT_MBYTE
				3318	if (has_mbyte)
				3319	c = (*mb_ptr2char)(regline + col);
				3320	else
				3321	#endif
				3322	c = regline[col];
				3323	if (prog->regstart == NUL
				3324	\|\| prog->regstart == c
				3325	\|\| (ireg_ic && ((
				3326	#ifdef FEAT_MBYTE
				3327	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3328	\|\| (c < 255 && prog->regstart < 255 &&
				3329	#endif
				3330	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3331	retval = regtry(prog, col);
				3332	else
				3333	retval = 0;
				3334	}
				3335	else
				3336	{
				3337	/* Messy cases: unanchored match. */
				3338	while (!got_int && !out_of_stack)
				3339	{
				3340	if (prog->regstart != NUL)
				3341	{
				3342	/* Skip until the char we know it must start with. */
				3343	s = cstrchr(regline + col, prog->regstart);
				3344	if (s == NULL)
				3345	{
				3346	retval = 0;
				3347	break;
				3348	}
				3349	col = (int)(s - regline);
				3350	}
				3351
				3352	retval = regtry(prog, col);
				3353	if (retval > 0)
				3354	break;
				3355
				3356	/* if not currently on the first line, get it again */
				3357	if (reglnum != 0)
				3358	{
				3359	regline = reg_getline((linenr_T)0);
				3360	reglnum = 0;
				3361	}
				3362	if (regline[col] == NUL)
				3363	break;
				3364	#ifdef FEAT_MBYTE
				3365	if (has_mbyte)
				3366	col += (*mb_ptr2len_check)(regline + col);
				3367	else
				3368	#endif
				3369	++col;
				3370	}
				3371	}
				3372
				3373	if (out_of_stack)
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3374	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3375
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3376	#ifdef HAVE_SETJMP_H
				3377	inner_end:
				3378	;
				3379	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3380	#ifdef HAVE_TRY_EXCEPT
				3381	}
				3382	__except(EXCEPTION_EXECUTE_HANDLER)
				3383	{
				3384	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3385	{
				3386	RESETSTKOFLW();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3387	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3388	}
				3389	else
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3390	EMSG(_(e_complex));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3391	retval = 0L;
				3392	}
				3393	#endif
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3394	#ifdef HAVE_SETJMP_H
				3395	if (did_mch_startjmp)
				3396	mch_endjmp();
				3397	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3398
				3399	theend:
				3400	/* Didn't find a match. */
				3401	vim_free(reg_tofree);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3402	return retval;
				3403	}
				3404
				3405	#ifdef FEAT_SYN_HL
				3406	static reg_extmatch_T *make_extmatch __ARGS((void));
				3407
				3408	/*
				3409	* Create a new extmatch and mark it as referenced once.
				3410	*/
				3411	static reg_extmatch_T *
				3412	make_extmatch()
				3413	{
				3414	reg_extmatch_T *em;
				3415
				3416	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3417	if (em != NULL)
				3418	em->refcnt = 1;
				3419	return em;
				3420	}
				3421
				3422	/*
				3423	* Add a reference to an extmatch.
				3424	*/
				3425	reg_extmatch_T *
				3426	ref_extmatch(em)
				3427	reg_extmatch_T *em;
				3428	{
				3429	if (em != NULL)
				3430	em->refcnt++;
				3431	return em;
				3432	}
				3433
				3434	/*
				3435	* Remove a reference to an extmatch. If there are no references left, free
				3436	* the info.
				3437	*/
				3438	void
				3439	unref_extmatch(em)
				3440	reg_extmatch_T *em;
				3441	{
				3442	int i;
				3443
				3444	if (em != NULL && --em->refcnt <= 0)
				3445	{
				3446	for (i = 0; i < NSUBEXP; ++i)
				3447	vim_free(em->matches[i]);
				3448	vim_free(em);
				3449	}
				3450	}
				3451	#endif
				3452
				3453	/*
				3454	* regtry - try match of "prog" with at regline["col"].
				3455	* Returns 0 for failure, number of lines contained in the match otherwise.
				3456	*/
				3457	static long
				3458	regtry(prog, col)
				3459	regprog_T *prog;
				3460	colnr_T col;
				3461	{
				3462	reginput = regline + col;
				3463	need_clear_subexpr = TRUE;
				3464	#ifdef FEAT_SYN_HL
				3465	/* Clear the external match subpointers if necessary. */
				3466	if (prog->reghasz == REX_SET)
				3467	need_clear_zsubexpr = TRUE;
				3468	#endif
				3469
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3470	if (regmatch(prog->program + 1, NULL))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3471	{
				3472	cleanup_subexpr();
				3473	if (REG_MULTI)
				3474	{
				3475	if (reg_startpos[0].lnum < 0)
				3476	{
				3477	reg_startpos[0].lnum = 0;
				3478	reg_startpos[0].col = col;
				3479	}
				3480	if (reg_endpos[0].lnum < 0)
				3481	{
				3482	reg_endpos[0].lnum = reglnum;
				3483	reg_endpos[0].col = (int)(reginput - regline);
				3484	}
				3485	else
				3486	/* Use line number of "\ze". */
				3487	reglnum = reg_endpos[0].lnum;
				3488	}
				3489	else
				3490	{
				3491	if (reg_startp[0] == NULL)
				3492	reg_startp[0] = regline + col;
				3493	if (reg_endp[0] == NULL)
				3494	reg_endp[0] = reginput;
				3495	}
				3496	#ifdef FEAT_SYN_HL
				3497	/* Package any found \z(...\) matches for export. Default is none. */
				3498	unref_extmatch(re_extmatch_out);
				3499	re_extmatch_out = NULL;
				3500
				3501	if (prog->reghasz == REX_SET)
				3502	{
				3503	int i;
				3504
				3505	cleanup_zsubexpr();
				3506	re_extmatch_out = make_extmatch();
				3507	for (i = 0; i < NSUBEXP; i++)
				3508	{
				3509	if (REG_MULTI)
				3510	{
				3511	/* Only accept single line matches. */
				3512	if (reg_startzpos[i].lnum >= 0
				3513	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3514	re_extmatch_out->matches[i] =
				3515	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3516	+ reg_startzpos[i].col,
				3517	reg_endzpos[i].col - reg_startzpos[i].col);
				3518	}
				3519	else
				3520	{
				3521	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3522	re_extmatch_out->matches[i] =
				3523	vim_strnsave(reg_startzp[i],
				3524	(int)(reg_endzp[i] - reg_startzp[i]));
				3525	}
				3526	}
				3527	}
				3528	#endif
				3529	return 1 + reglnum;
				3530	}
				3531	return 0;
				3532	}
				3533
				3534	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3535	static int reg_prev_class __ARGS((void));
				3536
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3537	/*
				3538	* Get class of previous character.
				3539	*/
				3540	static int
				3541	reg_prev_class()
				3542	{
				3543	if (reginput > regline)
				3544	return mb_get_class(reginput - 1
				3545	- (*mb_head_off)(regline, reginput - 1));
				3546	return -1;
				3547	}
				3548
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3549	#endif
/* Move "reginput" forward to the next character. */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  Logically these belong
 * inside regmatch(); they are kept at file level to cut down on the stack
 * space used, because regmatch() can be called recursively many times.
 */
static long     bl_minval;
static long     bl_maxval;
				3559
				3560	/*
				3561	* regmatch - main matching routine
				3562	*
				3563	* Conceptually the strategy is simple: Check to see whether the current
				3564	* node matches, call self recursively to see whether the rest matches,
				3565	* and then act accordingly. In practice we make some effort to avoid
				3566	* recursion, in particular by going through "ordinary" nodes (that don't
				3567	* need to know whether the rest of the match failed) by a loop instead of
				3568	* by recursion.
				3569	*
				3570	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3571	* the last matched character.
				3572	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3573	* undefined state!
				3574	*/
				3575	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3576	regmatch(scan, startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3577	char_u scan; / Current node. */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3578	regsave_T startp; / start position for BACK */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3579	{
				3580	char_u next; / Next node. */
				3581	int op;
				3582	int c;
				3583
				3584	#ifdef HAVE_GETRLIMIT
				3585	/* Check if we are running out of stack space. Could be caused by
				3586	* recursively calling ourselves. */
				3587	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3588	{
				3589	out_of_stack = TRUE;
				3590	return FALSE;
				3591	}
				3592	#endif
				3593
				3594	/* Some patterns may take a long time to match, even though they are not
				3595	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3596	fast_breakcheck();
				3597
				3598	#ifdef DEBUG
				3599	if (scan != NULL && regnarrate)
				3600	{
				3601	mch_errmsg(regprop(scan));
				3602	mch_errmsg("(\n");
				3603	}
				3604	#endif
				3605	while (scan != NULL)
				3606	{
				3607	if (got_int \|\| out_of_stack)
				3608	return FALSE;
				3609	#ifdef DEBUG
				3610	if (regnarrate)
				3611	{
				3612	mch_errmsg(regprop(scan));
				3613	mch_errmsg("...\n");
				3614	# ifdef FEAT_SYN_HL
				3615	if (re_extmatch_in != NULL)
				3616	{
				3617	int i;
				3618
				3619	mch_errmsg(_("External submatches:\n"));
				3620	for (i = 0; i < NSUBEXP; i++)
				3621	{
				3622	mch_errmsg(" \"");
				3623	if (re_extmatch_in->matches[i] != NULL)
				3624	mch_errmsg(re_extmatch_in->matches[i]);
				3625	mch_errmsg("\"\n");
				3626	}
				3627	}
				3628	# endif
				3629	}
				3630	#endif
				3631	next = regnext(scan);
				3632
				3633	op = OP(scan);
				3634	/* Check for character class with NL added. */
				3635	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3636	{
				3637	reg_nextline();
				3638	}
				3639	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3640	{
				3641	ADVANCE_REGINPUT();
				3642	}
				3643	else
				3644	{
				3645	if (WITH_NL(op))
				3646	op -= ADD_NL;
				3647	#ifdef FEAT_MBYTE
				3648	if (has_mbyte)
				3649	c = (*mb_ptr2char)(reginput);
				3650	else
				3651	#endif
				3652	c = *reginput;
				3653	switch (op)
				3654	{
				3655	case BOL:
				3656	if (reginput != regline)
				3657	return FALSE;
				3658	break;
				3659
				3660	case EOL:
				3661	if (c != NUL)
				3662	return FALSE;
				3663	break;
				3664
				3665	case RE_BOF:
				3666	/* Passing -1 to the getline() function provided for the search
				3667	* should always return NULL if the current line is the first
				3668	* line of the file. */
				3669	if (reglnum != 0 \|\| reginput != regline
				3670	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3671	return FALSE;
				3672	break;
				3673
				3674	case RE_EOF:
				3675	if (reglnum != reg_maxline \|\| c != NUL)
				3676	return FALSE;
				3677	break;
				3678
				3679	case CURSOR:
				3680	/* Check if the buffer is in a window and compare the
				3681	* reg_win->w_cursor position to the match position. */
				3682	if (reg_win == NULL
				3683	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3684	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3685	return FALSE;
				3686	break;
				3687
				3688	case RE_LNUM:
				3689	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3690	scan))
				3691	return FALSE;
				3692	break;
				3693
				3694	case RE_COL:
				3695	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3696	return FALSE;
				3697	break;
				3698
				3699	case RE_VCOL:
				3700	if (!re_num_cmp((long_u)win_linetabsize(
				3701	reg_win == NULL ? curwin : reg_win,
				3702	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3703	return FALSE;
				3704	break;
				3705
				3706	case BOW: /* \<word; reginput points to w */
				3707	if (c == NUL) /* Can't match at end of line */
				3708	return FALSE;
				3709	#ifdef FEAT_MBYTE
				3710	if (has_mbyte)
				3711	{
				3712	int this_class;
				3713
				3714	/* Get class of current and previous char (if it exists). */
				3715	this_class = mb_get_class(reginput);
				3716	if (this_class <= 1)
				3717	return FALSE; /* not on a word at all */
				3718	if (reg_prev_class() == this_class)
				3719	return FALSE; /* previous char is in same word */
				3720	}
				3721	#endif
				3722	else
				3723	{
				3724	if (!vim_iswordc(c)
				3725	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3726	return FALSE;
				3727	}
				3728	break;
				3729
				3730	case EOW: /* word\>; reginput points after d */
				3731	if (reginput == regline) /* Can't match at start of line */
				3732	return FALSE;
				3733	#ifdef FEAT_MBYTE
				3734	if (has_mbyte)
				3735	{
				3736	int this_class, prev_class;
				3737
				3738	/* Get class of current and previous char (if it exists). */
				3739	this_class = mb_get_class(reginput);
				3740	prev_class = reg_prev_class();
				3741	if (this_class == prev_class)
				3742	return FALSE;
				3743	if (prev_class == 0 \|\| prev_class == 1)
				3744	return FALSE;
				3745	}
				3746	else
				3747	#endif
				3748	{
				3749	if (!vim_iswordc(reginput[-1]))
				3750	return FALSE;
				3751	if (reginput[0] != NUL && vim_iswordc(c))
				3752	return FALSE;
				3753	}
				3754	break; /* Matched with EOW */
				3755
				3756	case ANY:
				3757	if (c == NUL)
				3758	return FALSE;
				3759	ADVANCE_REGINPUT();
				3760	break;
				3761
				3762	case IDENT:
				3763	if (!vim_isIDc(c))
				3764	return FALSE;
				3765	ADVANCE_REGINPUT();
				3766	break;
				3767
				3768	case SIDENT:
				3769	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3770	return FALSE;
				3771	ADVANCE_REGINPUT();
				3772	break;
				3773
				3774	case KWORD:
				3775	if (!vim_iswordp(reginput))
				3776	return FALSE;
				3777	ADVANCE_REGINPUT();
				3778	break;
				3779
				3780	case SKWORD:
				3781	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3782	return FALSE;
				3783	ADVANCE_REGINPUT();
				3784	break;
				3785
				3786	case FNAME:
				3787	if (!vim_isfilec(c))
				3788	return FALSE;
				3789	ADVANCE_REGINPUT();
				3790	break;
				3791
				3792	case SFNAME:
				3793	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3794	return FALSE;
				3795	ADVANCE_REGINPUT();
				3796	break;
				3797
				3798	case PRINT:
				3799	if (ptr2cells(reginput) != 1)
				3800	return FALSE;
				3801	ADVANCE_REGINPUT();
				3802	break;
				3803
				3804	case SPRINT:
				3805	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3806	return FALSE;
				3807	ADVANCE_REGINPUT();
				3808	break;
				3809
				3810	case WHITE:
				3811	if (!vim_iswhite(c))
				3812	return FALSE;
				3813	ADVANCE_REGINPUT();
				3814	break;
				3815
				3816	case NWHITE:
				3817	if (c == NUL \|\| vim_iswhite(c))
				3818	return FALSE;
				3819	ADVANCE_REGINPUT();
				3820	break;
				3821
				3822	case DIGIT:
				3823	if (!ri_digit(c))
				3824	return FALSE;
				3825	ADVANCE_REGINPUT();
				3826	break;
				3827
				3828	case NDIGIT:
				3829	if (c == NUL \|\| ri_digit(c))
				3830	return FALSE;
				3831	ADVANCE_REGINPUT();
				3832	break;
				3833
				3834	case HEX:
				3835	if (!ri_hex(c))
				3836	return FALSE;
				3837	ADVANCE_REGINPUT();
				3838	break;
				3839
				3840	case NHEX:
				3841	if (c == NUL \|\| ri_hex(c))
				3842	return FALSE;
				3843	ADVANCE_REGINPUT();
				3844	break;
				3845
				3846	case OCTAL:
				3847	if (!ri_octal(c))
				3848	return FALSE;
				3849	ADVANCE_REGINPUT();
				3850	break;
				3851
				3852	case NOCTAL:
				3853	if (c == NUL \|\| ri_octal(c))
				3854	return FALSE;
				3855	ADVANCE_REGINPUT();
				3856	break;
				3857
				3858	case WORD:
				3859	if (!ri_word(c))
				3860	return FALSE;
				3861	ADVANCE_REGINPUT();
				3862	break;
				3863
				3864	case NWORD:
				3865	if (c == NUL \|\| ri_word(c))
				3866	return FALSE;
				3867	ADVANCE_REGINPUT();
				3868	break;
				3869
				3870	case HEAD:
				3871	if (!ri_head(c))
				3872	return FALSE;
				3873	ADVANCE_REGINPUT();
				3874	break;
				3875
				3876	case NHEAD:
				3877	if (c == NUL \|\| ri_head(c))
				3878	return FALSE;
				3879	ADVANCE_REGINPUT();
				3880	break;
				3881
				3882	case ALPHA:
				3883	if (!ri_alpha(c))
				3884	return FALSE;
				3885	ADVANCE_REGINPUT();
				3886	break;
				3887
				3888	case NALPHA:
				3889	if (c == NUL \|\| ri_alpha(c))
				3890	return FALSE;
				3891	ADVANCE_REGINPUT();
				3892	break;
				3893
				3894	case LOWER:
				3895	if (!ri_lower(c))
				3896	return FALSE;
				3897	ADVANCE_REGINPUT();
				3898	break;
				3899
				3900	case NLOWER:
				3901	if (c == NUL \|\| ri_lower(c))
				3902	return FALSE;
				3903	ADVANCE_REGINPUT();
				3904	break;
				3905
				3906	case UPPER:
				3907	if (!ri_upper(c))
				3908	return FALSE;
				3909	ADVANCE_REGINPUT();
				3910	break;
				3911
				3912	case NUPPER:
				3913	if (c == NUL \|\| ri_upper(c))
				3914	return FALSE;
				3915	ADVANCE_REGINPUT();
				3916	break;
				3917
				3918	case EXACTLY:
				3919	{
				3920	int len;
				3921	char_u *opnd;
				3922
				3923	opnd = OPERAND(scan);
				3924	/* Inline the first byte, for speed. */
				3925	if (opnd != reginput
				3926	&& (!ireg_ic \|\| (
				3927	#ifdef FEAT_MBYTE
				3928	!enc_utf8 &&
				3929	#endif
				3930	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3931	return FALSE;
				3932	if (*opnd == NUL)
				3933	{
				3934	/* match empty string always works; happens when "~" is
				3935	* empty. */
				3936	}
				3937	else if (opnd[1] == NUL
				3938	#ifdef FEAT_MBYTE
				3939	&& !(enc_utf8 && ireg_ic)
				3940	#endif
				3941	)
				3942	++reginput; /* matched a single char */
				3943	else
				3944	{
				3945	len = (int)STRLEN(opnd);
				3946	/* Need to match first byte again for multi-byte. */
				3947	if (cstrncmp(opnd, reginput, &len) != 0)
				3948	return FALSE;
				3949	#ifdef FEAT_MBYTE
				3950	/* Check for following composing character. */
				3951	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3952	{
				3953	/* raaron: This code makes a composing character get
				3954	* ignored, which is the correct behavior (sometimes)
				3955	* for voweled Hebrew texts. */
				3956	if (!ireg_icombine)
				3957	return FALSE;
				3958	}
				3959	else
				3960	#endif
				3961	reginput += len;
				3962	}
				3963	}
				3964	break;
				3965
				3966	case ANYOF:
				3967	case ANYBUT:
				3968	if (c == NUL)
				3969	return FALSE;
				3970	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3971	return FALSE;
				3972	ADVANCE_REGINPUT();
				3973	break;
				3974
				3975	#ifdef FEAT_MBYTE
				3976	case MULTIBYTECODE:
				3977	if (has_mbyte)
				3978	{
				3979	int i, len;
				3980	char_u *opnd;
				3981
				3982	opnd = OPERAND(scan);
				3983	/* Safety check (just in case 'encoding' was changed since
				3984	* compiling the program). */
				3985	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3986	return FALSE;
				3987	for (i = 0; i < len; ++i)
				3988	if (opnd[i] != reginput[i])
				3989	return FALSE;
				3990	reginput += len;
				3991	}
				3992	else
				3993	return FALSE;
				3994	break;
				3995	#endif
				3996
				3997	case NOTHING:
				3998	break;
				3999
				4000	case BACK:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4001	/* When we run into BACK without matching something non-empty, we
				4002	* fail. */
				4003	if (startp != NULL && reg_save_equal(startp))
				4004	return FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4005	break;
				4006
				4007	case MOPEN + 0: /* Match start: \zs */
				4008	case MOPEN + 1: /* \( */
				4009	case MOPEN + 2:
				4010	case MOPEN + 3:
				4011	case MOPEN + 4:
				4012	case MOPEN + 5:
				4013	case MOPEN + 6:
				4014	case MOPEN + 7:
				4015	case MOPEN + 8:
				4016	case MOPEN + 9:
				4017	{
				4018	int no;
				4019	save_se_T save;
				4020
				4021	no = op - MOPEN;
				4022	cleanup_subexpr();
				4023	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				4024
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4025	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4026	return TRUE;
				4027
				4028	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				4029	return FALSE;
				4030	}
				4031	/* break; Not Reached */
				4032
				4033	case NOPEN: /* \%( */
				4034	case NCLOSE: /* \) after \%( */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4035	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4036	return TRUE;
				4037	return FALSE;
				4038	/* break; Not Reached */
				4039
				4040	#ifdef FEAT_SYN_HL
				4041	case ZOPEN + 1:
				4042	case ZOPEN + 2:
				4043	case ZOPEN + 3:
				4044	case ZOPEN + 4:
				4045	case ZOPEN + 5:
				4046	case ZOPEN + 6:
				4047	case ZOPEN + 7:
				4048	case ZOPEN + 8:
				4049	case ZOPEN + 9:
				4050	{
				4051	int no;
				4052	save_se_T save;
				4053
				4054	no = op - ZOPEN;
				4055	cleanup_zsubexpr();
				4056	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4057
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4058	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4059	return TRUE;
				4060
				4061	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4062	return FALSE;
				4063	}
				4064	/* break; Not Reached */
				4065	#endif
				4066
				4067	case MCLOSE + 0: /* Match end: \ze */
				4068	case MCLOSE + 1: /* \) */
				4069	case MCLOSE + 2:
				4070	case MCLOSE + 3:
				4071	case MCLOSE + 4:
				4072	case MCLOSE + 5:
				4073	case MCLOSE + 6:
				4074	case MCLOSE + 7:
				4075	case MCLOSE + 8:
				4076	case MCLOSE + 9:
				4077	{
				4078	int no;
				4079	save_se_T save;
				4080
				4081	no = op - MCLOSE;
				4082	cleanup_subexpr();
				4083	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				4084
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4085	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4086	return TRUE;
				4087
				4088	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				4089	return FALSE;
				4090	}
				4091	/* break; Not Reached */
				4092
				4093	#ifdef FEAT_SYN_HL
				4094	case ZCLOSE + 1: /* \) after \z( */
				4095	case ZCLOSE + 2:
				4096	case ZCLOSE + 3:
				4097	case ZCLOSE + 4:
				4098	case ZCLOSE + 5:
				4099	case ZCLOSE + 6:
				4100	case ZCLOSE + 7:
				4101	case ZCLOSE + 8:
				4102	case ZCLOSE + 9:
				4103	{
				4104	int no;
				4105	save_se_T save;
				4106
				4107	no = op - ZCLOSE;
				4108	cleanup_zsubexpr();
				4109	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4110
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4111	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4112	return TRUE;
				4113
				4114	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4115	return FALSE;
				4116	}
				4117	/* break; Not Reached */
				4118	#endif
				4119
				4120	case BACKREF + 1:
				4121	case BACKREF + 2:
				4122	case BACKREF + 3:
				4123	case BACKREF + 4:
				4124	case BACKREF + 5:
				4125	case BACKREF + 6:
				4126	case BACKREF + 7:
				4127	case BACKREF + 8:
				4128	case BACKREF + 9:
				4129	{
				4130	int no;
				4131	int len;
				4132	linenr_T clnum;
				4133	colnr_T ccol;
				4134	char_u *p;
				4135
				4136	no = op - BACKREF;
				4137	cleanup_subexpr();
				4138	if (!REG_MULTI) /* Single-line regexp */
				4139	{
				4140	if (reg_endp[no] == NULL)
				4141	{
				4142	/* Backref was not set: Match an empty string. */
				4143	len = 0;
				4144	}
				4145	else
				4146	{
				4147	/* Compare current input with back-ref in the same
				4148	* line. */
				4149	len = (int)(reg_endp[no] - reg_startp[no]);
				4150	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				4151	return FALSE;
				4152	}
				4153	}
				4154	else /* Multi-line regexp */
				4155	{
				4156	if (reg_endpos[no].lnum < 0)
				4157	{
				4158	/* Backref was not set: Match an empty string. */
				4159	len = 0;
				4160	}
				4161	else
				4162	{
				4163	if (reg_startpos[no].lnum == reglnum
				4164	&& reg_endpos[no].lnum == reglnum)
				4165	{
				4166	/* Compare back-ref within the current line. */
				4167	len = reg_endpos[no].col - reg_startpos[no].col;
				4168	if (cstrncmp(regline + reg_startpos[no].col,
				4169	reginput, &len) != 0)
				4170	return FALSE;
				4171	}
				4172	else
				4173	{
				4174	/* Messy situation: Need to compare between two
				4175	* lines. */
				4176	ccol = reg_startpos[no].col;
				4177	clnum = reg_startpos[no].lnum;
				4178	for (;;)
				4179	{
				4180	/* Since getting one line may invalidate
				4181	* the other, need to make copy. Slow! */
				4182	if (regline != reg_tofree)
				4183	{
				4184	len = (int)STRLEN(regline);
				4185	if (reg_tofree == NULL
				4186	\|\| len >= (int)reg_tofreelen)
				4187	{
				4188	len += 50; /* get some extra */
				4189	vim_free(reg_tofree);
				4190	reg_tofree = alloc(len);
				4191	if (reg_tofree == NULL)
				4192	return FALSE; /* out of memory! */
				4193	reg_tofreelen = len;
				4194	}
				4195	STRCPY(reg_tofree, regline);
				4196	reginput = reg_tofree
				4197	+ (reginput - regline);
				4198	regline = reg_tofree;
				4199	}
				4200
				4201	/* Get the line to compare with. */
				4202	p = reg_getline(clnum);
				4203	if (clnum == reg_endpos[no].lnum)
				4204	len = reg_endpos[no].col - ccol;
				4205	else
				4206	len = (int)STRLEN(p + ccol);
				4207
				4208	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4209	return FALSE; /* doesn't match */
				4210	if (clnum == reg_endpos[no].lnum)
				4211	break; /* match and at end! */
				4212	if (reglnum == reg_maxline)
				4213	return FALSE; /* text too short */
				4214
				4215	/* Advance to next line. */
				4216	reg_nextline();
				4217	++clnum;
				4218	ccol = 0;
				4219	if (got_int \|\| out_of_stack)
				4220	return FALSE;
				4221	}
				4222
				4223	/* found a match! Note that regline may now point
				4224	* to a copy of the line, that should not matter. */
				4225	}
				4226	}
				4227	}
				4228
				4229	/* Matched the backref, skip over it. */
				4230	reginput += len;
				4231	}
				4232	break;
				4233
				4234	#ifdef FEAT_SYN_HL
				4235	case ZREF + 1:
				4236	case ZREF + 2:
				4237	case ZREF + 3:
				4238	case ZREF + 4:
				4239	case ZREF + 5:
				4240	case ZREF + 6:
				4241	case ZREF + 7:
				4242	case ZREF + 8:
				4243	case ZREF + 9:
				4244	{
				4245	int no;
				4246	int len;
				4247
				4248	cleanup_zsubexpr();
				4249	no = op - ZREF;
				4250	if (re_extmatch_in != NULL
				4251	&& re_extmatch_in->matches[no] != NULL)
				4252	{
				4253	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4254	if (cstrncmp(re_extmatch_in->matches[no],
				4255	reginput, &len) != 0)
				4256	return FALSE;
				4257	reginput += len;
				4258	}
				4259	else
				4260	{
				4261	/* Backref was not set: Match an empty string. */
				4262	}
				4263	}
				4264	break;
				4265	#endif
				4266
				4267	case BRANCH:
				4268	{
				4269	if (OP(next) != BRANCH) /* No choice. */
				4270	next = OPERAND(scan); /* Avoid recursion. */
				4271	else
				4272	{
				4273	regsave_T save;
				4274
				4275	do
				4276	{
				4277	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4278	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4279	return TRUE;
				4280	reg_restore(&save);
				4281	scan = regnext(scan);
				4282	} while (scan != NULL && OP(scan) == BRANCH);
				4283	return FALSE;
				4284	/* NOTREACHED */
				4285	}
				4286	}
				4287	break;
				4288
				4289	case BRACE_LIMITS:
				4290	{
				4291	int no;
				4292
				4293	if (OP(next) == BRACE_SIMPLE)
				4294	{
				4295	bl_minval = OPERAND_MIN(scan);
				4296	bl_maxval = OPERAND_MAX(scan);
				4297	}
				4298	else if (OP(next) >= BRACE_COMPLEX
				4299	&& OP(next) < BRACE_COMPLEX + 10)
				4300	{
				4301	no = OP(next) - BRACE_COMPLEX;
				4302	brace_min[no] = OPERAND_MIN(scan);
				4303	brace_max[no] = OPERAND_MAX(scan);
				4304	brace_count[no] = 0;
				4305	}
				4306	else
				4307	{
				4308	EMSG(_(e_internal)); /* Shouldn't happen */
				4309	return FALSE;
				4310	}
				4311	}
				4312	break;
				4313
				4314	case BRACE_COMPLEX + 0:
				4315	case BRACE_COMPLEX + 1:
				4316	case BRACE_COMPLEX + 2:
				4317	case BRACE_COMPLEX + 3:
				4318	case BRACE_COMPLEX + 4:
				4319	case BRACE_COMPLEX + 5:
				4320	case BRACE_COMPLEX + 6:
				4321	case BRACE_COMPLEX + 7:
				4322	case BRACE_COMPLEX + 8:
				4323	case BRACE_COMPLEX + 9:
				4324	{
				4325	int no;
				4326	regsave_T save;
				4327
				4328	no = op - BRACE_COMPLEX;
				4329	++brace_count[no];
				4330
				4331	/* If not matched enough times yet, try one more */
				4332	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4333	? brace_min[no] : brace_max[no]))
				4334	{
				4335	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4336	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4337	return TRUE;
				4338	reg_restore(&save);
				4339	--brace_count[no]; /* failed, decrement match count */
				4340	return FALSE;
				4341	}
				4342
				4343	/* If matched enough times, may try matching some more */
				4344	if (brace_min[no] <= brace_max[no])
				4345	{
				4346	/* Range is the normal way around, use longest match */
				4347	if (brace_count[no] <= brace_max[no])
				4348	{
				4349	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4350	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4351	return TRUE; /* matched some more times */
				4352	reg_restore(&save);
				4353	--brace_count[no]; /* matched just enough times */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4354	/* { continue with the items after \{} */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4355	}
				4356	}
				4357	else
				4358	{
				4359	/* Range is backwards, use shortest match first */
				4360	if (brace_count[no] <= brace_min[no])
				4361	{
				4362	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4363	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4364	return TRUE;
				4365	reg_restore(&save);
				4366	next = OPERAND(scan);
				4367	/* must try to match one more item */
				4368	}
				4369	}
				4370	}
				4371	break;
				4372
				4373	case BRACE_SIMPLE:
				4374	case STAR:
				4375	case PLUS:
				4376	{
				4377	int nextb; /* next byte */
				4378	int nextb_ic; /* next byte reverse case */
				4379	long count;
				4380	regsave_T save;
				4381	long minval;
				4382	long maxval;
				4383
				4384	/*
				4385	* Lookahead to avoid useless match attempts when we know
				4386	* what character comes next.
				4387	*/
				4388	if (OP(next) == EXACTLY)
				4389	{
				4390	nextb = *OPERAND(next);
				4391	if (ireg_ic)
				4392	{
				4393	if (isupper(nextb))
				4394	nextb_ic = TOLOWER_LOC(nextb);
				4395	else
				4396	nextb_ic = TOUPPER_LOC(nextb);
				4397	}
				4398	else
				4399	nextb_ic = nextb;
				4400	}
				4401	else
				4402	{
				4403	nextb = NUL;
				4404	nextb_ic = NUL;
				4405	}
				4406	if (op != BRACE_SIMPLE)
				4407	{
				4408	minval = (op == STAR) ? 0 : 1;
				4409	maxval = MAX_LIMIT;
				4410	}
				4411	else
				4412	{
				4413	minval = bl_minval;
				4414	maxval = bl_maxval;
				4415	}
				4416
				4417	/*
				4418	* When maxval > minval, try matching as much as possible, up
				4419	* to maxval. When maxval < minval, try matching at least the
				4420	* minimal number (since the range is backwards, that's also
				4421	* maxval!).
				4422	*/
				4423	count = regrepeat(OPERAND(scan), maxval);
				4424	if (got_int)
				4425	return FALSE;
				4426	if (minval <= maxval)
				4427	{
				4428	/* Range is the normal way around, use longest match */
				4429	while (count >= minval)
				4430	{
				4431	/* If it could match, try it. */
				4432	if (nextb == NUL \|\| *reginput == nextb
				4433	\|\| *reginput == nextb_ic)
				4434	{
				4435	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4436	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4437	return TRUE;
				4438	reg_restore(&save);
				4439	}
				4440	/* Couldn't or didn't match -- back up one char. */
				4441	if (--count < minval)
				4442	break;
				4443	if (reginput == regline)
				4444	{
				4445	/* backup to last char of previous line */
				4446	--reglnum;
				4447	regline = reg_getline(reglnum);
				4448	/* Just in case regrepeat() didn't count right. */
				4449	if (regline == NULL)
				4450	return FALSE;
				4451	reginput = regline + STRLEN(regline);
				4452	fast_breakcheck();
				4453	if (got_int \|\| out_of_stack)
				4454	return FALSE;
				4455	}
				4456	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4457	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4458	}
				4459	}
				4460	else
				4461	{
				4462	/* Range is backwards, use shortest match first.
				4463	* Careful: maxval and minval are exchanged! */
				4464	if (count < maxval)
				4465	return FALSE;
				4466	for (;;)
				4467	{
				4468	/* If it could work, try it. */
				4469	if (nextb == NUL \|\| *reginput == nextb
				4470	\|\| *reginput == nextb_ic)
				4471	{
				4472	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4473	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4474	return TRUE;
				4475	reg_restore(&save);
				4476	}
				4477	/* Couldn't or didn't match: try advancing one char. */
				4478	if (count == minval
				4479	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4480	break;
				4481	++count;
				4482	if (got_int \|\| out_of_stack)
				4483	return FALSE;
				4484	}
				4485	}
				4486	return FALSE;
				4487	}
				4488	/* break; Not Reached */
				4489
				4490	case NOMATCH:
				4491	{
				4492	regsave_T save;
				4493
				4494	/* If the operand matches, we fail. Otherwise backup and
				4495	* continue with the next item. */
				4496	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4497	if (regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4498	return FALSE;
				4499	reg_restore(&save);
				4500	}
				4501	break;
				4502
				4503	case MATCH:
				4504	case SUBPAT:
				4505	{
				4506	regsave_T save;
				4507
				4508	/* If the operand doesn't match, we fail. Otherwise backup
				4509	* and continue with the next item. */
				4510	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4511	if (!regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4512	return FALSE;
				4513	if (op == MATCH) /* zero-width */
				4514	reg_restore(&save);
				4515	}
				4516	break;
				4517
				4518	case BEHIND:
				4519	case NOBEHIND:
				4520	{
				4521	regsave_T save_after, save_start;
				4522	regsave_T save_behind_pos;
				4523	int needmatch = (op == BEHIND);
				4524
				4525	/*
				4526	* Look back in the input to see if the operand matches or not. This
				4527	* must be done at every position in the input and checking if
				4528	* the match ends at the current position.
				4529	* First check if the next item matches, that's probably
				4530	* faster.
				4531	*/
				4532	reg_save(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4533	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4534	{
				4535	/* save the position after the found match for next */
				4536	reg_save(&save_after);
				4537
				4538	/* start looking for a match with operand at the current
				4539	* position. Go back one character until we find the
				4540	* result, hitting the start of the line or the previous
				4541	* line (for multi-line matching).
				4542	* Set behind_pos to where the match should end, BHPOS
				4543	* will match it. */
				4544	save_behind_pos = behind_pos;
				4545	behind_pos = save_start;
				4546	for (;;)
				4547	{
				4548	reg_restore(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4549	if (regmatch(OPERAND(scan), startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4550	&& reg_save_equal(&behind_pos))
				4551	{
				4552	behind_pos = save_behind_pos;
				4553	/* found a match that ends where "next" started */
				4554	if (needmatch)
				4555	{
				4556	reg_restore(&save_after);
				4557	return TRUE;
				4558	}
				4559	return FALSE;
				4560	}
				4561	/*
				4562	* No match: Go back one character. May go to
				4563	* previous line once.
				4564	*/
				4565	if (REG_MULTI)
				4566	{
				4567	if (save_start.rs_u.pos.col == 0)
				4568	{
				4569	if (save_start.rs_u.pos.lnum
				4570	< behind_pos.rs_u.pos.lnum
				4571	\|\| reg_getline(
				4572	--save_start.rs_u.pos.lnum) == NULL)
				4573	break;
				4574	reg_restore(&save_start);
				4575	save_start.rs_u.pos.col =
				4576	(colnr_T)STRLEN(regline);
				4577	}
				4578	else
				4579	--save_start.rs_u.pos.col;
				4580	}
				4581	else
				4582	{
				4583	if (save_start.rs_u.ptr == regline)
				4584	break;
				4585	--save_start.rs_u.ptr;
				4586	}
				4587	}
				4588
				4589	/* NOBEHIND succeeds when no match was found */
				4590	behind_pos = save_behind_pos;
				4591	if (!needmatch)
				4592	{
				4593	reg_restore(&save_after);
				4594	return TRUE;
				4595	}
				4596	}
				4597	return FALSE;
				4598	}
				4599
				4600	case BHPOS:
				4601	if (REG_MULTI)
				4602	{
				4603	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4604	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4605	return FALSE;
				4606	}
				4607	else if (behind_pos.rs_u.ptr != reginput)
				4608	return FALSE;
				4609	break;
				4610
				4611	case NEWL:
				4612	if ((c != NUL \|\| reglnum == reg_maxline)
				4613	&& (c != '\n' \|\| !reg_line_lbr))
				4614	return FALSE;
				4615	if (reg_line_lbr)
				4616	ADVANCE_REGINPUT();
				4617	else
				4618	reg_nextline();
				4619	break;
				4620
				4621	case END:
				4622	return TRUE; /* Success! */
				4623
				4624	default:
				4625	EMSG(_(e_re_corr));
				4626	#ifdef DEBUG
				4627	printf("Illegal op code %d\n", op);
				4628	#endif
				4629	return FALSE;
				4630	}
				4631	}
				4632
				4633	scan = next;
				4634	}
				4635
				4636	/*
				4637	* We get here only if there's trouble -- normally "case END" is the
				4638	* terminating point.
				4639	*/
				4640	EMSG(_(e_re_corr));
				4641	#ifdef DEBUG
				4642	printf("Premature EOL\n");
				4643	#endif
				4644	return FALSE;
				4645	}
				4646
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4647	/*
				4648	* regrepeat - repeatedly match something simple, return how many.
				4649	* Advances reginput (and reglnum) to just after the matched chars.
				4650	*/
				4651	static int
				4652	regrepeat(p, maxcount)
				4653	char_u *p;
				4654	long maxcount; /* maximum number of matches allowed */
				4655	{
				4656	long count = 0;
				4657	char_u *scan;
				4658	char_u *opnd;
				4659	int mask;
				4660	int testval = 0;
				4661
				4662	scan = reginput; /* Make local copy of reginput for speed. */
				4663	opnd = OPERAND(p);
				4664	switch (OP(p))
				4665	{
				4666	case ANY:
				4667	case ANY + ADD_NL:
				4668	while (count < maxcount)
				4669	{
				4670	/* Matching anything means we continue until end-of-line (or
				4671	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4672	while (*scan != NUL && count < maxcount)
				4673	{
				4674	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4675	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4676	}
				4677	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4678	break;
				4679	++count; /* count the line-break */
				4680	reg_nextline();
				4681	scan = reginput;
				4682	if (got_int)
				4683	break;
				4684	}
				4685	break;
				4686
				4687	case IDENT:
				4688	case IDENT + ADD_NL:
				4689	testval = TRUE;
				4690	/FALLTHROUGH/
				4691	case SIDENT:
				4692	case SIDENT + ADD_NL:
				4693	while (count < maxcount)
				4694	{
				4695	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4696	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4697	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4698	}
				4699	else if (*scan == NUL)
				4700	{
				4701	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4702	break;
				4703	reg_nextline();
				4704	scan = reginput;
				4705	if (got_int)
				4706	break;
				4707	}
				4708	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4709	++scan;
				4710	else
				4711	break;
				4712	++count;
				4713	}
				4714	break;
				4715
				4716	case KWORD:
				4717	case KWORD + ADD_NL:
				4718	testval = TRUE;
				4719	/FALLTHROUGH/
				4720	case SKWORD:
				4721	case SKWORD + ADD_NL:
				4722	while (count < maxcount)
				4723	{
				4724	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4725	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4726	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4727	}
				4728	else if (*scan == NUL)
				4729	{
				4730	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4731	break;
				4732	reg_nextline();
				4733	scan = reginput;
				4734	if (got_int)
				4735	break;
				4736	}
				4737	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4738	++scan;
				4739	else
				4740	break;
				4741	++count;
				4742	}
				4743	break;
				4744
				4745	case FNAME:
				4746	case FNAME + ADD_NL:
				4747	testval = TRUE;
				4748	/FALLTHROUGH/
				4749	case SFNAME:
				4750	case SFNAME + ADD_NL:
				4751	while (count < maxcount)
				4752	{
				4753	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4754	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4755	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4756	}
				4757	else if (*scan == NUL)
				4758	{
				4759	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4760	break;
				4761	reg_nextline();
				4762	scan = reginput;
				4763	if (got_int)
				4764	break;
				4765	}
				4766	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4767	++scan;
				4768	else
				4769	break;
				4770	++count;
				4771	}
				4772	break;
				4773
				4774	case PRINT:
				4775	case PRINT + ADD_NL:
				4776	testval = TRUE;
				4777	/FALLTHROUGH/
				4778	case SPRINT:
				4779	case SPRINT + ADD_NL:
				4780	while (count < maxcount)
				4781	{
				4782	if (*scan == NUL)
				4783	{
				4784	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4785	break;
				4786	reg_nextline();
				4787	scan = reginput;
				4788	if (got_int)
				4789	break;
				4790	}
				4791	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4792	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4793	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4794	}
				4795	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4796	++scan;
				4797	else
				4798	break;
				4799	++count;
				4800	}
				4801	break;
				4802
				4803	case WHITE:
				4804	case WHITE + ADD_NL:
				4805	testval = mask = RI_WHITE;
				4806	do_class:
				4807	while (count < maxcount)
				4808	{
				4809	#ifdef FEAT_MBYTE
				4810	int l;
				4811	#endif
				4812	if (*scan == NUL)
				4813	{
				4814	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4815	break;
				4816	reg_nextline();
				4817	scan = reginput;
				4818	if (got_int)
				4819	break;
				4820	}
				4821	#ifdef FEAT_MBYTE
				4822	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4823	{
				4824	if (testval != 0)
				4825	break;
				4826	scan += l;
				4827	}
				4828	#endif
				4829	else if ((class_tab[*scan] & mask) == testval)
				4830	++scan;
				4831	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4832	++scan;
				4833	else
				4834	break;
				4835	++count;
				4836	}
				4837	break;
				4838
				4839	case NWHITE:
				4840	case NWHITE + ADD_NL:
				4841	mask = RI_WHITE;
				4842	goto do_class;
				4843	case DIGIT:
				4844	case DIGIT + ADD_NL:
				4845	testval = mask = RI_DIGIT;
				4846	goto do_class;
				4847	case NDIGIT:
				4848	case NDIGIT + ADD_NL:
				4849	mask = RI_DIGIT;
				4850	goto do_class;
				4851	case HEX:
				4852	case HEX + ADD_NL:
				4853	testval = mask = RI_HEX;
				4854	goto do_class;
				4855	case NHEX:
				4856	case NHEX + ADD_NL:
				4857	mask = RI_HEX;
				4858	goto do_class;
				4859	case OCTAL:
				4860	case OCTAL + ADD_NL:
				4861	testval = mask = RI_OCTAL;
				4862	goto do_class;
				4863	case NOCTAL:
				4864	case NOCTAL + ADD_NL:
				4865	mask = RI_OCTAL;
				4866	goto do_class;
				4867	case WORD:
				4868	case WORD + ADD_NL:
				4869	testval = mask = RI_WORD;
				4870	goto do_class;
				4871	case NWORD:
				4872	case NWORD + ADD_NL:
				4873	mask = RI_WORD;
				4874	goto do_class;
				4875	case HEAD:
				4876	case HEAD + ADD_NL:
				4877	testval = mask = RI_HEAD;
				4878	goto do_class;
				4879	case NHEAD:
				4880	case NHEAD + ADD_NL:
				4881	mask = RI_HEAD;
				4882	goto do_class;
				4883	case ALPHA:
				4884	case ALPHA + ADD_NL:
				4885	testval = mask = RI_ALPHA;
				4886	goto do_class;
				4887	case NALPHA:
				4888	case NALPHA + ADD_NL:
				4889	mask = RI_ALPHA;
				4890	goto do_class;
				4891	case LOWER:
				4892	case LOWER + ADD_NL:
				4893	testval = mask = RI_LOWER;
				4894	goto do_class;
				4895	case NLOWER:
				4896	case NLOWER + ADD_NL:
				4897	mask = RI_LOWER;
				4898	goto do_class;
				4899	case UPPER:
				4900	case UPPER + ADD_NL:
				4901	testval = mask = RI_UPPER;
				4902	goto do_class;
				4903	case NUPPER:
				4904	case NUPPER + ADD_NL:
				4905	mask = RI_UPPER;
				4906	goto do_class;
				4907
				4908	case EXACTLY:
				4909	{
				4910	int cu, cl;
				4911
				4912	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4913	* would have been used for it. */
				4914	if (ireg_ic)
				4915	{
				4916	cu = TOUPPER_LOC(*opnd);
				4917	cl = TOLOWER_LOC(*opnd);
				4918	while (count < maxcount && (scan == cu \|\| scan == cl))
				4919	{
				4920	count++;
				4921	scan++;
				4922	}
				4923	}
				4924	else
				4925	{
				4926	cu = *opnd;
				4927	while (count < maxcount && *scan == cu)
				4928	{
				4929	count++;
				4930	scan++;
				4931	}
				4932	}
				4933	break;
				4934	}
				4935
				4936	#ifdef FEAT_MBYTE
				4937	case MULTIBYTECODE:
				4938	{
				4939	int i, len, cf = 0;
				4940
				4941	/* Safety check (just in case 'encoding' was changed since
				4942	* compiling the program). */
				4943	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4944	{
				4945	if (ireg_ic && enc_utf8)
				4946	cf = utf_fold(utf_ptr2char(opnd));
				4947	while (count < maxcount)
				4948	{
				4949	for (i = 0; i < len; ++i)
				4950	if (opnd[i] != scan[i])
				4951	break;
				4952	if (i < len && (!ireg_ic \|\| !enc_utf8
				4953	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4954	break;
				4955	scan += len;
				4956	++count;
				4957	}
				4958	}
				4959	}
				4960	break;
				4961	#endif
				4962
				4963	case ANYOF:
				4964	case ANYOF + ADD_NL:
				4965	testval = TRUE;
				4966	/FALLTHROUGH/
				4967
				4968	case ANYBUT:
				4969	case ANYBUT + ADD_NL:
				4970	while (count < maxcount)
				4971	{
				4972	#ifdef FEAT_MBYTE
				4973	int len;
				4974	#endif
				4975	if (*scan == NUL)
				4976	{
				4977	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4978	break;
				4979	reg_nextline();
				4980	scan = reginput;
				4981	if (got_int)
				4982	break;
				4983	}
				4984	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4985	++scan;
				4986	#ifdef FEAT_MBYTE
				4987	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4988	{
				4989	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4990	break;
				4991	scan += len;
				4992	}
				4993	#endif
				4994	else
				4995	{
				4996	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4997	break;
				4998	++scan;
				4999	}
				5000	++count;
				5001	}
				5002	break;
				5003
				5004	case NEWL:
				5005	while (count < maxcount
				5006	&& ((*scan == NUL && reglnum < reg_maxline)
				5007	\|\| (*scan == '\n' && reg_line_lbr)))
				5008	{
				5009	count++;
				5010	if (reg_line_lbr)
				5011	ADVANCE_REGINPUT();
				5012	else
				5013	reg_nextline();
				5014	scan = reginput;
				5015	if (got_int)
				5016	break;
				5017	}
				5018	break;
				5019
				5020	default: /* Oh dear. Called inappropriately. */
				5021	EMSG(_(e_re_corr));
				5022	#ifdef DEBUG
				5023	printf("Called regrepeat with op code %d\n", OP(p));
				5024	#endif
				5025	break;
				5026	}
				5027
				5028	reginput = scan;
				5029
				5030	return (int)count;
				5031	}
				5032
				5033	/*
				5034	* regnext - dig the "next" pointer out of a node
				5035	*/
				5036	static char_u *
				5037	regnext(p)
				5038	char_u *p;
				5039	{
				5040	int offset;
				5041
				5042	if (p == JUST_CALC_SIZE)
				5043	return NULL;
				5044
				5045	offset = NEXT(p);
				5046	if (offset == 0)
				5047	return NULL;
				5048
				5049	if (OP(p) == BACK)
				5050	return p - offset;
				5051	else
				5052	return p + offset;
				5053	}
				5054
				5055	/*
				5056	* Check the regexp program for its magic number.
				5057	* Return TRUE if it's wrong.
				5058	*/
				5059	static int
				5060	prog_magic_wrong()
				5061	{
				5062	if (UCHARAT(REG_MULTI
				5063	? reg_mmatch->regprog->program
				5064	: reg_match->regprog->program) != REGMAGIC)
				5065	{
				5066	EMSG(_(e_re_corr));
				5067	return TRUE;
				5068	}
				5069	return FALSE;
				5070	}
				5071
				5072	/*
				5073	* Cleanup the subexpressions, if this wasn't done yet.
				5074	* This construction is used to clear the subexpressions only when they are
				5075	* used (to increase speed).
				5076	*/
				5077	static void
				5078	cleanup_subexpr()
				5079	{
				5080	if (need_clear_subexpr)
				5081	{
				5082	if (REG_MULTI)
				5083	{
				5084	/* Use 0xff to set lnum to -1 */
				5085	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5086	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5087	}
				5088	else
				5089	{
				5090	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				5091	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				5092	}
				5093	need_clear_subexpr = FALSE;
				5094	}
				5095	}
				5096
				5097	#ifdef FEAT_SYN_HL
				5098	static void
				5099	cleanup_zsubexpr()
				5100	{
				5101	if (need_clear_zsubexpr)
				5102	{
				5103	if (REG_MULTI)
				5104	{
				5105	/* Use 0xff to set lnum to -1 */
				5106	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5107	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5108	}
				5109	else
				5110	{
				5111	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				5112	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				5113	}
				5114	need_clear_zsubexpr = FALSE;
				5115	}
				5116	}
				5117	#endif
				5118
				5119	/*
				5120	* Advance reglnum, regline and reginput to the next line.
				5121	*/
				5122	static void
				5123	reg_nextline()
				5124	{
				5125	regline = reg_getline(++reglnum);
				5126	reginput = regline;
				5127	fast_breakcheck();
				5128	}
				5129
				5130	/*
				5131	* Save the input line and position in a regsave_T.
				5132	*/
				5133	static void
				5134	reg_save(save)
				5135	regsave_T *save;
				5136	{
				5137	if (REG_MULTI)
				5138	{
				5139	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				5140	save->rs_u.pos.lnum = reglnum;
				5141	}
				5142	else
				5143	save->rs_u.ptr = reginput;
				5144	}
				5145
				5146	/*
				5147	* Restore the input line and position from a regsave_T.
				5148	*/
				5149	static void
				5150	reg_restore(save)
				5151	regsave_T *save;
				5152	{
				5153	if (REG_MULTI)
				5154	{
				5155	if (reglnum != save->rs_u.pos.lnum)
				5156	{
				5157	/* only call reg_getline() when the line number changed to save
				5158	* a bit of time */
				5159	reglnum = save->rs_u.pos.lnum;
				5160	regline = reg_getline(reglnum);
				5161	}
				5162	reginput = regline + save->rs_u.pos.col;
				5163	}
				5164	else
				5165	reginput = save->rs_u.ptr;
				5166	}
				5167
				5168	/*
				5169	* Return TRUE if current position is equal to saved position.
				5170	*/
				5171	static int
				5172	reg_save_equal(save)
				5173	regsave_T *save;
				5174	{
				5175	if (REG_MULTI)
				5176	return reglnum == save->rs_u.pos.lnum
				5177	&& reginput == regline + save->rs_u.pos.col;
				5178	return reginput == save->rs_u.ptr;
				5179	}
				5180
				5181	/*
				5182	* Tentatively set the sub-expression start to the current position (after
				5183	* calling regmatch() they will have changed). Need to save the existing
				5184	* values for when there is no match.
				5185	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				5186	* depending on REG_MULTI.
				5187	*/
				5188	static void
				5189	save_se_multi(savep, posp)
				5190	save_se_T *savep;
				5191	lpos_T *posp;
				5192	{
				5193	savep->se_u.pos = *posp;
				5194	posp->lnum = reglnum;
				5195	posp->col = (colnr_T)(reginput - regline);
				5196	}
				5197
				5198	static void
				5199	save_se_one(savep, pp)
				5200	save_se_T *savep;
				5201	char_u **pp;
				5202	{
				5203	savep->se_u.ptr = *pp;
				5204	*pp = reginput;
				5205	}
				5206
				5207	/*
				5208	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5209	*/
				5210	static int
				5211	re_num_cmp(val, scan)
				5212	long_u val;
				5213	char_u *scan;
				5214	{
				5215	long_u n = OPERAND_MIN(scan);
				5216
				5217	if (OPERAND_CMP(scan) == '>')
				5218	return val > n;
				5219	if (OPERAND_CMP(scan) == '<')
				5220	return val < n;
				5221	return val == n;
				5222	}
				5223
				5224
				5225	#ifdef DEBUG
				5226
				5227	/*
				5228	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5229	*/
				5230	static void
				5231	regdump(pattern, r)
				5232	char_u *pattern;
				5233	regprog_T *r;
				5234	{
				5235	char_u *s;
				5236	int op = EXACTLY; /* Arbitrary non-END op. */
				5237	char_u *next;
				5238	char_u *end = NULL;
				5239
				5240	printf("\r\nregcomp(%s):\r\n", pattern);
				5241
				5242	s = r->program + 1;
				5243	/*
				5244	* Loop until we find the END that isn't before a referred next (an END
				5245	* can also appear in a NOMATCH operand).
				5246	*/
				5247	while (op != END \|\| s <= end)
				5248	{
				5249	op = OP(s);
				5250	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5251	next = regnext(s);
				5252	if (next == NULL) /* Next ptr. */
				5253	printf("(0)");
				5254	else
				5255	printf("(%d)", (int)((s - r->program) + (next - s)));
				5256	if (end < next)
				5257	end = next;
				5258	if (op == BRACE_LIMITS)
				5259	{
				5260	/* Two short ints */
				5261	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5262	s += 8;
				5263	}
				5264	s += 3;
				5265	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5266	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5267	\|\| op == EXACTLY)
				5268	{
				5269	/* Literal string, where present. */
				5270	while (*s != NUL)
				5271	printf("%c", *s++);
				5272	s++;
				5273	}
				5274	printf("\r\n");
				5275	}
				5276
				5277	/* Header fields of interest. */
				5278	if (r->regstart != NUL)
				5279	printf("start `%s' 0x%x; ", r->regstart < 256
				5280	? (char *)transchar(r->regstart)
				5281	: "multibyte", r->regstart);
				5282	if (r->reganch)
				5283	printf("anchored; ");
				5284	if (r->regmust != NULL)
				5285	printf("must have \"%s\"", r->regmust);
				5286	printf("\r\n");
				5287	}
				5288
				5289	/*
				5290	* regprop - printable representation of opcode
				5291	*/
				5292	static char_u *
				5293	regprop(op)
				5294	char_u *op;
				5295	{
				5296	char_u *p;
				5297	static char_u buf[50];
				5298
				5299	(void) strcpy(buf, ":");
				5300
				5301	switch (OP(op))
				5302	{
				5303	case BOL:
				5304	p = "BOL";
				5305	break;
				5306	case EOL:
				5307	p = "EOL";
				5308	break;
				5309	case RE_BOF:
				5310	p = "BOF";
				5311	break;
				5312	case RE_EOF:
				5313	p = "EOF";
				5314	break;
				5315	case CURSOR:
				5316	p = "CURSOR";
				5317	break;
				5318	case RE_LNUM:
				5319	p = "RE_LNUM";
				5320	break;
				5321	case RE_COL:
				5322	p = "RE_COL";
				5323	break;
				5324	case RE_VCOL:
				5325	p = "RE_VCOL";
				5326	break;
				5327	case BOW:
				5328	p = "BOW";
				5329	break;
				5330	case EOW:
				5331	p = "EOW";
				5332	break;
				5333	case ANY:
				5334	p = "ANY";
				5335	break;
				5336	case ANY + ADD_NL:
				5337	p = "ANY+NL";
				5338	break;
				5339	case ANYOF:
				5340	p = "ANYOF";
				5341	break;
				5342	case ANYOF + ADD_NL:
				5343	p = "ANYOF+NL";
				5344	break;
				5345	case ANYBUT:
				5346	p = "ANYBUT";
				5347	break;
				5348	case ANYBUT + ADD_NL:
				5349	p = "ANYBUT+NL";
				5350	break;
				5351	case IDENT:
				5352	p = "IDENT";
				5353	break;
				5354	case IDENT + ADD_NL:
				5355	p = "IDENT+NL";
				5356	break;
				5357	case SIDENT:
				5358	p = "SIDENT";
				5359	break;
				5360	case SIDENT + ADD_NL:
				5361	p = "SIDENT+NL";
				5362	break;
				5363	case KWORD:
				5364	p = "KWORD";
				5365	break;
				5366	case KWORD + ADD_NL:
				5367	p = "KWORD+NL";
				5368	break;
				5369	case SKWORD:
				5370	p = "SKWORD";
				5371	break;
				5372	case SKWORD + ADD_NL:
				5373	p = "SKWORD+NL";
				5374	break;
				5375	case FNAME:
				5376	p = "FNAME";
				5377	break;
				5378	case FNAME + ADD_NL:
				5379	p = "FNAME+NL";
				5380	break;
				5381	case SFNAME:
				5382	p = "SFNAME";
				5383	break;
				5384	case SFNAME + ADD_NL:
				5385	p = "SFNAME+NL";
				5386	break;
				5387	case PRINT:
				5388	p = "PRINT";
				5389	break;
				5390	case PRINT + ADD_NL:
				5391	p = "PRINT+NL";
				5392	break;
				5393	case SPRINT:
				5394	p = "SPRINT";
				5395	break;
				5396	case SPRINT + ADD_NL:
				5397	p = "SPRINT+NL";
				5398	break;
				5399	case WHITE:
				5400	p = "WHITE";
				5401	break;
				5402	case WHITE + ADD_NL:
				5403	p = "WHITE+NL";
				5404	break;
				5405	case NWHITE:
				5406	p = "NWHITE";
				5407	break;
				5408	case NWHITE + ADD_NL:
				5409	p = "NWHITE+NL";
				5410	break;
				5411	case DIGIT:
				5412	p = "DIGIT";
				5413	break;
				5414	case DIGIT + ADD_NL:
				5415	p = "DIGIT+NL";
				5416	break;
				5417	case NDIGIT:
				5418	p = "NDIGIT";
				5419	break;
				5420	case NDIGIT + ADD_NL:
				5421	p = "NDIGIT+NL";
				5422	break;
				5423	case HEX:
				5424	p = "HEX";
				5425	break;
				5426	case HEX + ADD_NL:
				5427	p = "HEX+NL";
				5428	break;
				5429	case NHEX:
				5430	p = "NHEX";
				5431	break;
				5432	case NHEX + ADD_NL:
				5433	p = "NHEX+NL";
				5434	break;
				5435	case OCTAL:
				5436	p = "OCTAL";
				5437	break;
				5438	case OCTAL + ADD_NL:
				5439	p = "OCTAL+NL";
				5440	break;
				5441	case NOCTAL:
				5442	p = "NOCTAL";
				5443	break;
				5444	case NOCTAL + ADD_NL:
				5445	p = "NOCTAL+NL";
				5446	break;
				5447	case WORD:
				5448	p = "WORD";
				5449	break;
				5450	case WORD + ADD_NL:
				5451	p = "WORD+NL";
				5452	break;
				5453	case NWORD:
				5454	p = "NWORD";
				5455	break;
				5456	case NWORD + ADD_NL:
				5457	p = "NWORD+NL";
				5458	break;
				5459	case HEAD:
				5460	p = "HEAD";
				5461	break;
				5462	case HEAD + ADD_NL:
				5463	p = "HEAD+NL";
				5464	break;
				5465	case NHEAD:
				5466	p = "NHEAD";
				5467	break;
				5468	case NHEAD + ADD_NL:
				5469	p = "NHEAD+NL";
				5470	break;
				5471	case ALPHA:
				5472	p = "ALPHA";
				5473	break;
				5474	case ALPHA + ADD_NL:
				5475	p = "ALPHA+NL";
				5476	break;
				5477	case NALPHA:
				5478	p = "NALPHA";
				5479	break;
				5480	case NALPHA + ADD_NL:
				5481	p = "NALPHA+NL";
				5482	break;
				5483	case LOWER:
				5484	p = "LOWER";
				5485	break;
				5486	case LOWER + ADD_NL:
				5487	p = "LOWER+NL";
				5488	break;
				5489	case NLOWER:
				5490	p = "NLOWER";
				5491	break;
				5492	case NLOWER + ADD_NL:
				5493	p = "NLOWER+NL";
				5494	break;
				5495	case UPPER:
				5496	p = "UPPER";
				5497	break;
				5498	case UPPER + ADD_NL:
				5499	p = "UPPER+NL";
				5500	break;
				5501	case NUPPER:
				5502	p = "NUPPER";
				5503	break;
				5504	case NUPPER + ADD_NL:
				5505	p = "NUPPER+NL";
				5506	break;
				5507	case BRANCH:
				5508	p = "BRANCH";
				5509	break;
				5510	case EXACTLY:
				5511	p = "EXACTLY";
				5512	break;
				5513	case NOTHING:
				5514	p = "NOTHING";
				5515	break;
				5516	case BACK:
				5517	p = "BACK";
				5518	break;
				5519	case END:
				5520	p = "END";
				5521	break;
				5522	case MOPEN + 0:
				5523	p = "MATCH START";
				5524	break;
				5525	case MOPEN + 1:
				5526	case MOPEN + 2:
				5527	case MOPEN + 3:
				5528	case MOPEN + 4:
				5529	case MOPEN + 5:
				5530	case MOPEN + 6:
				5531	case MOPEN + 7:
				5532	case MOPEN + 8:
				5533	case MOPEN + 9:
				5534	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5535	p = NULL;
				5536	break;
				5537	case MCLOSE + 0:
				5538	p = "MATCH END";
				5539	break;
				5540	case MCLOSE + 1:
				5541	case MCLOSE + 2:
				5542	case MCLOSE + 3:
				5543	case MCLOSE + 4:
				5544	case MCLOSE + 5:
				5545	case MCLOSE + 6:
				5546	case MCLOSE + 7:
				5547	case MCLOSE + 8:
				5548	case MCLOSE + 9:
				5549	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5550	p = NULL;
				5551	break;
				5552	case BACKREF + 1:
				5553	case BACKREF + 2:
				5554	case BACKREF + 3:
				5555	case BACKREF + 4:
				5556	case BACKREF + 5:
				5557	case BACKREF + 6:
				5558	case BACKREF + 7:
				5559	case BACKREF + 8:
				5560	case BACKREF + 9:
				5561	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5562	p = NULL;
				5563	break;
				5564	case NOPEN:
				5565	p = "NOPEN";
				5566	break;
				5567	case NCLOSE:
				5568	p = "NCLOSE";
				5569	break;
				5570	#ifdef FEAT_SYN_HL
				5571	case ZOPEN + 1:
				5572	case ZOPEN + 2:
				5573	case ZOPEN + 3:
				5574	case ZOPEN + 4:
				5575	case ZOPEN + 5:
				5576	case ZOPEN + 6:
				5577	case ZOPEN + 7:
				5578	case ZOPEN + 8:
				5579	case ZOPEN + 9:
				5580	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5581	p = NULL;
				5582	break;
				5583	case ZCLOSE + 1:
				5584	case ZCLOSE + 2:
				5585	case ZCLOSE + 3:
				5586	case ZCLOSE + 4:
				5587	case ZCLOSE + 5:
				5588	case ZCLOSE + 6:
				5589	case ZCLOSE + 7:
				5590	case ZCLOSE + 8:
				5591	case ZCLOSE + 9:
				5592	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5593	p = NULL;
				5594	break;
				5595	case ZREF + 1:
				5596	case ZREF + 2:
				5597	case ZREF + 3:
				5598	case ZREF + 4:
				5599	case ZREF + 5:
				5600	case ZREF + 6:
				5601	case ZREF + 7:
				5602	case ZREF + 8:
				5603	case ZREF + 9:
				5604	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5605	p = NULL;
				5606	break;
				5607	#endif
				5608	case STAR:
				5609	p = "STAR";
				5610	break;
				5611	case PLUS:
				5612	p = "PLUS";
				5613	break;
				5614	case NOMATCH:
				5615	p = "NOMATCH";
				5616	break;
				5617	case MATCH:
				5618	p = "MATCH";
				5619	break;
				5620	case BEHIND:
				5621	p = "BEHIND";
				5622	break;
				5623	case NOBEHIND:
				5624	p = "NOBEHIND";
				5625	break;
				5626	case SUBPAT:
				5627	p = "SUBPAT";
				5628	break;
				5629	case BRACE_LIMITS:
				5630	p = "BRACE_LIMITS";
				5631	break;
				5632	case BRACE_SIMPLE:
				5633	p = "BRACE_SIMPLE";
				5634	break;
				5635	case BRACE_COMPLEX + 0:
				5636	case BRACE_COMPLEX + 1:
				5637	case BRACE_COMPLEX + 2:
				5638	case BRACE_COMPLEX + 3:
				5639	case BRACE_COMPLEX + 4:
				5640	case BRACE_COMPLEX + 5:
				5641	case BRACE_COMPLEX + 6:
				5642	case BRACE_COMPLEX + 7:
				5643	case BRACE_COMPLEX + 8:
				5644	case BRACE_COMPLEX + 9:
				5645	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5646	p = NULL;
				5647	break;
				5648	#ifdef FEAT_MBYTE
				5649	case MULTIBYTECODE:
				5650	p = "MULTIBYTECODE";
				5651	break;
				5652	#endif
				5653	case NEWL:
				5654	p = "NEWL";
				5655	break;
				5656	default:
				5657	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5658	p = NULL;
				5659	break;
				5660	}
				5661	if (p != NULL)
				5662	(void) strcat(buf, p);
				5663	return buf;
				5664	}
				5665	#endif
				5666
				5667	#ifdef FEAT_MBYTE
				5668	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5669
				5670	typedef struct
				5671	{
				5672	int a, b, c;
				5673	} decomp_T;
				5674
				5675
				5676	/* 0xfb20 - 0xfb4f */
				5677	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5678	{
				5679	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5680	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5681	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5682	{0x5d4,0,0}, /* 0xfb23 alt he */
				5683	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5684	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5685	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5686	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5687	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5688	{'+', 0, 0}, /* 0xfb29 alt plus */
				5689	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5690	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5691	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5692	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5693	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5694	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5695	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5696	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5697	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5698	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5699	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5700	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5701	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5702	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5703	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5704	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5705	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5706	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5707	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5708	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5709	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5710	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5711	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5712	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5713	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5714	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5715	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5716	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5717	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5718	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5719	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5720	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5721	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5722	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5723	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5724	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5725	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5726	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5727	};
				5728
				5729	static void
				5730	mb_decompose(c, c1, c2, c3)
				5731	int c, c1, c2, *c3;
				5732	{
				5733	decomp_T d;
				5734
				5735	if (c >= 0x4b20 && c <= 0xfb4f)
				5736	{
				5737	d = decomp_table[c - 0xfb20];
				5738	*c1 = d.a;
				5739	*c2 = d.b;
				5740	*c3 = d.c;
				5741	}
				5742	else
				5743	{
				5744	*c1 = c;
				5745	c2 = c3 = 0;
				5746	}
				5747	}
				5748	#endif
				5749
				5750	/*
				5751	* Compare two strings, ignore case if ireg_ic set.
				5752	* Return 0 if strings match, non-zero otherwise.
				5753	* Correct the length "*n" when composing characters are ignored.
				5754	*/
				5755	static int
				5756	cstrncmp(s1, s2, n)
				5757	char_u s1, s2;
				5758	int *n;
				5759	{
				5760	int result;
				5761
				5762	if (!ireg_ic)
				5763	result = STRNCMP(s1, s2, *n);
				5764	else
				5765	result = MB_STRNICMP(s1, s2, *n);
				5766
				5767	#ifdef FEAT_MBYTE
				5768	/* if it failed and it's utf8 and we want to combineignore: */
				5769	if (result != 0 && enc_utf8 && ireg_icombine)
				5770	{
				5771	char_u str1, str2;
				5772	int c1, c2, c11, c12;
				5773	int ix;
				5774	int junk;
				5775
				5776	/* we have to handle the strcmp ourselves, since it is necessary to
				5777	* deal with the composing characters by ignoring them: */
				5778	str1 = s1;
				5779	str2 = s2;
				5780	c1 = c2 = 0;
				5781	for (ix = 0; ix < *n; )
				5782	{
				5783	c1 = mb_ptr2char_adv(&str1);
				5784	c2 = mb_ptr2char_adv(&str2);
				5785	ix += utf_char2len(c1);
				5786
				5787	/* decompose the character if necessary, into 'base' characters
				5788	* because I don't care about Arabic, I will hard-code the Hebrew
				5789	* which I do care about! So sue me... */
				5790	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5791	{
				5792	/* decomposition necessary? */
				5793	mb_decompose(c1, &c11, &junk, &junk);
				5794	mb_decompose(c2, &c12, &junk, &junk);
				5795	c1 = c11;
				5796	c2 = c12;
				5797	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5798	break;
				5799	}
				5800	}
				5801	result = c2 - c1;
				5802	if (result == 0)
				5803	*n = (int)(str2 - s2);
				5804	}
				5805	#endif
				5806
				5807	return result;
				5808	}
				5809
				5810	/*
				5811	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5812	*/
				5813	static char_u *
				5814	cstrchr(s, c)
				5815	char_u *s;
				5816	int c;
				5817	{
				5818	char_u *p;
				5819	int cc;
				5820
				5821	if (!ireg_ic
				5822	#ifdef FEAT_MBYTE
				5823	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5824	#endif
				5825	)
				5826	return vim_strchr(s, c);
				5827
				5828	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5829	* faster (esp. when using MS Visual C++!).
				5830	* For UTF-8 need to use folded case. */
				5831	#ifdef FEAT_MBYTE
				5832	if (enc_utf8 && c > 0x80)
				5833	cc = utf_fold(c);
				5834	else
				5835	#endif
				5836	if (isupper(c))
				5837	cc = TOLOWER_LOC(c);
				5838	else if (islower(c))
				5839	cc = TOUPPER_LOC(c);
				5840	else
				5841	return vim_strchr(s, c);
				5842
				5843	#ifdef FEAT_MBYTE
				5844	if (has_mbyte)
				5845	{
				5846	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5847	{
				5848	if (enc_utf8 && c > 0x80)
				5849	{
				5850	if (utf_fold(utf_ptr2char(p)) == cc)
				5851	return p;
				5852	}
				5853	else if (p == c \|\| p == cc)
				5854	return p;
				5855	}
				5856	}
				5857	else
				5858	#endif
				5859	/* Faster version for when there are no multi-byte characters. */
				5860	for (p = s; *p != NUL; ++p)
				5861	if (p == c \|\| p == cc)
				5862	return p;
				5863
				5864	return NULL;
				5865	}
				5866
				5867	/***************************************************************
				5868	* regsub stuff *
				5869	***************************************************************/
				5870
				5871	/* This stuff below really confuses cc on an SGI -- webb */
				5872	#ifdef __sgi
				5873	# undef __ARGS
				5874	# define __ARGS(x) ()
				5875	#endif
				5876
				5877	/*
				5878	* We should define ftpr as a pointer to a function returning a pointer to
				5879	* a function returning a pointer to a function ...
				5880	* This is impossible, so we declare a pointer to a function returning a
				5881	* pointer to a function returning void. This should work for all compilers.
				5882	*/
				5883	typedef void ((fptr) __ARGS((char_u *, int)))();
				5884
				5885	static fptr do_upper __ARGS((char_u *, int));
				5886	static fptr do_Upper __ARGS((char_u *, int));
				5887	static fptr do_lower __ARGS((char_u *, int));
				5888	static fptr do_Lower __ARGS((char_u *, int));
				5889
				5890	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5891
				5892	static fptr
				5893	do_upper(d, c)
				5894	char_u *d;
				5895	int c;
				5896	{
				5897	*d = TOUPPER_LOC(c);
				5898
				5899	return (fptr)NULL;
				5900	}
				5901
				5902	static fptr
				5903	do_Upper(d, c)
				5904	char_u *d;
				5905	int c;
				5906	{
				5907	*d = TOUPPER_LOC(c);
				5908
				5909	return (fptr)do_Upper;
				5910	}
				5911
				5912	static fptr
				5913	do_lower(d, c)
				5914	char_u *d;
				5915	int c;
				5916	{
				5917	*d = TOLOWER_LOC(c);
				5918
				5919	return (fptr)NULL;
				5920	}
				5921
				5922	static fptr
				5923	do_Lower(d, c)
				5924	char_u *d;
				5925	int c;
				5926	{
				5927	*d = TOLOWER_LOC(c);
				5928
				5929	return (fptr)do_Lower;
				5930	}
				5931
				5932	/*
				5933	* regtilde(): Replace tildes in the pattern by the old pattern.
				5934	*
				5935	* Short explanation of the tilde: It stands for the previous replacement
				5936	* pattern. If that previous pattern also contains a ~ we should go back a
				5937	* step further... But we insert the previous pattern into the current one
				5938	* and remember that.
				5939	* This still does not handle the case where "magic" changes. TODO?
				5940	*
				5941	* The tildes are parsed once before the first call to vim_regsub().
				5942	*/
				5943	char_u *
				5944	regtilde(source, magic)
				5945	char_u *source;
				5946	int magic;
				5947	{
				5948	char_u *newsub = source;
				5949	char_u *tmpsub;
				5950	char_u *p;
				5951	int len;
				5952	int prevlen;
				5953
				5954	for (p = newsub; *p; ++p)
				5955	{
				5956	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5957	{
				5958	if (reg_prev_sub != NULL)
				5959	{
				5960	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5961	prevlen = (int)STRLEN(reg_prev_sub);
				5962	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5963	if (tmpsub != NULL)
				5964	{
				5965	/* copy prefix */
				5966	len = (int)(p - newsub); /* not including ~ */
				5967	mch_memmove(tmpsub, newsub, (size_t)len);
				5968	/* interpretate tilde */
				5969	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5970	/* copy postfix */
				5971	if (!magic)
				5972	++p; /* back off \ */
				5973	STRCPY(tmpsub + len + prevlen, p + 1);
				5974
				5975	if (newsub != source) /* already allocated newsub */
				5976	vim_free(newsub);
				5977	newsub = tmpsub;
				5978	p = newsub + len + prevlen;
				5979	}
				5980	}
				5981	else if (magic)
				5982	STRCPY(p, p + 1); /* remove '~' */
				5983	else
				5984	STRCPY(p, p + 2); /* remove '\~' */
				5985	--p;
				5986	}
				5987	else
				5988	{
				5989	if (p == '\\' && p[1]) / skip escaped characters */
				5990	++p;
				5991	#ifdef FEAT_MBYTE
				5992	if (has_mbyte)
				5993	p += (*mb_ptr2len_check)(p) - 1;
				5994	#endif
				5995	}
				5996	}
				5997
				5998	vim_free(reg_prev_sub);
				5999	if (newsub != source) /* newsub was allocated, just keep it */
				6000	reg_prev_sub = newsub;
				6001	else /* no ~ found, need to save newsub */
				6002	reg_prev_sub = vim_strsave(newsub);
				6003	return newsub;
				6004	}
				6005
				6006	#ifdef FEAT_EVAL
				6007	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used: set by vim_regsub_both() while it evaluates a "\=" expression and reset to FALSE afterwards; a "\=" is not evaluated again while it is TRUE */
				6008
				6009	/* These pointers are used instead of reg_match and reg_mmatch for
				6010	* reg_submatch(). Needed for when the substitution string is an expression
				6011	* that contains a call to substitute() and submatch().
					* vim_regsub_both() copies reg_match and reg_mmatch into them before
					* evaluating a "\=" expression and restores reg_match and reg_mmatch
					* from them when the evaluation is done. */
				6012	static regmatch_T *submatch_match;
				6013	static regmmatch_T *submatch_mmatch;
				6014	#endif
				6015
				6016	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				6017	/*
				6018	* vim_regsub() - perform substitutions after a vim_regexec() or
				6019	* vim_regexec_multi() match.
				6020	*
				6021	* If "copy" is TRUE really copy into "dest".
				6022	* If "copy" is FALSE nothing is copied, this is just to find out the length
				6023	* of the result.
				6024	*
				6025	* If "backslash" is TRUE, a backslash will be removed later, need to double
				6026	* them to keep them, and insert a backslash before a CR to avoid it being
				6027	* replaced with a line break later.
				6028	*
				6029	* Note: The matched text must not change between the call of
				6030	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				6031	* references invalid!
				6032	*
				6033	* Returns the size of the replacement, including terminating NUL.
				6034	*/
				6035	int
				6036	vim_regsub(rmp, source, dest, copy, magic, backslash)
				6037	regmatch_T *rmp;
				6038	char_u *source;
				6039	char_u *dest;
				6040	int copy;
				6041	int magic;
				6042	int backslash;
				6043	{
				6044	reg_match = rmp;
				6045	reg_mmatch = NULL;
				6046	reg_maxline = 0;
				6047	return vim_regsub_both(source, dest, copy, magic, backslash);
				6048	}
				6049	#endif
				6050
				6051	int
				6052	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				6053	regmmatch_T *rmp;
				6054	linenr_T lnum;
				6055	char_u *source;
				6056	char_u *dest;
				6057	int copy;
				6058	int magic;
				6059	int backslash;
				6060	{
				6061	reg_match = NULL;
				6062	reg_mmatch = rmp;
				6063	reg_buf = curbuf; /* always works on the current buffer! */
				6064	reg_firstlnum = lnum;
				6065	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				6066	return vim_regsub_both(source, dest, copy, magic, backslash);
				6067	}
				6068
				6069	static int
				6070	vim_regsub_both(source, dest, copy, magic, backslash)
				6071	char_u *source;
				6072	char_u *dest;
				6073	int copy;
				6074	int magic;
				6075	int backslash;
				6076	{
				6077	char_u *src;
				6078	char_u *dst;
				6079	char_u *s;
				6080	int c;
				6081	int no = -1;
				6082	fptr func = (fptr)NULL;
				6083	linenr_T clnum = 0; /* init for GCC */
				6084	int len = 0; /* init for GCC */
				6085	#ifdef FEAT_EVAL
				6086	static char_u *eval_result = NULL;
				6087	#endif
				6088	#ifdef FEAT_MBYTE
				6089	int l;
				6090	#endif
				6091
				6092
				6093	/* Be paranoid... */
				6094	if (source == NULL \|\| dest == NULL)
				6095	{
				6096	EMSG(_(e_null));
				6097	return 0;
				6098	}
				6099	if (prog_magic_wrong())
				6100	return 0;
				6101	src = source;
				6102	dst = dest;
				6103
				6104	/*
				6105	* When the substitute part starts with "\=" evaluate it as an expression.
				6106	*/
				6107	if (source[0] == '\\' && source[1] == '='
				6108	#ifdef FEAT_EVAL
				6109	&& !can_f_submatch /* can't do this recursively */
				6110	#endif
				6111	)
				6112	{
				6113	#ifdef FEAT_EVAL
				6114	/* To make sure that the length doesn't change between checking the
				6115	* length and copying the string, and to speed up things, the
				6116	* resulting string is saved from the call with "copy" == FALSE to the
				6117	* call with "copy" == TRUE. */
				6118	if (copy)
				6119	{
				6120	if (eval_result != NULL)
				6121	{
				6122	STRCPY(dest, eval_result);
				6123	dst += STRLEN(eval_result);
				6124	vim_free(eval_result);
				6125	eval_result = NULL;
				6126	}
				6127	}
				6128	else
				6129	{
				6130	linenr_T save_reg_maxline;
				6131	win_T *save_reg_win;
				6132	int save_ireg_ic;
				6133
				6134	vim_free(eval_result);
				6135
				6136	/* The expression may contain substitute(), which calls us
				6137	* recursively. Make sure submatch() gets the text from the first
				6138	* level. Don't need to save "reg_buf", because
				6139	* vim_regexec_multi() can't be called recursively. */
				6140	submatch_match = reg_match;
				6141	submatch_mmatch = reg_mmatch;
				6142	save_reg_maxline = reg_maxline;
				6143	save_reg_win = reg_win;
				6144	save_ireg_ic = ireg_ic;
				6145	can_f_submatch = TRUE;
				6146
				6147	eval_result = eval_to_string(source + 2, NULL);
				6148	if (eval_result != NULL)
				6149	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	6150	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6151	{
				6152	/* Change NL to CR, so that it becomes a line break.
				6153	* Skip over a backslashed character. */
				6154	if (*s == NL)
				6155	*s = CAR;
				6156	else if (*s == '\\' && s[1] != NUL)
				6157	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6158	}
				6159
				6160	dst += STRLEN(eval_result);
				6161	}
				6162
				6163	reg_match = submatch_match;
				6164	reg_mmatch = submatch_mmatch;
				6165	reg_maxline = save_reg_maxline;
				6166	reg_win = save_reg_win;
				6167	ireg_ic = save_ireg_ic;
				6168	can_f_submatch = FALSE;
				6169	}
				6170	#endif
				6171	}
				6172	else
				6173	while ((c = *src++) != NUL)
				6174	{
				6175	if (c == '&' && magic)
				6176	no = 0;
				6177	else if (c == '\\' && *src != NUL)
				6178	{
				6179	if (*src == '&' && !magic)
				6180	{
				6181	++src;
				6182	no = 0;
				6183	}
				6184	else if ('0' <= src && src <= '9')
				6185	{
				6186	no = *src++ - '0';
				6187	}
				6188	else if (vim_strchr((char_u )"uUlLeE", src))
				6189	{
				6190	switch (*src++)
				6191	{
				6192	case 'u': func = (fptr)do_upper;
				6193	continue;
				6194	case 'U': func = (fptr)do_Upper;
				6195	continue;
				6196	case 'l': func = (fptr)do_lower;
				6197	continue;
				6198	case 'L': func = (fptr)do_Lower;
				6199	continue;
				6200	case 'e':
				6201	case 'E': func = (fptr)NULL;
				6202	continue;
				6203	}
				6204	}
				6205	}
				6206	if (no < 0) /* Ordinary character. */
				6207	{
				6208	if (c == '\\' && *src != NUL)
				6209	{
				6210	/* Check for abbreviations -- webb */
				6211	switch (*src)
				6212	{
				6213	case 'r': c = CAR; ++src; break;
				6214	case 'n': c = NL; ++src; break;
				6215	case 't': c = TAB; ++src; break;
				6216	/* Oh no! \e already has meaning in subst pat :-( */
				6217	/* case 'e': c = ESC; ++src; break; */
				6218	case 'b': c = Ctrl_H; ++src; break;
				6219
				6220	/* If "backslash" is TRUE the backslash will be removed
				6221	* later. Used to insert a literal CR. */
				6222	default: if (backslash)
				6223	{
				6224	if (copy)
				6225	*dst = '\\';
				6226	++dst;
				6227	}
				6228	c = *src++;
				6229	}
				6230	}
				6231
				6232	/* Write to buffer, if copy is set. */
				6233	#ifdef FEAT_MBYTE
				6234	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6235	{
				6236	/* TODO: should use "func" here. */
				6237	if (copy)
				6238	mch_memmove(dst, src - 1, l);
				6239	dst += l - 1;
				6240	src += l - 1;
				6241	}
				6242	else
				6243	{
				6244	#endif
				6245	if (copy)
				6246	{
				6247	if (func == (fptr)NULL) /* just copy */
				6248	*dst = c;
				6249	else /* change case */
				6250	func = (fptr)(func(dst, c));
				6251	/* Turbo C complains without the typecast */
				6252	}
				6253	#ifdef FEAT_MBYTE
				6254	}
				6255	#endif
				6256	dst++;
				6257	}
				6258	else
				6259	{
				6260	if (REG_MULTI)
				6261	{
				6262	clnum = reg_mmatch->startpos[no].lnum;
				6263	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6264	s = NULL;
				6265	else
				6266	{
				6267	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6268	if (reg_mmatch->endpos[no].lnum == clnum)
				6269	len = reg_mmatch->endpos[no].col
				6270	- reg_mmatch->startpos[no].col;
				6271	else
				6272	len = (int)STRLEN(s);
				6273	}
				6274	}
				6275	else
				6276	{
				6277	s = reg_match->startp[no];
				6278	if (reg_match->endp[no] == NULL)
				6279	s = NULL;
				6280	else
				6281	len = (int)(reg_match->endp[no] - s);
				6282	}
				6283	if (s != NULL)
				6284	{
				6285	for (;;)
				6286	{
				6287	if (len == 0)
				6288	{
				6289	if (REG_MULTI)
				6290	{
				6291	if (reg_mmatch->endpos[no].lnum == clnum)
				6292	break;
				6293	if (copy)
				6294	*dst = CAR;
				6295	++dst;
				6296	s = reg_getline(++clnum);
				6297	if (reg_mmatch->endpos[no].lnum == clnum)
				6298	len = reg_mmatch->endpos[no].col;
				6299	else
				6300	len = (int)STRLEN(s);
				6301	}
				6302	else
				6303	break;
				6304	}
				6305	else if (s == NUL) / we hit NUL. */
				6306	{
				6307	if (copy)
				6308	EMSG(_(e_re_damg));
				6309	goto exit;
				6310	}
				6311	else
				6312	{
				6313	if (backslash && (s == CAR \|\| s == '\\'))
				6314	{
				6315	/*
				6316	* Insert a backslash in front of a CR, otherwise
				6317	* it will be replaced by a line break.
				6318	* Number of backslashes will be halved later,
				6319	* double them here.
				6320	*/
				6321	if (copy)
				6322	{
				6323	dst[0] = '\\';
				6324	dst[1] = *s;
				6325	}
				6326	dst += 2;
				6327	}
				6328	#ifdef FEAT_MBYTE
				6329	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6330	{
				6331	/* TODO: should use "func" here. */
				6332	if (copy)
				6333	mch_memmove(dst, s, l);
				6334	dst += l;
				6335	s += l - 1;
				6336	len -= l - 1;
				6337	}
				6338	#endif
				6339	else
				6340	{
				6341	if (copy)
				6342	{
				6343	if (func == (fptr)NULL) /* just copy */
				6344	dst = s;
				6345	else /* change case */
				6346	func = (fptr)(func(dst, *s));
				6347	/* Turbo C complains without the typecast */
				6348	}
				6349	++dst;
				6350	}
				6351	++s;
				6352	--len;
				6353	}
				6354	}
				6355	}
				6356	no = -1;
				6357	}
				6358	}
				6359	if (copy)
				6360	*dst = NUL;
				6361
				6362	exit:
				6363	return (int)((dst - dest) + 1);
				6364	}
				6365
				6366	#ifdef FEAT_EVAL
				6367	/*
				6368	* Used for the submatch() function: get the string from tne n'th submatch in
				6369	* allocated memory.
				6370	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6371	*/
				6372	char_u *
				6373	reg_submatch(no)
				6374	int no;
				6375	{
				6376	char_u *retval = NULL;
				6377	char_u *s;
				6378	int len;
				6379	int round;
				6380	linenr_T lnum;
				6381
				6382	if (!can_f_submatch)
				6383	return NULL;
				6384
				6385	if (submatch_match == NULL)
				6386	{
				6387	/*
				6388	* First round: compute the length and allocate memory.
				6389	* Second round: copy the text.
				6390	*/
				6391	for (round = 1; round <= 2; ++round)
				6392	{
				6393	lnum = submatch_mmatch->startpos[no].lnum;
				6394	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6395	return NULL;
				6396
				6397	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6398	if (s == NULL) /* anti-crash check, cannot happen? */
				6399	break;
				6400	if (submatch_mmatch->endpos[no].lnum == lnum)
				6401	{
				6402	/* Within one line: take form start to end col. */
				6403	len = submatch_mmatch->endpos[no].col
				6404	- submatch_mmatch->startpos[no].col;
				6405	if (round == 2)
				6406	{
				6407	STRNCPY(retval, s, len);
				6408	retval[len] = NUL;
				6409	}
				6410	++len;
				6411	}
				6412	else
				6413	{
				6414	/* Multiple lines: take start line from start col, middle
				6415	* lines completely and end line up to end col. */
				6416	len = (int)STRLEN(s);
				6417	if (round == 2)
				6418	{
				6419	STRCPY(retval, s);
				6420	retval[len] = '\n';
				6421	}
				6422	++len;
				6423	++lnum;
				6424	while (lnum < submatch_mmatch->endpos[no].lnum)
				6425	{
				6426	s = reg_getline(lnum++);
				6427	if (round == 2)
				6428	STRCPY(retval + len, s);
				6429	len += (int)STRLEN(s);
				6430	if (round == 2)
				6431	retval[len] = '\n';
				6432	++len;
				6433	}
				6434	if (round == 2)
				6435	STRNCPY(retval + len, reg_getline(lnum),
				6436	submatch_mmatch->endpos[no].col);
				6437	len += submatch_mmatch->endpos[no].col;
				6438	if (round == 2)
				6439	retval[len] = NUL;
				6440	++len;
				6441	}
				6442
				6443	if (round == 1)
				6444	{
				6445	retval = lalloc((long_u)len, TRUE);
				6446	if (s == NULL)
				6447	return NULL;
				6448	}
				6449	}
				6450	}
				6451	else
				6452	{
				6453	if (submatch_match->endp[no] == NULL)
				6454	retval = NULL;
				6455	else
				6456	{
				6457	s = submatch_match->startp[no];
				6458	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6459	}
				6460	}
				6461
				6462	return retval;
				6463	}
				6464	#endif