Blame - src/regexp.c - android_external_vim

blob: 6f15824dfb55c0dd78fde1b3ffcfe05f38955798 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	77	* node points to the node after the stuff to be repeated.
				78	* The operand of some types of node is a literal string; for others, it is a
				79	* node leading into a sub-FSM. In particular, the operand of a BRANCH node
				80	* is the first node of the branch.
				81	* (NB this is not a tree structure: the tail of the branch connects to the
				82	* thing following the set of BRANCHes.)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	83	*
				84	* pattern is coded like:
				85	*
				86	* +-----------------+
				87	* \| V
				88	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				89	* \| ^ \| ^
				90	* +------+ +----------+
				91	*
				92	*
				93	* +------------------+
				94	* V \|
				95	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				96	* \| \| ^ ^
				97	* \| +---------------+ \|
				98	* +---------------------------------------------+
				99	*
				100	*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	101	* +----------------------+
				102	* V \|
				103	* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
				104	* \| \| ^ ^
				105	* \| +----------+ \|
				106	* +-------------------------------------------------+
				107	*
				108	*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	109	* +-------------------------+
				110	* V \|
				111	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				112	* \| \| ^
				113	* \| +----------------+
				114	* +-----------------------------------------------+
				115	*
				116	*
				117	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				118	* \| \| ^ ^
				119	* \| +----------------+ \|
				120	* +--------------------------------+
				121	*
				122	* +---------+
				123	* \| V
				124	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				125	* \| \| \| \| ^ ^
				126	* \| \| \| +-----+ \|
				127	* \| \| +----------------+ \|
				128	* \| +---------------------------+ \|
				129	* +------------------------------------------------------+
				130	*
				131	* They all start with a BRANCH for "\\|" alternatives, even when there is only
				132	* one alternative.
				133	*/
				134
				135	/*
				136	* The opcodes are:
				137	*/
				138
				139	/* definition number opnd? meaning */
				140	#define END 0 /* End of program or NOMATCH operand. */
				141	#define BOL 1 /* Match "" at beginning of line. */
				142	#define EOL 2 /* Match "" at end of line. */
				143	#define BRANCH 3 /* node Match this alternative, or the
				144	* next... */
				145	#define BACK 4 /* Match "", "next" ptr points backward. */
				146	#define EXACTLY 5 /* str Match this string. */
				147	#define NOTHING 6 /* Match empty string. */
				148	#define STAR 7 /* node Match this (simple) thing 0 or more
				149	* times. */
				150	#define PLUS 8 /* node Match this (simple) thing 1 or more
				151	* times. */
				152	#define MATCH 9 /* node match the operand zero-width */
				153	#define NOMATCH 10 /* node check for no match with operand */
				154	#define BEHIND 11 /* node look behind for a match with operand */
				155	#define NOBEHIND 12 /* node look behind for no match with operand */
				156	#define SUBPAT 13 /* node match the operand here */
				157	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				158	* n times (\{m,n\}). */
				159	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				160	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				161	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				162	* and BRACE_COMPLEX. */
				163	#define NEWL 18 /* Match line-break */
				164	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				165
				166
				167	/* character classes: 20-48 normal, 50-78 include a line-break */
				168	#define ADD_NL 30
				169	#define FIRST_NL ANY + ADD_NL
				170	#define ANY 20 /* Match any one character. */
				171	#define ANYOF 21 /* str Match any character in this string. */
				172	#define ANYBUT 22 /* str Match any character not in this
				173	* string. */
				174	#define IDENT 23 /* Match identifier char */
				175	#define SIDENT 24 /* Match identifier char but no digit */
				176	#define KWORD 25 /* Match keyword char */
				177	#define SKWORD 26 /* Match word char but no digit */
				178	#define FNAME 27 /* Match file name char */
				179	#define SFNAME 28 /* Match file name char but no digit */
				180	#define PRINT 29 /* Match printable char */
				181	#define SPRINT 30 /* Match printable char but no digit */
				182	#define WHITE 31 /* Match whitespace char */
				183	#define NWHITE 32 /* Match non-whitespace char */
				184	#define DIGIT 33 /* Match digit char */
				185	#define NDIGIT 34 /* Match non-digit char */
				186	#define HEX 35 /* Match hex char */
				187	#define NHEX 36 /* Match non-hex char */
				188	#define OCTAL 37 /* Match octal char */
				189	#define NOCTAL 38 /* Match non-octal char */
				190	#define WORD 39 /* Match word char */
				191	#define NWORD 40 /* Match non-word char */
				192	#define HEAD 41 /* Match head char */
				193	#define NHEAD 42 /* Match non-head char */
				194	#define ALPHA 43 /* Match alpha char */
				195	#define NALPHA 44 /* Match non-alpha char */
				196	#define LOWER 45 /* Match lowercase char */
				197	#define NLOWER 46 /* Match non-lowercase char */
				198	#define UPPER 47 /* Match uppercase char */
				199	#define NUPPER 48 /* Match non-uppercase char */
				200	#define LAST_NL NUPPER + ADD_NL
				201	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				202
				203	#define MOPEN 80 /* -89 Mark this point in input as start of
				204	* \( subexpr. MOPEN + 0 marks start of
				205	* match. */
				206	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				207	* end of match. */
				208	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				209
				210	#ifdef FEAT_SYN_HL
				211	# define ZOPEN 110 /* -119 Mark this point in input as start of
				212	* \z( subexpr. */
				213	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				214	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				215	#endif
				216
				217	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				218
				219	#define NOPEN 150 /* Mark this point in input as start of
				220	\%( subexpr. */
				221	#define NCLOSE 151 /* Analogous to NOPEN. */
				222
				223	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				224	#define RE_BOF 201 /* Match "" at beginning of file. */
				225	#define RE_EOF 202 /* Match "" at end of file. */
				226	#define CURSOR 203 /* Match location of cursor. */
				227
				228	#define RE_LNUM 204 /* nr cmp Match line number */
				229	#define RE_COL 205 /* nr cmp Match column number */
				230	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				231
				232	/*
				233	* Magic characters have a special meaning, they don't match literally.
				234	* Magic characters are negative. This separates them from literal characters
				235	* (possibly multi-byte). Only ASCII characters can be Magic.
				236	*/
				237	#define Magic(x) ((int)(x) - 256)
				238	#define un_Magic(x) ((x) + 256)
				239	#define is_Magic(x) ((x) < 0)
				240
				241	static int no_Magic __ARGS((int x));
				242	static int toggle_Magic __ARGS((int x));
				243
				244	static int
				245	no_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return x;
				251	}
				252
				253	static int
				254	toggle_Magic(x)
				255	int x;
				256	{
				257	if (is_Magic(x))
				258	return un_Magic(x);
				259	return Magic(x);
				260	}
				261
				262	/*
				263	* The first byte of the regexp internal "program" is actually this magic
				264	* number; the start node begins in the second byte. It's used to catch the
				265	* most severe mutilation of the program by the caller.
				266	*/
				267
				268	#define REGMAGIC 0234
				269
				270	/*
				271	* Opcode notes:
				272	*
				273	* BRANCH The set of branches constituting a single choice are hooked
				274	* together with their "next" pointers, since precedence prevents
				275	* anything being concatenated to any individual branch. The
				276	* "next" pointer of the last BRANCH in a choice points to the
				277	* thing following the whole choice. This is also where the
				278	* final "next" pointer of each individual branch points; each
				279	* branch starts with the operand node of a BRANCH node.
				280	*
				281	* BACK Normal "next" pointers all implicitly point forward; BACK
				282	* exists to make loop structures possible.
				283	*
				284	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				285	* BRANCH structures using BACK. Simple cases (one character
				286	* per match) are implemented with STAR and PLUS for speed
				287	* and to minimize recursive plunges.
				288	*
				289	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				290	* node, and defines the min and max limits to be used for that
				291	* node.
				292	*
				293	* MOPEN,MCLOSE ...are numbered at compile time.
				294	* ZOPEN,ZCLOSE ...ditto
				295	*/
				296
				297	/*
				298	* A node is one char of opcode followed by two chars of "next" pointer.
				299	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				300	* value is a positive offset from the opcode of the node containing it.
				301	* An operand, if any, simply follows the node. (Note that much of the
				302	* code generation knows about this implicit relationship.)
				303	*
				304	* Using two bytes for the "next" pointer is vast overkill for most things,
				305	* but allows patterns to get big without disasters.
				306	*/
				307	#define OP(p) ((int)*(p))
				308	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				309	#define OPERAND(p) ((p) + 3)
				310	/* Obtain an operand that was stored as four bytes, MSB first. */
				311	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				312	+ ((long)(p)[5] << 8) + (long)(p)[6])
				313	/* Obtain a second operand stored as four bytes. */
				314	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				315	/* Obtain a second single-byte operand stored after a four bytes operand. */
				316	#define OPERAND_CMP(p) (p)[7]
				317
				318	/*
				319	* Utility definitions.
				320	*/
				321	#define UCHARAT(p) ((int)(char_u )(p))
				322
				323	/* Used for an error (down from) vim_regcomp(): give the error message, set
				324	* rc_did_emsg and return NULL */
				325	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				326	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				327	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				328	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				329
				330	#define MAX_LIMIT (32767L << 16L)
				331
				332	static int re_multi_type __ARGS((int));
				333	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				334	static char_u cstrchr __ARGS((char_u , int));
				335
				336	#ifdef DEBUG
				337	static void regdump __ARGS((char_u , regprog_T ));
				338	static char_u regprop __ARGS((char_u ));
				339	#endif
				340
				341	#define NOT_MULTI 0
				342	#define MULTI_ONE 1
				343	#define MULTI_MULT 2
				344	/*
				345	* Return NOT_MULTI if c is not a "multi" operator.
				346	* Return MULTI_ONE if c is a single "multi" operator.
				347	* Return MULTI_MULT if c is a multi "multi" operator.
				348	*/
				349	static int
				350	re_multi_type(c)
				351	int c;
				352	{
				353	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				354	return MULTI_ONE;
				355	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				356	return MULTI_MULT;
				357	return NOT_MULTI;
				358	}
				359
				360	/*
				361	* Flags to be passed up and down.
				362	*/
				363	#define HASWIDTH 0x1 /* Known never to match null string. */
				364	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				365	#define SPSTART 0x4 /* Starts with * or +. */
				366	#define HASNL 0x8 /* Contains some \n. */
				367	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				368	#define WORST 0 /* Worst case. */
				369
				370	/*
				371	* When regcode is set to this value, code is not emitted and size is computed
				372	* instead.
				373	*/
				374	#define JUST_CALC_SIZE ((char_u *) -1)
				375
				376	static char_u *reg_prev_sub;
				377
				378	/*
				379	* REGEXP_INRANGE contains all characters which are always special in a []
				380	* range after '\'.
				381	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				382	* These are:
				383	* \n - New line (NL).
				384	* \r - Carriage Return (CR).
				385	* \t - Tab (TAB).
				386	* \e - Escape (ESC).
				387	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	388	* \d - Character code in decimal, eg \d123
				389	* \o - Character code in octal, eg \o40
				390	* \x - Character code in hex, eg \x4a
				391	* \u - Multibyte character code, eg \u20ac
				392	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	393	*/
				394	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	395	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	396
				397	static int backslash_trans __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	398	static int get_char_class __ARGS((char_u **pp));
				399	static int get_equi_class __ARGS((char_u **pp));
				400	static void reg_equi_class __ARGS((int c));
				401	static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	static char_u skip_anyof __ARGS((char_u p));
				403	static void init_class_tab __ARGS((void));
				404
				405	/*
				406	* Translate '\x' to its control character, except "\n", which is Magic.
				407	*/
				408	static int
				409	backslash_trans(c)
				410	int c;
				411	{
				412	switch (c)
				413	{
				414	case 'r': return CAR;
				415	case 't': return TAB;
				416	case 'e': return ESC;
				417	case 'b': return BS;
				418	}
				419	return c;
				420	}
				421
				422	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	423	* Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	424	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				425	* recognized. Otherwise "pp" is advanced to after the item.
				426	*/
				427	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	428	get_char_class(pp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	429	char_u **pp;
				430	{
				431	static const char *(class_names[]) =
				432	{
				433	"alnum:]",
				434	#define CLASS_ALNUM 0
				435	"alpha:]",
				436	#define CLASS_ALPHA 1
				437	"blank:]",
				438	#define CLASS_BLANK 2
				439	"cntrl:]",
				440	#define CLASS_CNTRL 3
				441	"digit:]",
				442	#define CLASS_DIGIT 4
				443	"graph:]",
				444	#define CLASS_GRAPH 5
				445	"lower:]",
				446	#define CLASS_LOWER 6
				447	"print:]",
				448	#define CLASS_PRINT 7
				449	"punct:]",
				450	#define CLASS_PUNCT 8
				451	"space:]",
				452	#define CLASS_SPACE 9
				453	"upper:]",
				454	#define CLASS_UPPER 10
				455	"xdigit:]",
				456	#define CLASS_XDIGIT 11
				457	"tab:]",
				458	#define CLASS_TAB 12
				459	"return:]",
				460	#define CLASS_RETURN 13
				461	"backspace:]",
				462	#define CLASS_BACKSPACE 14
				463	"escape:]",
				464	#define CLASS_ESCAPE 15
				465	};
				466	#define CLASS_NONE 99
				467	int i;
				468
				469	if ((*pp)[1] == ':')
				470	{
				471	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				472	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				473	{
				474	*pp += STRLEN(class_names[i]) + 2;
				475	return i;
				476	}
				477	}
				478	return CLASS_NONE;
				479	}
				480
				481	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	482	* Specific version of character class functions.
				483	* Using a table to keep this fast.
				484	*/
				485	static short class_tab[256];
				486
				487	#define RI_DIGIT 0x01
				488	#define RI_HEX 0x02
				489	#define RI_OCTAL 0x04
				490	#define RI_WORD 0x08
				491	#define RI_HEAD 0x10
				492	#define RI_ALPHA 0x20
				493	#define RI_LOWER 0x40
				494	#define RI_UPPER 0x80
				495	#define RI_WHITE 0x100
				496
				497	static void
				498	init_class_tab()
				499	{
				500	int i;
				501	static int done = FALSE;
				502
				503	if (done)
				504	return;
				505
				506	for (i = 0; i < 256; ++i)
				507	{
				508	if (i >= '0' && i <= '7')
				509	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				510	else if (i >= '8' && i <= '9')
				511	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				512	else if (i >= 'a' && i <= 'f')
				513	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				514	#ifdef EBCDIC
				515	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				516	\|\| (i >= 's' && i <= 'z'))
				517	#else
				518	else if (i >= 'g' && i <= 'z')
				519	#endif
				520	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				521	else if (i >= 'A' && i <= 'F')
				522	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				523	#ifdef EBCDIC
				524	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				525	\|\| (i >= 'S' && i <= 'Z'))
				526	#else
				527	else if (i >= 'G' && i <= 'Z')
				528	#endif
				529	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				530	else if (i == '_')
				531	class_tab[i] = RI_WORD + RI_HEAD;
				532	else
				533	class_tab[i] = 0;
				534	}
				535	class_tab[' '] \|= RI_WHITE;
				536	class_tab['\t'] \|= RI_WHITE;
				537	done = TRUE;
				538	}
				539
				540	#ifdef FEAT_MBYTE
				541	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				542	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				543	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				544	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				545	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				546	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				547	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				548	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				549	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				550	#else
				551	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				552	# define ri_hex(c) (class_tab[c] & RI_HEX)
				553	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				554	# define ri_word(c) (class_tab[c] & RI_WORD)
				555	# define ri_head(c) (class_tab[c] & RI_HEAD)
				556	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				557	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				558	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				559	# define ri_white(c) (class_tab[c] & RI_WHITE)
				560	#endif
				561
				562	/* flags for regflags */
				563	#define RF_ICASE 1 /* ignore case */
				564	#define RF_NOICASE 2 /* don't ignore case */
				565	#define RF_HASNL 4 /* can match a NL */
				566	#define RF_ICOMBINE 8 /* ignore combining characters */
				567	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				568
				569	/*
				570	* Global work variables for vim_regcomp().
				571	*/
				572
				573	static char_u regparse; / Input-scan pointer. */
				574	static int prevchr_len; /* byte length of previous char */
				575	static int num_complex_braces; /* Complex \{...} count */
				576	static int regnpar; /* () count. */
				577	#ifdef FEAT_SYN_HL
				578	static int regnzpar; /* \z() count. */
				579	static int re_has_z; /* \z item detected */
				580	#endif
				581	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				582	static long regsize; /* Code size. */
				583	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				584	static unsigned regflags; /* RF_ flags for prog */
				585	static long brace_min[10]; /* Minimums for complex brace repeats */
				586	static long brace_max[10]; /* Maximums for complex brace repeats */
				587	static int brace_count[10]; /* Current counts for complex brace repeats */
				588	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				589	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				590	#endif
				591	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				592
				593	static int reg_magic; /* magicness of the pattern: */
				594	#define MAGIC_NONE 1 /* "\V" very unmagic */
				595	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				596	#define MAGIC_ON 3 /* "\m" or 'magic' */
				597	#define MAGIC_ALL 4 /* "\v" very magic */
				598
				599	static int reg_string; /* matching with a string instead of a buffer
				600	line */
				601
				602	/*
				603	* META contains all characters that may be magic, except '^' and '$'.
				604	*/
				605
				606	#ifdef EBCDIC
				607	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				608	#else
				609	/* META[] is used often enough to justify turning it into a table. */
				610	static char_u META_flags[] = {
				611	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				612	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				613	/* % & ( ) * + . */
				614	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				615	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				616	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				617	/* @ A C D F H I K L M O */
				618	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				619	/* P S U V W X Z [ _ */
				620	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				621	/* a c d f h i k l m n o */
				622	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				623	/* p s u v w x z { \| ~ */
				624	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				625	};
				626	#endif
				627
				628	static int curchr;
				629
				630	/* arguments for reg() */
				631	#define REG_NOPAREN 0 /* toplevel reg() */
				632	#define REG_PAREN 1 /* */
				633	#define REG_ZPAREN 2 /* \z(\) */
				634	#define REG_NPAREN 3 /* \%(\) */
				635
				636	/*
				637	* Forward declarations for vim_regcomp()'s friends.
				638	*/
				639	static void initchr __ARGS((char_u *));
				640	static int getchr __ARGS((void));
				641	static void skipchr_keepstart __ARGS((void));
				642	static int peekchr __ARGS((void));
				643	static void skipchr __ARGS((void));
				644	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	645	static int gethexchrs __ARGS((int maxinputlen));
				646	static int getoctchrs __ARGS((void));
				647	static int getdecchrs __ARGS((void));
				648	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	649	static void regcomp_start __ARGS((char_u *expr, int flags));
				650	static char_u reg __ARGS((int, int ));
				651	static char_u regbranch __ARGS((int flagp));
				652	static char_u regconcat __ARGS((int flagp));
				653	static char_u regpiece __ARGS((int ));
				654	static char_u regatom __ARGS((int ));
				655	static char_u *regnode __ARGS((int));
				656	static int prog_magic_wrong __ARGS((void));
				657	static char_u regnext __ARGS((char_u ));
				658	static void regc __ARGS((int b));
				659	#ifdef FEAT_MBYTE
				660	static void regmbc __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	661	#else
				662	# define regmbc(c) regc(c)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	663	#endif
				664	static void reginsert __ARGS((int, char_u *));
				665	static void reginsert_limits __ARGS((int, long, long, char_u *));
				666	static char_u re_put_long __ARGS((char_u pr, long_u val));
				667	static int read_limits __ARGS((long , long ));
				668	static void regtail __ARGS((char_u , char_u ));
				669	static void regoptail __ARGS((char_u , char_u ));
				670
				671	/*
				672	* Return TRUE if compiled regular expression "prog" can match a line break.
				673	*/
				674	int
				675	re_multiline(prog)
				676	regprog_T *prog;
				677	{
				678	return (prog->regflags & RF_HASNL);
				679	}
				680
				681	/*
				682	* Return TRUE if compiled regular expression "prog" looks before the start
				683	* position (pattern contains "\@<=" or "\@<!").
				684	*/
				685	int
				686	re_lookbehind(prog)
				687	regprog_T *prog;
				688	{
				689	return (prog->regflags & RF_LOOKBH);
				690	}
				691
				692	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	693	* Check for an equivalence class name "[=a=]". "pp" points to the '['.
				694	* Returns a character representing the class. Zero means that no item was
				695	* recognized. Otherwise "pp" is advanced to after the item.
				696	*/
				697	static int
				698	get_equi_class(pp)
				699	char_u **pp;
				700	{
				701	int c;
				702	int l = 1;
				703	char_u p = pp;
				704
				705	if (p[1] == '=')
				706	{
				707	#ifdef FEAT_MBYTE
				708	if (has_mbyte)
				709	l = mb_ptr2len_check(p + 2);
				710	#endif
				711	if (p[l + 2] == '=' && p[l + 3] == ']')
				712	{
				713	#ifdef FEAT_MBYTE
				714	if (has_mbyte)
				715	c = mb_ptr2char(p + 2);
				716	else
				717	#endif
				718	c = p[2];
				719	*pp += l + 4;
				720	return c;
				721	}
				722	}
				723	return 0;
				724	}
				725
				726	/*
				727	* Produce the bytes for equivalence class "c".
				728	* Currently only handles latin1, latin9 and utf-8.
				729	*/
				730	static void
				731	reg_equi_class(c)
				732	int c;
				733	{
				734	#ifdef FEAT_MBYTE
				735	if (enc_utf8 \|\| STRCMP(p_enc, "latin1") == 0
				736	\|\| STRCMP(p_enc, "latin9") == 0)
				737	#endif
				738	{
				739	switch (c)
				740	{
				741	case 'A': case 'À': case 'Á': case 'Â':
				742	case 'Ã': case 'Ä': case 'Å':
				743	regmbc('A'); regmbc('À'); regmbc('Á'); regmbc('Â');
				744	regmbc('Ã'); regmbc('Ä'); regmbc('Å');
				745	return;
				746	case 'C': case 'Ç':
				747	regmbc('C'); regmbc('Ç');
				748	return;
				749	case 'E': case 'È': case 'É': case 'Ê': case 'Ë':
				750	regmbc('E'); regmbc('È'); regmbc('É'); regmbc('Ê');
				751	regmbc('Ë');
				752	return;
				753	case 'I': case 'Ì': case 'Í': case 'Î': case 'Ï':
				754	regmbc('I'); regmbc('Ì'); regmbc('Í'); regmbc('Î');
				755	regmbc('Ï');
				756	return;
				757	case 'N': case 'Ñ':
				758	regmbc('N'); regmbc('Ñ');
				759	return;
				760	case 'O': case 'Ò': case 'Ó': case 'Ô': case 'Õ': case 'Ö':
				761	regmbc('O'); regmbc('Ò'); regmbc('Ó'); regmbc('Ô');
				762	regmbc('Õ'); regmbc('Ö');
				763	return;
				764	case 'U': case 'Ù': case 'Ú': case 'Û': case 'Ü':
				765	regmbc('U'); regmbc('Ù'); regmbc('Ú'); regmbc('Û');
				766	regmbc('Ü');
				767	return;
				768	case 'Y': case 'Ý':
				769	regmbc('Y'); regmbc('Ý');
				770	return;
				771	case 'a': case 'à': case 'á': case 'â':
				772	case 'ã': case 'ä': case 'å':
				773	regmbc('a'); regmbc('à'); regmbc('á'); regmbc('â');
				774	regmbc('ã'); regmbc('ä'); regmbc('å');
				775	return;
				776	case 'c': case 'ç':
				777	regmbc('c'); regmbc('ç');
				778	return;
				779	case 'e': case 'è': case 'é': case 'ê': case 'ë':
				780	regmbc('e'); regmbc('è'); regmbc('é'); regmbc('ê');
				781	regmbc('ë');
				782	return;
				783	case 'i': case 'ì': case 'í': case 'î': case 'ï':
				784	regmbc('i'); regmbc('ì'); regmbc('í'); regmbc('î');
				785	regmbc('ï');
				786	return;
				787	case 'n': case 'ñ':
				788	regmbc('n'); regmbc('ñ');
				789	return;
				790	case 'o': case 'ò': case 'ó': case 'ô': case 'õ': case 'ö':
				791	regmbc('o'); regmbc('ò'); regmbc('ó'); regmbc('ô');
				792	regmbc('õ'); regmbc('ö');
				793	return;
				794	case 'u': case 'ù': case 'ú': case 'û': case 'ü':
				795	regmbc('u'); regmbc('ù'); regmbc('ú'); regmbc('û');
				796	regmbc('ü');
				797	return;
				798	case 'y': case 'ý': case 'ÿ':
				799	regmbc('y'); regmbc('ý'); regmbc('ÿ');
				800	return;
				801	}
				802	}
				803	regmbc(c);
				804	}
				805
				806	/*
				807	* Check for a collating element "[.a.]". "pp" points to the '['.
				808	* Returns a character. Zero means that no item was recognized. Otherwise
				809	* "pp" is advanced to after the item.
				810	* Currently only single characters are recognized!
				811	*/
				812	static int
				813	get_coll_element(pp)
				814	char_u **pp;
				815	{
				816	int c;
				817	int l = 1;
				818	char_u p = pp;
				819
				820	if (p[1] == '.')
				821	{
				822	#ifdef FEAT_MBYTE
				823	if (has_mbyte)
				824	l = mb_ptr2len_check(p + 2);
				825	#endif
				826	if (p[l + 2] == '.' && p[l + 3] == ']')
				827	{
				828	#ifdef FEAT_MBYTE
				829	if (has_mbyte)
				830	c = mb_ptr2char(p + 2);
				831	else
				832	#endif
				833	c = p[2];
				834	*pp += l + 4;
				835	return c;
				836	}
				837	}
				838	return 0;
				839	}
				840
				841
				842	/*
				843	* Skip over a "[]" range.
				844	* "p" must point to the character after the '['.
				845	* The returned pointer is on the matching ']', or the terminating NUL.
				846	*/
				847	static char_u *
				848	skip_anyof(p)
				849	char_u *p;
				850	{
				851	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				852	int cpo_bsl; /* 'cpoptions' contains '\' flag */
				853	#ifdef FEAT_MBYTE
				854	int l;
				855	#endif
				856
				857	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				858	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
				859
				860	if (p == '^') / Complement of range. */
				861	++p;
				862	if (p == ']' \|\| p == '-')
				863	++p;
				864	while (p != NUL && p != ']')
				865	{
				866	#ifdef FEAT_MBYTE
				867	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				868	p += l;
				869	else
				870	#endif
				871	if (*p == '-')
				872	{
				873	++p;
				874	if (p != ']' && p != NUL)
				875	mb_ptr_adv(p);
				876	}
				877	else if (*p == '\\'
				878	&& !cpo_bsl
				879	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				880	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				881	p += 2;
				882	else if (*p == '[')
				883	{
				884	if (get_char_class(&p) == CLASS_NONE
				885	&& get_equi_class(&p) == 0
				886	&& get_coll_element(&p) == 0)
				887	++p; /* It was not a class name */
				888	}
				889	else
				890	++p;
				891	}
				892
				893	return p;
				894	}
				895
				896	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	897	* Skip past regular expression.
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	898	* Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	899	* Take care of characters with a backslash in front of it.
				900	* Skip strings inside [ and ].
				901	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				902	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				903	* is changed in-place.
				904	*/
				905	char_u *
				906	skip_regexp(startp, dirc, magic, newp)
				907	char_u *startp;
				908	int dirc;
				909	int magic;
				910	char_u **newp;
				911	{
				912	int mymagic;
				913	char_u *p = startp;
				914
				915	if (magic)
				916	mymagic = MAGIC_ON;
				917	else
				918	mymagic = MAGIC_OFF;
				919
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	920	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	921	{
				922	if (p[0] == dirc) /* found end of regexp */
				923	break;
				924	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				925	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				926	{
				927	p = skip_anyof(p + 1);
				928	if (p[0] == NUL)
				929	break;
				930	}
				931	else if (p[0] == '\\' && p[1] != NUL)
				932	{
				933	if (dirc == '?' && newp != NULL && p[1] == '?')
				934	{
				935	/* change "\?" to "?", make a copy first. */
				936	if (*newp == NULL)
				937	{
				938	*newp = vim_strsave(startp);
				939	if (*newp != NULL)
				940	p = *newp + (p - startp);
				941	}
				942	if (*newp != NULL)
				943	mch_memmove(p, p + 1, STRLEN(p));
				944	else
				945	++p;
				946	}
				947	else
				948	++p; /* skip next character */
				949	if (*p == 'v')
				950	mymagic = MAGIC_ALL;
				951	else if (*p == 'V')
				952	mymagic = MAGIC_NONE;
				953	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	954	}
				955	return p;
				956	}
				957
				958	/*
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	959	* vim_regcomp() - compile a regular expression into internal code
				960	* Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	961	*
				962	* We can't allocate space until we know how big the compiled form will be,
				963	* but we can't compile it (and thus know how big it is) until we've got a
				964	* place to put the code. So we cheat: we compile it twice, once with code
				965	* generation turned off and size counting turned on, and once "for real".
				966	* This also means that we don't allocate space until we are sure that the
				967	* thing really will compile successfully, and we never have to move the
				968	* code and thus invalidate pointers into it. (Note that it has to be in
				969	* one piece because vim_free() must be able to free it all.)
				970	*
				971	* Whether upper/lower case is to be ignored is decided when executing the
				972	* program, it does not matter here.
				973	*
				974	* Beware that the optimization-preparation code in here knows about some
				975	* of the structure of the compiled regexp.
				976	* "re_flags": RE_MAGIC and/or RE_STRING.
				977	*/
				978	regprog_T *
				979	vim_regcomp(expr, re_flags)
				980	char_u *expr;
				981	int re_flags;
				982	{
				983	regprog_T *r;
				984	char_u *scan;
				985	char_u *longest;
				986	int len;
				987	int flags;
				988
				989	if (expr == NULL)
				990	EMSG_RET_NULL(_(e_null));
				991
				992	init_class_tab();
				993
				994	/*
				995	* First pass: determine size, legality.
				996	*/
				997	regcomp_start(expr, re_flags);
				998	regcode = JUST_CALC_SIZE;
				999	regc(REGMAGIC);
				1000	if (reg(REG_NOPAREN, &flags) == NULL)
				1001	return NULL;
				1002
				1003	/* Small enough for pointer-storage convention? */
				1004	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				1005	if (regsize >= 65536L - 256L)
				1006	EMSG_RET_NULL(_("E339: Pattern too long"));
				1007	#endif
				1008
				1009	/* Allocate space. */
				1010	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				1011	if (r == NULL)
				1012	return NULL;
				1013
				1014	/*
				1015	* Second pass: emit code.
				1016	*/
				1017	regcomp_start(expr, re_flags);
				1018	regcode = r->program;
				1019	regc(REGMAGIC);
				1020	if (reg(REG_NOPAREN, &flags) == NULL)
				1021	{
				1022	vim_free(r);
				1023	return NULL;
				1024	}
				1025
				1026	/* Dig out information for optimizations. */
				1027	r->regstart = NUL; /* Worst-case defaults. */
				1028	r->reganch = 0;
				1029	r->regmust = NULL;
				1030	r->regmlen = 0;
				1031	r->regflags = regflags;
				1032	if (flags & HASNL)
				1033	r->regflags \|= RF_HASNL;
				1034	if (flags & HASLOOKBH)
				1035	r->regflags \|= RF_LOOKBH;
				1036	#ifdef FEAT_SYN_HL
				1037	/* Remember whether this pattern has any \z specials in it. */
				1038	r->reghasz = re_has_z;
				1039	#endif
				1040	scan = r->program + 1; /* First BRANCH. */
				1041	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				1042	{
				1043	scan = OPERAND(scan);
				1044
				1045	/* Starting-point info. */
				1046	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				1047	{
				1048	r->reganch++;
				1049	scan = regnext(scan);
				1050	}
				1051
				1052	if (OP(scan) == EXACTLY)
				1053	{
				1054	#ifdef FEAT_MBYTE
				1055	if (has_mbyte)
				1056	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				1057	else
				1058	#endif
				1059	r->regstart = *OPERAND(scan);
				1060	}
				1061	else if ((OP(scan) == BOW
				1062	\|\| OP(scan) == EOW
				1063	\|\| OP(scan) == NOTHING
				1064	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				1065	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				1066	&& OP(regnext(scan)) == EXACTLY)
				1067	{
				1068	#ifdef FEAT_MBYTE
				1069	if (has_mbyte)
				1070	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				1071	else
				1072	#endif
				1073	r->regstart = *OPERAND(regnext(scan));
				1074	}
				1075
				1076	/*
				1077	* If there's something expensive in the r.e., find the longest
				1078	* literal string that must appear and make it the regmust. Resolve
				1079	* ties in favor of later strings, since the regstart check works
				1080	* with the beginning of the r.e. and avoiding duplication
				1081	* strengthens checking. Not a strong reason, but sufficient in the
				1082	* absence of others.
				1083	*/
				1084	/*
				1085	* When the r.e. starts with BOW, it is faster to look for a regmust
				1086	* first. Used a lot for "#" and "*" commands. (Added by mool).
				1087	*/
				1088	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				1089	&& !(flags & HASNL))
				1090	{
				1091	longest = NULL;
				1092	len = 0;
				1093	for (; scan != NULL; scan = regnext(scan))
				1094	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				1095	{
				1096	longest = OPERAND(scan);
				1097	len = (int)STRLEN(OPERAND(scan));
				1098	}
				1099	r->regmust = longest;
				1100	r->regmlen = len;
				1101	}
				1102	}
				1103	#ifdef DEBUG
				1104	regdump(expr, r);
				1105	#endif
				1106	return r;
				1107	}
				1108
				1109	/*
				1110	* Setup to parse the regexp. Used once to get the length and once to do it.
				1111	*/
				1112	static void
				1113	regcomp_start(expr, re_flags)
				1114	char_u *expr;
				1115	int re_flags; /* see vim_regcomp() */
				1116	{
				1117	initchr(expr);
				1118	if (re_flags & RE_MAGIC)
				1119	reg_magic = MAGIC_ON;
				1120	else
				1121	reg_magic = MAGIC_OFF;
				1122	reg_string = (re_flags & RE_STRING);
				1123
				1124	num_complex_braces = 0;
				1125	regnpar = 1;
				1126	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				1127	#ifdef FEAT_SYN_HL
				1128	regnzpar = 1;
				1129	re_has_z = 0;
				1130	#endif
				1131	regsize = 0L;
				1132	regflags = 0;
				1133	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1134	had_eol = FALSE;
				1135	#endif
				1136	}
				1137
				#if defined(FEAT_SYN_HL) || defined(PROTO)
				/*
				 * Tell whether the EOL item "$" was seen while compiling the pattern in
				 * the most recent call to vim_regcomp().  Relying on a leftover flag is
				 * messy, but it works fine.
				 */
				    int
				vim_regcomp_had_eol()
				{
				    return had_eol;
				}
				#endif
				1149
				1150	/*
				1151	* reg - regular expression, i.e. main body or parenthesized thing
				1152	*
				1153	* Caller must absorb opening parenthesis.
				1154	*
				1155	* Combining parenthesis handling with the base level of regular expression
				1156	* is a trifle forced, but the need to tie the tails of the branches to what
				1157	* follows makes it hard to avoid.
				1158	*/
				1159	static char_u *
				1160	reg(paren, flagp)
				1161	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1162	int *flagp;
				1163	{
				1164	char_u *ret;
				1165	char_u *br;
				1166	char_u *ender;
				1167	int parno = 0;
				1168	int flags;
				1169
				1170	flagp = HASWIDTH; / Tentatively. */
				1171
				1172	#ifdef FEAT_SYN_HL
				1173	if (paren == REG_ZPAREN)
				1174	{
				1175	/* Make a ZOPEN node. */
				1176	if (regnzpar >= NSUBEXP)
				1177	EMSG_RET_NULL(_("E50: Too many \\z("));
				1178	parno = regnzpar;
				1179	regnzpar++;
				1180	ret = regnode(ZOPEN + parno);
				1181	}
				1182	else
				1183	#endif
				1184	if (paren == REG_PAREN)
				1185	{
				1186	/* Make a MOPEN node. */
				1187	if (regnpar >= NSUBEXP)
				1188	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1189	parno = regnpar;
				1190	++regnpar;
				1191	ret = regnode(MOPEN + parno);
				1192	}
				1193	else if (paren == REG_NPAREN)
				1194	{
				1195	/* Make a NOPEN node. */
				1196	ret = regnode(NOPEN);
				1197	}
				1198	else
				1199	ret = NULL;
				1200
				1201	/* Pick up the branches, linking them together. */
				1202	br = regbranch(&flags);
				1203	if (br == NULL)
				1204	return NULL;
				1205	if (ret != NULL)
				1206	regtail(ret, br); /* [MZ]OPEN -> first. */
				1207	else
				1208	ret = br;
				1209	/* If one of the branches can be zero-width, the whole thing can.
				1210	* If one of the branches has * at start or matches a line-break, the
				1211	* whole thing can. */
				1212	if (!(flags & HASWIDTH))
				1213	*flagp &= ~HASWIDTH;
				1214	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1215	while (peekchr() == Magic('\|'))
				1216	{
				1217	skipchr();
				1218	br = regbranch(&flags);
				1219	if (br == NULL)
				1220	return NULL;
				1221	regtail(ret, br); /* BRANCH -> BRANCH. */
				1222	if (!(flags & HASWIDTH))
				1223	*flagp &= ~HASWIDTH;
				1224	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1225	}
				1226
				1227	/* Make a closing node, and hook it on the end. */
				1228	ender = regnode(
				1229	#ifdef FEAT_SYN_HL
				1230	paren == REG_ZPAREN ? ZCLOSE + parno :
				1231	#endif
				1232	paren == REG_PAREN ? MCLOSE + parno :
				1233	paren == REG_NPAREN ? NCLOSE : END);
				1234	regtail(ret, ender);
				1235
				1236	/* Hook the tails of the branches to the closing node. */
				1237	for (br = ret; br != NULL; br = regnext(br))
				1238	regoptail(br, ender);
				1239
				1240	/* Check for proper termination. */
				1241	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1242	{
				1243	#ifdef FEAT_SYN_HL
				1244	if (paren == REG_ZPAREN)
				1245	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1246	else
				1247	#endif
				1248	if (paren == REG_NPAREN)
				1249	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1250	else
				1251	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1252	}
				1253	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1254	{
				1255	if (curchr == Magic(')'))
				1256	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1257	else
				1258	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1259	/* NOTREACHED */
				1260	}
				1261	/*
				1262	* Here we set the flag allowing back references to this set of
				1263	* parentheses.
				1264	*/
				1265	if (paren == REG_PAREN)
				1266	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1267	return ret;
				1268	}
				1269
				1270	/*
				1271	* regbranch - one alternative of an \| operator
				1272	*
				1273	* Implements the & operator.
				1274	*/
				1275	static char_u *
				1276	regbranch(flagp)
				1277	int *flagp;
				1278	{
				1279	char_u *ret;
				1280	char_u *chain = NULL;
				1281	char_u *latest;
				1282	int flags;
				1283
				1284	flagp = WORST \| HASNL; / Tentatively. */
				1285
				1286	ret = regnode(BRANCH);
				1287	for (;;)
				1288	{
				1289	latest = regconcat(&flags);
				1290	if (latest == NULL)
				1291	return NULL;
				1292	/* If one of the branches has width, the whole thing has. If one of
				1293	* the branches anchors at start-of-line, the whole thing does.
				1294	* If one of the branches uses look-behind, the whole thing does. */
				1295	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1296	/* If one of the branches doesn't match a line-break, the whole thing
				1297	* doesn't. */
				1298	*flagp &= ~HASNL \| (flags & HASNL);
				1299	if (chain != NULL)
				1300	regtail(chain, latest);
				1301	if (peekchr() != Magic('&'))
				1302	break;
				1303	skipchr();
				1304	regtail(latest, regnode(END)); /* operand ends */
				1305	reginsert(MATCH, latest);
				1306	chain = latest;
				1307	}
				1308
				1309	return ret;
				1310	}
				1311
				1312	/*
				1313	* regbranch - one alternative of an \| or & operator
				1314	*
				1315	* Implements the concatenation operator.
				1316	*/
				1317	static char_u *
				1318	regconcat(flagp)
				1319	int *flagp;
				1320	{
				1321	char_u *first = NULL;
				1322	char_u *chain = NULL;
				1323	char_u *latest;
				1324	int flags;
				1325	int cont = TRUE;
				1326
				1327	flagp = WORST; / Tentatively. */
				1328
				1329	while (cont)
				1330	{
				1331	switch (peekchr())
				1332	{
				1333	case NUL:
				1334	case Magic('\|'):
				1335	case Magic('&'):
				1336	case Magic(')'):
				1337	cont = FALSE;
				1338	break;
				1339	case Magic('Z'):
				1340	#ifdef FEAT_MBYTE
				1341	regflags \|= RF_ICOMBINE;
				1342	#endif
				1343	skipchr_keepstart();
				1344	break;
				1345	case Magic('c'):
				1346	regflags \|= RF_ICASE;
				1347	skipchr_keepstart();
				1348	break;
				1349	case Magic('C'):
				1350	regflags \|= RF_NOICASE;
				1351	skipchr_keepstart();
				1352	break;
				1353	case Magic('v'):
				1354	reg_magic = MAGIC_ALL;
				1355	skipchr_keepstart();
				1356	curchr = -1;
				1357	break;
				1358	case Magic('m'):
				1359	reg_magic = MAGIC_ON;
				1360	skipchr_keepstart();
				1361	curchr = -1;
				1362	break;
				1363	case Magic('M'):
				1364	reg_magic = MAGIC_OFF;
				1365	skipchr_keepstart();
				1366	curchr = -1;
				1367	break;
				1368	case Magic('V'):
				1369	reg_magic = MAGIC_NONE;
				1370	skipchr_keepstart();
				1371	curchr = -1;
				1372	break;
				1373	default:
				1374	latest = regpiece(&flags);
				1375	if (latest == NULL)
				1376	return NULL;
				1377	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1378	if (chain == NULL) /* First piece. */
				1379	*flagp \|= flags & SPSTART;
				1380	else
				1381	regtail(chain, latest);
				1382	chain = latest;
				1383	if (first == NULL)
				1384	first = latest;
				1385	break;
				1386	}
				1387	}
				1388	if (first == NULL) /* Loop ran zero times. */
				1389	first = regnode(NOTHING);
				1390	return first;
				1391	}
				1392
				1393	/*
				1394	* regpiece - something followed by possible [*+=]
				1395	*
				1396	* Note that the branching code sequences used for = and the general cases
				1397	* of * and + are somewhat optimized: they use the same NOTHING node as
				1398	* both the endmarker for their branch list and the body of the last branch.
				1399	* It might seem that this node could be dispensed with entirely, but the
				1400	* endmarker role is not redundant.
				1401	*/
				1402	static char_u *
				1403	regpiece(flagp)
				1404	int *flagp;
				1405	{
				1406	char_u *ret;
				1407	int op;
				1408	char_u *next;
				1409	int flags;
				1410	long minval;
				1411	long maxval;
				1412
				1413	ret = regatom(&flags);
				1414	if (ret == NULL)
				1415	return NULL;
				1416
				1417	op = peekchr();
				1418	if (re_multi_type(op) == NOT_MULTI)
				1419	{
				1420	*flagp = flags;
				1421	return ret;
				1422	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1423	/* default flags */
				1424	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1425
				1426	skipchr();
				1427	switch (op)
				1428	{
				1429	case Magic('*'):
				1430	if (flags & SIMPLE)
				1431	reginsert(STAR, ret);
				1432	else
				1433	{
				1434	/* Emit x* as (x&\|), where & means "self". */
				1435	reginsert(BRANCH, ret); /* Either x */
				1436	regoptail(ret, regnode(BACK)); /* and loop */
				1437	regoptail(ret, ret); /* back */
				1438	regtail(ret, regnode(BRANCH)); /* or */
				1439	regtail(ret, regnode(NOTHING)); /* null. */
				1440	}
				1441	break;
				1442
				1443	case Magic('+'):
				1444	if (flags & SIMPLE)
				1445	reginsert(PLUS, ret);
				1446	else
				1447	{
				1448	/* Emit x+ as x(&\|), where & means "self". */
				1449	next = regnode(BRANCH); /* Either */
				1450	regtail(ret, next);
				1451	regtail(regnode(BACK), ret); /* loop back */
				1452	regtail(next, regnode(BRANCH)); /* or */
				1453	regtail(ret, regnode(NOTHING)); /* null. */
				1454	}
				1455	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1456	break;
				1457
				1458	case Magic('@'):
				1459	{
				1460	int lop = END;
				1461
				1462	switch (no_Magic(getchr()))
				1463	{
				1464	case '=': lop = MATCH; break; /* \@= */
				1465	case '!': lop = NOMATCH; break; /* \@! */
				1466	case '>': lop = SUBPAT; break; /* \@> */
				1467	case '<': switch (no_Magic(getchr()))
				1468	{
				1469	case '=': lop = BEHIND; break; /* \@<= */
				1470	case '!': lop = NOBEHIND; break; /* \@<! */
				1471	}
				1472	}
				1473	if (lop == END)
				1474	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1475	reg_magic == MAGIC_ALL);
				1476	/* Look behind must match with behind_pos. */
				1477	if (lop == BEHIND \|\| lop == NOBEHIND)
				1478	{
				1479	regtail(ret, regnode(BHPOS));
				1480	*flagp \|= HASLOOKBH;
				1481	}
				1482	regtail(ret, regnode(END)); /* operand ends */
				1483	reginsert(lop, ret);
				1484	break;
				1485	}
				1486
				1487	case Magic('?'):
				1488	case Magic('='):
				1489	/* Emit x= as (x\|) */
				1490	reginsert(BRANCH, ret); /* Either x */
				1491	regtail(ret, regnode(BRANCH)); /* or */
				1492	next = regnode(NOTHING); /* null. */
				1493	regtail(ret, next);
				1494	regoptail(ret, next);
				1495	break;
				1496
				1497	case Magic('{'):
				1498	if (!read_limits(&minval, &maxval))
				1499	return NULL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1500	if (flags & SIMPLE)
				1501	{
				1502	reginsert(BRACE_SIMPLE, ret);
				1503	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1504	}
				1505	else
				1506	{
				1507	if (num_complex_braces >= 10)
				1508	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1509	reg_magic == MAGIC_ALL);
				1510	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1511	regoptail(ret, regnode(BACK));
				1512	regoptail(ret, ret);
				1513	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1514	++num_complex_braces;
				1515	}
				1516	if (minval > 0 && maxval > 0)
				1517	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1518	break;
				1519	}
				1520	if (re_multi_type(peekchr()) != NOT_MULTI)
				1521	{
				1522	/* Can't have a multi follow a multi. */
				1523	if (peekchr() == Magic('*'))
				1524	sprintf((char )IObuff, _("E61: Nested %s"),
				1525	reg_magic >= MAGIC_ON ? "" : "\\");
				1526	else
				1527	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1528	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1529	EMSG_RET_NULL(IObuff);
				1530	}
				1531
				1532	return ret;
				1533	}
				1534
				1535	/*
				1536	* regatom - the lowest level
				1537	*
				1538	* Optimization: gobbles an entire sequence of ordinary characters so that
				1539	* it can turn them into a single node, which is smaller to store and
				1540	* faster to run. Don't do this when one_exactly is set.
				1541	*/
				1542	static char_u *
				1543	regatom(flagp)
				1544	int *flagp;
				1545	{
				1546	char_u *ret;
				1547	int flags;
				1548	int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1549	int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1550	int c;
				1551	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1552	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1553	FNAME, SFNAME, PRINT, SPRINT,
				1554	WHITE, NWHITE, DIGIT, NDIGIT,
				1555	HEX, NHEX, OCTAL, NOCTAL,
				1556	WORD, NWORD, HEAD, NHEAD,
				1557	ALPHA, NALPHA, LOWER, NLOWER,
				1558	UPPER, NUPPER
				1559	};
				1560	char_u *p;
				1561	int extra = 0;
				1562
				1563	flagp = WORST; / Tentatively. */
				1564	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1565	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1566
				1567	c = getchr();
				1568	switch (c)
				1569	{
				1570	case Magic('^'):
				1571	ret = regnode(BOL);
				1572	break;
				1573
				1574	case Magic('$'):
				1575	ret = regnode(EOL);
				1576	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1577	had_eol = TRUE;
				1578	#endif
				1579	break;
				1580
				1581	case Magic('<'):
				1582	ret = regnode(BOW);
				1583	break;
				1584
				1585	case Magic('>'):
				1586	ret = regnode(EOW);
				1587	break;
				1588
				1589	case Magic('_'):
				1590	c = no_Magic(getchr());
				1591	if (c == '^') /* "\_^" is start-of-line */
				1592	{
				1593	ret = regnode(BOL);
				1594	break;
				1595	}
				1596	if (c == '$') /* "\_$" is end-of-line */
				1597	{
				1598	ret = regnode(EOL);
				1599	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1600	had_eol = TRUE;
				1601	#endif
				1602	break;
				1603	}
				1604
				1605	extra = ADD_NL;
				1606	*flagp \|= HASNL;
				1607
				1608	/* "\_[" is character range plus newline */
				1609	if (c == '[')
				1610	goto collection;
				1611
				1612	/* "\_x" is character class plus newline */
				1613	/FALLTHROUGH/
				1614
				1615	/*
				1616	* Character classes.
				1617	*/
				1618	case Magic('.'):
				1619	case Magic('i'):
				1620	case Magic('I'):
				1621	case Magic('k'):
				1622	case Magic('K'):
				1623	case Magic('f'):
				1624	case Magic('F'):
				1625	case Magic('p'):
				1626	case Magic('P'):
				1627	case Magic('s'):
				1628	case Magic('S'):
				1629	case Magic('d'):
				1630	case Magic('D'):
				1631	case Magic('x'):
				1632	case Magic('X'):
				1633	case Magic('o'):
				1634	case Magic('O'):
				1635	case Magic('w'):
				1636	case Magic('W'):
				1637	case Magic('h'):
				1638	case Magic('H'):
				1639	case Magic('a'):
				1640	case Magic('A'):
				1641	case Magic('l'):
				1642	case Magic('L'):
				1643	case Magic('u'):
				1644	case Magic('U'):
				1645	p = vim_strchr(classchars, no_Magic(c));
				1646	if (p == NULL)
				1647	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1648	ret = regnode(classcodes[p - classchars] + extra);
				1649	*flagp \|= HASWIDTH \| SIMPLE;
				1650	break;
				1651
				1652	case Magic('n'):
				1653	if (reg_string)
				1654	{
				1655	/* In a string "\n" matches a newline character. */
				1656	ret = regnode(EXACTLY);
				1657	regc(NL);
				1658	regc(NUL);
				1659	*flagp \|= HASWIDTH \| SIMPLE;
				1660	}
				1661	else
				1662	{
				1663	/* In buffer text "\n" matches the end of a line. */
				1664	ret = regnode(NEWL);
				1665	*flagp \|= HASWIDTH \| HASNL;
				1666	}
				1667	break;
				1668
				1669	case Magic('('):
				1670	if (one_exactly)
				1671	EMSG_ONE_RET_NULL;
				1672	ret = reg(REG_PAREN, &flags);
				1673	if (ret == NULL)
				1674	return NULL;
				1675	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1676	break;
				1677
				1678	case NUL:
				1679	case Magic('\|'):
				1680	case Magic('&'):
				1681	case Magic(')'):
				1682	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1683	/* NOTREACHED */
				1684
				1685	case Magic('='):
				1686	case Magic('?'):
				1687	case Magic('+'):
				1688	case Magic('@'):
				1689	case Magic('{'):
				1690	case Magic('*'):
				1691	c = no_Magic(c);
				1692	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1693	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1694	? "" : "\\", c);
				1695	EMSG_RET_NULL(IObuff);
				1696	/* NOTREACHED */
				1697
				1698	case Magic('~'): /* previous substitute pattern */
				1699	if (reg_prev_sub)
				1700	{
				1701	char_u *lp;
				1702
				1703	ret = regnode(EXACTLY);
				1704	lp = reg_prev_sub;
				1705	while (*lp != NUL)
				1706	regc(*lp++);
				1707	regc(NUL);
				1708	if (*reg_prev_sub != NUL)
				1709	{
				1710	*flagp \|= HASWIDTH;
				1711	if ((lp - reg_prev_sub) == 1)
				1712	*flagp \|= SIMPLE;
				1713	}
				1714	}
				1715	else
				1716	EMSG_RET_NULL(_(e_nopresub));
				1717	break;
				1718
				1719	case Magic('1'):
				1720	case Magic('2'):
				1721	case Magic('3'):
				1722	case Magic('4'):
				1723	case Magic('5'):
				1724	case Magic('6'):
				1725	case Magic('7'):
				1726	case Magic('8'):
				1727	case Magic('9'):
				1728	{
				1729	int refnum;
				1730
				1731	refnum = c - Magic('0');
				1732	/*
				1733	* Check if the back reference is legal. We must have seen the
				1734	* close brace.
				1735	* TODO: Should also check that we don't refer to something
				1736	* that is repeated (+*=): what instance of the repetition
				1737	* should we match?
				1738	*/
				1739	if (!had_endbrace[refnum])
				1740	{
				1741	/* Trick: check if "@<=" or "@<!" follows, in which case
				1742	* the \1 can appear before the referenced match. */
				1743	for (p = regparse; *p != NUL; ++p)
				1744	if (p[0] == '@' && p[1] == '<'
				1745	&& (p[2] == '!' \|\| p[2] == '='))
				1746	break;
				1747	if (*p == NUL)
				1748	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1749	}
				1750	ret = regnode(BACKREF + refnum);
				1751	}
				1752	break;
				1753
				1754	#ifdef FEAT_SYN_HL
				1755	case Magic('z'):
				1756	{
				1757	c = no_Magic(getchr());
				1758	switch (c)
				1759	{
				1760	case '(': if (reg_do_extmatch != REX_SET)
				1761	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1762	if (one_exactly)
				1763	EMSG_ONE_RET_NULL;
				1764	ret = reg(REG_ZPAREN, &flags);
				1765	if (ret == NULL)
				1766	return NULL;
				1767	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1768	re_has_z = REX_SET;
				1769	break;
				1770
				1771	case '1':
				1772	case '2':
				1773	case '3':
				1774	case '4':
				1775	case '5':
				1776	case '6':
				1777	case '7':
				1778	case '8':
				1779	case '9': if (reg_do_extmatch != REX_USE)
				1780	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1781	ret = regnode(ZREF + c - '0');
				1782	re_has_z = REX_USE;
				1783	break;
				1784
				1785	case 's': ret = regnode(MOPEN + 0);
				1786	break;
				1787
				1788	case 'e': ret = regnode(MCLOSE + 0);
				1789	break;
				1790
				1791	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1792	}
				1793	}
				1794	break;
				1795	#endif
				1796
				1797	case Magic('%'):
				1798	{
				1799	c = no_Magic(getchr());
				1800	switch (c)
				1801	{
				1802	/* () without a back reference */
				1803	case '(':
				1804	if (one_exactly)
				1805	EMSG_ONE_RET_NULL;
				1806	ret = reg(REG_NPAREN, &flags);
				1807	if (ret == NULL)
				1808	return NULL;
				1809	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1810	break;
				1811
				1812	/* Catch \%^ and \%$ regardless of where they appear in the
				1813	* pattern -- regardless of whether or not it makes sense. */
				1814	case '^':
				1815	ret = regnode(RE_BOF);
				1816	break;
				1817
				1818	case '$':
				1819	ret = regnode(RE_EOF);
				1820	break;
				1821
				1822	case '#':
				1823	ret = regnode(CURSOR);
				1824	break;
				1825
				1826	/* \%[abc]: Emit as a list of branches, all ending at the last
				1827	* branch which matches nothing. */
				1828	case '[':
				1829	if (one_exactly) /* doesn't nest */
				1830	EMSG_ONE_RET_NULL;
				1831	{
				1832	char_u *lastbranch;
				1833	char_u *lastnode = NULL;
				1834	char_u *br;
				1835
				1836	ret = NULL;
				1837	while ((c = getchr()) != ']')
				1838	{
				1839	if (c == NUL)
				1840	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1841	reg_magic == MAGIC_ALL);
				1842	br = regnode(BRANCH);
				1843	if (ret == NULL)
				1844	ret = br;
				1845	else
				1846	regtail(lastnode, br);
				1847
				1848	ungetchr();
				1849	one_exactly = TRUE;
				1850	lastnode = regatom(flagp);
				1851	one_exactly = FALSE;
				1852	if (lastnode == NULL)
				1853	return NULL;
				1854	}
				1855	if (ret == NULL)
				1856	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1857	reg_magic == MAGIC_ALL);
				1858	lastbranch = regnode(BRANCH);
				1859	br = regnode(NOTHING);
				1860	if (ret != JUST_CALC_SIZE)
				1861	{
				1862	regtail(lastnode, br);
				1863	regtail(lastbranch, br);
				1864	/* connect all branches to the NOTHING
				1865	* branch at the end */
				1866	for (br = ret; br != lastnode; )
				1867	{
				1868	if (OP(br) == BRANCH)
				1869	{
				1870	regtail(br, lastbranch);
				1871	br = OPERAND(br);
				1872	}
				1873	else
				1874	br = regnext(br);
				1875	}
				1876	}
				1877	*flagp &= ~HASWIDTH;
				1878	break;
				1879	}
				1880
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1881	case 'd': /* %d123 decimal */
				1882	case 'o': /* %o123 octal */
				1883	case 'x': /* %xab hex 2 */
				1884	case 'u': /* %uabcd hex 4 */
				1885	case 'U': /* %U1234abcd hex 8 */
				1886	{
				1887	int i;
				1888
				1889	switch (c)
				1890	{
				1891	case 'd': i = getdecchrs(); break;
				1892	case 'o': i = getoctchrs(); break;
				1893	case 'x': i = gethexchrs(2); break;
				1894	case 'u': i = gethexchrs(4); break;
				1895	case 'U': i = gethexchrs(8); break;
				1896	default: i = -1; break;
				1897	}
				1898
				1899	if (i < 0)
				1900	EMSG_M_RET_NULL(
				1901	_("E678: Invalid character after %s%%[dxouU]"),
				1902	reg_magic == MAGIC_ALL);
				1903	ret = regnode(EXACTLY);
				1904	if (i == 0)
				1905	regc(0x0a);
				1906	else
				1907	#ifdef FEAT_MBYTE
				1908	regmbc(i);
				1909	#else
				1910	regc(i);
				1911	#endif
				1912	regc(NUL);
				1913	*flagp \|= HASWIDTH;
				1914	break;
				1915	}
				1916
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1917	default:
				1918	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1919	{
				1920	long_u n = 0;
				1921	int cmp;
				1922
				1923	cmp = c;
				1924	if (cmp == '<' \|\| cmp == '>')
				1925	c = getchr();
				1926	while (VIM_ISDIGIT(c))
				1927	{
				1928	n = n * 10 + (c - '0');
				1929	c = getchr();
				1930	}
				1931	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1932	{
				1933	if (c == 'l')
				1934	ret = regnode(RE_LNUM);
				1935	else if (c == 'c')
				1936	ret = regnode(RE_COL);
				1937	else
				1938	ret = regnode(RE_VCOL);
				1939	if (ret == JUST_CALC_SIZE)
				1940	regsize += 5;
				1941	else
				1942	{
				1943	/* put the number and the optional
				1944	* comparator after the opcode */
				1945	regcode = re_put_long(regcode, n);
				1946	*regcode++ = cmp;
				1947	}
				1948	break;
				1949	}
				1950	}
				1951
				1952	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1953	reg_magic == MAGIC_ALL);
				1954	}
				1955	}
				1956	break;
				1957
				1958	case Magic('['):
				1959	collection:
				1960	{
				1961	char_u *lp;
				1962
				1963	/*
				1964	* If there is no matching ']', we assume the '[' is a normal
				1965	* character. This makes 'incsearch' and ":help [" work.
				1966	*/
				1967	lp = skip_anyof(regparse);
				1968	if (lp == ']') / there is a matching ']' */
				1969	{
				1970	int startc = -1; /* > 0 when next '-' is a range */
				1971	int endc;
				1972
				1973	/*
				1974	* In a character class, different parsing rules apply.
				1975	* Not even \ is special anymore, nothing is.
				1976	*/
				1977	if (regparse == '^') / Complement of range. */
				1978	{
				1979	ret = regnode(ANYBUT + extra);
				1980	regparse++;
				1981	}
				1982	else
				1983	ret = regnode(ANYOF + extra);
				1984
				1985	/* At the start ']' and '-' mean the literal character. */
				1986	if (regparse == ']' \|\| regparse == '-')
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1987	{
				1988	startc = *regparse;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1989	regc(*regparse++);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1990	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1991
				1992	while (regparse != NUL && regparse != ']')
				1993	{
				1994	if (*regparse == '-')
				1995	{
				1996	++regparse;
				1997	/* The '-' is not used for a range at the end and
				1998	* after or before a '\n'. */
				1999	if (regparse == ']' \|\| regparse == NUL
				2000	\|\| startc == -1
				2001	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				2002	{
				2003	regc('-');
				2004	startc = '-'; /* [--x] is a range */
				2005	}
				2006	else
				2007	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2008	/* Also accept "a-[.z.]" */
				2009	endc = 0;
				2010	if (*regparse == '[')
				2011	endc = get_coll_element(&regparse);
				2012	if (endc == 0)
				2013	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2014	#ifdef FEAT_MBYTE
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2015	if (has_mbyte)
				2016	endc = mb_ptr2char_adv(&regparse);
				2017	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2018	#endif
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2019	endc = *regparse++;
				2020	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2021
				2022	/* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2023	if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2024	endc = coll_get_char();
				2025
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2026	if (startc > endc)
				2027	EMSG_RET_NULL(_(e_invrange));
				2028	#ifdef FEAT_MBYTE
				2029	if (has_mbyte && ((*mb_char2len)(startc) > 1
				2030	\|\| (*mb_char2len)(endc) > 1))
				2031	{
				2032	/* Limit to a range of 256 chars */
				2033	if (endc > startc + 256)
				2034	EMSG_RET_NULL(_(e_invrange));
				2035	while (++startc <= endc)
				2036	regmbc(startc);
				2037	}
				2038	else
				2039	#endif
				2040	{
				2041	#ifdef EBCDIC
				2042	int alpha_only = FALSE;
				2043
				2044	/* for alphabetical range skip the gaps
				2045	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				2046	if (isalpha(startc) && isalpha(endc))
				2047	alpha_only = TRUE;
				2048	#endif
				2049	while (++startc <= endc)
				2050	#ifdef EBCDIC
				2051	if (!alpha_only \|\| isalpha(startc))
				2052	#endif
				2053	regc(startc);
				2054	}
				2055	startc = -1;
				2056	}
				2057	}
				2058	/*
				2059	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				2060	* accepts "\t", "\e", etc., but only when the 'l' flag in
				2061	* 'cpoptions' is not included.
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2062	* Posix doesn't recognize backslash at all.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2063	*/
				2064	else if (*regparse == '\\'
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2065	&& !cpo_bsl
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2066	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				2067	\|\| (!cpo_lit
				2068	&& vim_strchr(REGEXP_ABBR,
				2069	regparse[1]) != NULL)))
				2070	{
				2071	regparse++;
				2072	if (*regparse == 'n')
				2073	{
				2074	/* '\n' in range: also match NL */
				2075	if (ret != JUST_CALC_SIZE)
				2076	{
				2077	if (*ret == ANYBUT)
				2078	*ret = ANYBUT + ADD_NL;
				2079	else if (*ret == ANYOF)
				2080	*ret = ANYOF + ADD_NL;
				2081	/* else: must have had a \n already */
				2082	}
				2083	*flagp \|= HASNL;
				2084	regparse++;
				2085	startc = -1;
				2086	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2087	else if (*regparse == 'd'
				2088	\|\| *regparse == 'o'
				2089	\|\| *regparse == 'x'
				2090	\|\| *regparse == 'u'
				2091	\|\| *regparse == 'U')
				2092	{
				2093	startc = coll_get_char();
				2094	if (startc == 0)
				2095	regc(0x0a);
				2096	else
				2097	#ifdef FEAT_MBYTE
				2098	regmbc(startc);
				2099	#else
				2100	regc(startc);
				2101	#endif
				2102	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2103	else
				2104	{
				2105	startc = backslash_trans(*regparse++);
				2106	regc(startc);
				2107	}
				2108	}
				2109	else if (*regparse == '[')
				2110	{
				2111	int c_class;
				2112	int cu;
				2113
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2114	c_class = get_char_class(&regparse);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2115	startc = -1;
				2116	/* Characters assumed to be 8 bits! */
				2117	switch (c_class)
				2118	{
				2119	case CLASS_NONE:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2120	c_class = get_equi_class(&regparse);
				2121	if (c_class != 0)
				2122	{
				2123	/* produce equivalence class */
				2124	reg_equi_class(c_class);
				2125	}
				2126	else if ((c_class =
				2127	get_coll_element(&regparse)) != 0)
				2128	{
				2129	/* produce a collating element */
				2130	regmbc(c_class);
				2131	}
				2132	else
				2133	{
				2134	/* literal '[', allow [[-x] as a range */
				2135	startc = *regparse++;
				2136	regc(startc);
				2137	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2138	break;
				2139	case CLASS_ALNUM:
				2140	for (cu = 1; cu <= 255; cu++)
				2141	if (isalnum(cu))
				2142	regc(cu);
				2143	break;
				2144	case CLASS_ALPHA:
				2145	for (cu = 1; cu <= 255; cu++)
				2146	if (isalpha(cu))
				2147	regc(cu);
				2148	break;
				2149	case CLASS_BLANK:
				2150	regc(' ');
				2151	regc('\t');
				2152	break;
				2153	case CLASS_CNTRL:
				2154	for (cu = 1; cu <= 255; cu++)
				2155	if (iscntrl(cu))
				2156	regc(cu);
				2157	break;
				2158	case CLASS_DIGIT:
				2159	for (cu = 1; cu <= 255; cu++)
				2160	if (VIM_ISDIGIT(cu))
				2161	regc(cu);
				2162	break;
				2163	case CLASS_GRAPH:
				2164	for (cu = 1; cu <= 255; cu++)
				2165	if (isgraph(cu))
				2166	regc(cu);
				2167	break;
				2168	case CLASS_LOWER:
				2169	for (cu = 1; cu <= 255; cu++)
				2170	if (islower(cu))
				2171	regc(cu);
				2172	break;
				2173	case CLASS_PRINT:
				2174	for (cu = 1; cu <= 255; cu++)
				2175	if (vim_isprintc(cu))
				2176	regc(cu);
				2177	break;
				2178	case CLASS_PUNCT:
				2179	for (cu = 1; cu <= 255; cu++)
				2180	if (ispunct(cu))
				2181	regc(cu);
				2182	break;
				2183	case CLASS_SPACE:
				2184	for (cu = 9; cu <= 13; cu++)
				2185	regc(cu);
				2186	regc(' ');
				2187	break;
				2188	case CLASS_UPPER:
				2189	for (cu = 1; cu <= 255; cu++)
				2190	if (isupper(cu))
				2191	regc(cu);
				2192	break;
				2193	case CLASS_XDIGIT:
				2194	for (cu = 1; cu <= 255; cu++)
				2195	if (vim_isxdigit(cu))
				2196	regc(cu);
				2197	break;
				2198	case CLASS_TAB:
				2199	regc('\t');
				2200	break;
				2201	case CLASS_RETURN:
				2202	regc('\r');
				2203	break;
				2204	case CLASS_BACKSPACE:
				2205	regc('\b');
				2206	break;
				2207	case CLASS_ESCAPE:
				2208	regc('\033');
				2209	break;
				2210	}
				2211	}
				2212	else
				2213	{
				2214	#ifdef FEAT_MBYTE
				2215	if (has_mbyte)
				2216	{
				2217	int len;
				2218
				2219	/* produce a multibyte character, including any
				2220	* following composing characters */
				2221	startc = mb_ptr2char(regparse);
				2222	len = (*mb_ptr2len_check)(regparse);
				2223	if (enc_utf8 && utf_char2len(startc) != len)
				2224	startc = -1; /* composing chars */
				2225	while (--len >= 0)
				2226	regc(*regparse++);
				2227	}
				2228	else
				2229	#endif
				2230	{
				2231	startc = *regparse++;
				2232	regc(startc);
				2233	}
				2234	}
				2235	}
				2236	regc(NUL);
				2237	prevchr_len = 1; /* last char was the ']' */
				2238	if (*regparse != ']')
				2239	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2240	skipchr(); /* let's be friends with the lexer again */
				2241	*flagp \|= HASWIDTH \| SIMPLE;
				2242	break;
				2243	}
				2244	}
				2245	/* FALLTHROUGH */
				2246
				2247	default:
				2248	{
				2249	int len;
				2250
				2251	#ifdef FEAT_MBYTE
				2252	/* A multi-byte character is handled as a separate atom if it's
				2253	* before a multi. */
				2254	if (has_mbyte && (*mb_char2len)(c) > 1
				2255	&& re_multi_type(peekchr()) != NOT_MULTI)
				2256	{
				2257	ret = regnode(MULTIBYTECODE);
				2258	regmbc(c);
				2259	*flagp \|= HASWIDTH \| SIMPLE;
				2260	break;
				2261	}
				2262	#endif
				2263
				2264	ret = regnode(EXACTLY);
				2265
				2266	/*
				2267	* Append characters as long as:
				2268	* - there is no following multi, we then need the character in
				2269	* front of it as a single character operand
				2270	* - not running into a Magic character
				2271	* - "one_exactly" is not set
				2272	* But always emit at least one character. Might be a Multi,
				2273	* e.g., a "[" without matching "]".
				2274	*/
				2275	for (len = 0; c != NUL && (len == 0
				2276	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2277	&& !one_exactly
				2278	&& !is_Magic(c))); ++len)
				2279	{
				2280	c = no_Magic(c);
				2281	#ifdef FEAT_MBYTE
				2282	if (has_mbyte)
				2283	{
				2284	regmbc(c);
				2285	if (enc_utf8)
				2286	{
				2287	int off;
				2288	int l;
				2289
				2290	/* Need to get composing character too, directly
				2291	* access regparse for that, because skipchr() skips
				2292	* over composing chars. */
				2293	ungetchr();
				2294	if (*regparse == '\\' && regparse[1] != NUL)
				2295	off = 1;
				2296	else
				2297	off = 0;
				2298	for (;;)
				2299	{
				2300	l = utf_ptr2len_check(regparse + off);
				2301	if (!UTF_COMPOSINGLIKE(regparse + off,
				2302	regparse + off + l))
				2303	break;
				2304	off += l;
				2305	regmbc(utf_ptr2char(regparse + off));
				2306	}
				2307	skipchr();
				2308	}
				2309	}
				2310	else
				2311	#endif
				2312	regc(c);
				2313	c = getchr();
				2314	}
				2315	ungetchr();
				2316
				2317	regc(NUL);
				2318	*flagp \|= HASWIDTH;
				2319	if (len == 1)
				2320	*flagp \|= SIMPLE;
				2321	}
				2322	break;
				2323	}
				2324
				2325	return ret;
				2326	}
				2327
				2328	/*
				2329	* emit a node
				2330	* Return pointer to generated code.
				2331	*/
				2332	static char_u *
				2333	regnode(op)
				2334	int op;
				2335	{
				2336	char_u *ret;
				2337
				2338	ret = regcode;
				2339	if (ret == JUST_CALC_SIZE)
				2340	regsize += 3;
				2341	else
				2342	{
				2343	*regcode++ = op;
				2344	regcode++ = NUL; / Null "next" pointer. */
				2345	*regcode++ = NUL;
				2346	}
				2347	return ret;
				2348	}
				2349
				2350	/*
				2351	* Emit (if appropriate) a byte of code
				2352	*/
				2353	static void
				2354	regc(b)
				2355	int b;
				2356	{
				2357	if (regcode == JUST_CALC_SIZE)
				2358	regsize++;
				2359	else
				2360	*regcode++ = b;
				2361	}
				2362
				2363	#ifdef FEAT_MBYTE
				2364	/*
				2365	* Emit (if appropriate) a multi-byte character of code
				2366	*/
				2367	static void
				2368	regmbc(c)
				2369	int c;
				2370	{
				2371	if (regcode == JUST_CALC_SIZE)
				2372	regsize += (*mb_char2len)(c);
				2373	else
				2374	regcode += (*mb_char2bytes)(c, regcode);
				2375	}
				2376	#endif
				2377
				2378	/*
				2379	* reginsert - insert an operator in front of already-emitted operand
				2380	*
				2381	* Means relocating the operand.
				2382	*/
				2383	static void
				2384	reginsert(op, opnd)
				2385	int op;
				2386	char_u *opnd;
				2387	{
				2388	char_u *src;
				2389	char_u *dst;
				2390	char_u *place;
				2391
				2392	if (regcode == JUST_CALC_SIZE)
				2393	{
				2394	regsize += 3;
				2395	return;
				2396	}
				2397	src = regcode;
				2398	regcode += 3;
				2399	dst = regcode;
				2400	while (src > opnd)
				2401	--dst = --src;
				2402
				2403	place = opnd; /* Op node, where operand used to be. */
				2404	*place++ = op;
				2405	*place++ = NUL;
				2406	*place = NUL;
				2407	}
				2408
				2409	/*
				2410	* reginsert_limits - insert an operator in front of already-emitted operand.
				2411	* The operator has the given limit values as operands. Also set next pointer.
				2412	*
				2413	* Means relocating the operand.
				2414	*/
				2415	static void
				2416	reginsert_limits(op, minval, maxval, opnd)
				2417	int op;
				2418	long minval;
				2419	long maxval;
				2420	char_u *opnd;
				2421	{
				2422	char_u *src;
				2423	char_u *dst;
				2424	char_u *place;
				2425
				2426	if (regcode == JUST_CALC_SIZE)
				2427	{
				2428	regsize += 11;
				2429	return;
				2430	}
				2431	src = regcode;
				2432	regcode += 11;
				2433	dst = regcode;
				2434	while (src > opnd)
				2435	--dst = --src;
				2436
				2437	place = opnd; /* Op node, where operand used to be. */
				2438	*place++ = op;
				2439	*place++ = NUL;
				2440	*place++ = NUL;
				2441	place = re_put_long(place, (long_u)minval);
				2442	place = re_put_long(place, (long_u)maxval);
				2443	regtail(opnd, place);
				2444	}
				2445
				2446	/*
				2447	* Write a long as four bytes at "p" and return pointer to the next char.
				2448	*/
				2449	static char_u *
				2450	re_put_long(p, val)
				2451	char_u *p;
				2452	long_u val;
				2453	{
				2454	*p++ = (char_u) ((val >> 24) & 0377);
				2455	*p++ = (char_u) ((val >> 16) & 0377);
				2456	*p++ = (char_u) ((val >> 8) & 0377);
				2457	*p++ = (char_u) (val & 0377);
				2458	return p;
				2459	}
				2460
				2461	/*
				2462	* regtail - set the next-pointer at the end of a node chain
				2463	*/
				2464	static void
				2465	regtail(p, val)
				2466	char_u *p;
				2467	char_u *val;
				2468	{
				2469	char_u *scan;
				2470	char_u *temp;
				2471	int offset;
				2472
				2473	if (p == JUST_CALC_SIZE)
				2474	return;
				2475
				2476	/* Find last node. */
				2477	scan = p;
				2478	for (;;)
				2479	{
				2480	temp = regnext(scan);
				2481	if (temp == NULL)
				2482	break;
				2483	scan = temp;
				2484	}
				2485
				2486	if (OP(scan) == BACK)
				2487	offset = (int)(scan - val);
				2488	else
				2489	offset = (int)(val - scan);
				2490	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2491	*(scan + 2) = (char_u) (offset & 0377);
				2492	}
				2493
				2494	/*
				2495	* regoptail - regtail on item after a BRANCH; nop if none
				2496	*/
				2497	static void
				2498	regoptail(p, val)
				2499	char_u *p;
				2500	char_u *val;
				2501	{
				2502	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2503	if (p == NULL \|\| p == JUST_CALC_SIZE
				2504	\|\| (OP(p) != BRANCH
				2505	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2506	return;
				2507	regtail(OPERAND(p), val);
				2508	}
				2509
				2510	/*
				2511	* getchr() - get the next character from the pattern. We know about
				2512	* magic and such, so therefore we need a lexical analyzer.
				2513	*/
				2514
				2515	/* static int curchr; */
				2516	static int prevprevchr;
				2517	static int prevchr;
				2518	static int nextchr; /* used for ungetchr() */
				2519	/*
				2520	* Note: prevchr is sometimes -1 when we are not at the start,
				2521	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2522	* taken to be magic -- webb
				2523	*/
				2524	static int at_start; /* True when on the first character */
				2525	static int prev_at_start; /* True when on the second character */
				2526
				2527	static void
				2528	initchr(str)
				2529	char_u *str;
				2530	{
				2531	regparse = str;
				2532	prevchr_len = 0;
				2533	curchr = prevprevchr = prevchr = nextchr = -1;
				2534	at_start = TRUE;
				2535	prev_at_start = FALSE;
				2536	}
				2537
				2538	static int
				2539	peekchr()
				2540	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2541	static int after_slash = FALSE;
				2542
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2543	if (curchr == -1)
				2544	{
				2545	switch (curchr = regparse[0])
				2546	{
				2547	case '.':
				2548	case '[':
				2549	case '~':
				2550	/* magic when 'magic' is on */
				2551	if (reg_magic >= MAGIC_ON)
				2552	curchr = Magic(curchr);
				2553	break;
				2554	case '(':
				2555	case ')':
				2556	case '{':
				2557	case '%':
				2558	case '+':
				2559	case '=':
				2560	case '?':
				2561	case '@':
				2562	case '!':
				2563	case '&':
				2564	case '\|':
				2565	case '<':
				2566	case '>':
				2567	case '#': /* future ext. */
				2568	case '"': /* future ext. */
				2569	case '\'': /* future ext. */
				2570	case ',': /* future ext. */
				2571	case '-': /* future ext. */
				2572	case ':': /* future ext. */
				2573	case ';': /* future ext. */
				2574	case '`': /* future ext. */
				2575	case '/': /* Can't be used in / command */
				2576	/* magic only after "\v" */
				2577	if (reg_magic == MAGIC_ALL)
				2578	curchr = Magic(curchr);
				2579	break;
				2580	case '*':
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2581	/* * is not magic as the very first character, eg "?*ptr", when
				2582	* after '^', eg "/^*ptr" and when after "\(", "\\|", "\&". But
				2583	* "\(\" is not magic, thus must be magic if "after_slash" /
				2584	if (reg_magic >= MAGIC_ON
				2585	&& !at_start
				2586	&& !(prev_at_start && prevchr == Magic('^'))
				2587	&& (after_slash
				2588	\|\| (prevchr != Magic('(')
				2589	&& prevchr != Magic('&')
				2590	&& prevchr != Magic('\|'))))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2591	curchr = Magic('*');
				2592	break;
				2593	case '^':
				2594	/* '^' is only magic as the very first character and if it's after
				2595	* "\(", "\\|", "\&' or "\n" */
				2596	if (reg_magic >= MAGIC_OFF
				2597	&& (at_start
				2598	\|\| reg_magic == MAGIC_ALL
				2599	\|\| prevchr == Magic('(')
				2600	\|\| prevchr == Magic('\|')
				2601	\|\| prevchr == Magic('&')
				2602	\|\| prevchr == Magic('n')
				2603	\|\| (no_Magic(prevchr) == '('
				2604	&& prevprevchr == Magic('%'))))
				2605	{
				2606	curchr = Magic('^');
				2607	at_start = TRUE;
				2608	prev_at_start = FALSE;
				2609	}
				2610	break;
				2611	case '$':
				2612	/* '$' is only magic as the very last char and if it's in front of
				2613	* either "\\|", "\)", "\&", or "\n" */
				2614	if (reg_magic >= MAGIC_OFF)
				2615	{
				2616	char_u *p = regparse + 1;
				2617
				2618	/* ignore \c \C \m and \M after '$' */
				2619	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2620	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2621	p += 2;
				2622	if (p[0] == NUL
				2623	\|\| (p[0] == '\\'
				2624	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2625	\|\| p[1] == 'n'))
				2626	\|\| reg_magic == MAGIC_ALL)
				2627	curchr = Magic('$');
				2628	}
				2629	break;
				2630	case '\\':
				2631	{
				2632	int c = regparse[1];
				2633
				2634	if (c == NUL)
				2635	curchr = '\\'; /* trailing '\' */
				2636	else if (
				2637	#ifdef EBCDIC
				2638	vim_strchr(META, c)
				2639	#else
				2640	c <= '~' && META_flags[c]
				2641	#endif
				2642	)
				2643	{
				2644	/*
				2645	* META contains everything that may be magic sometimes,
				2646	* except ^ and $ ("\^" and "\$" are only magic after
				2647	* "\v"). We now fetch the next character and toggle its
				2648	* magicness. Therefore, \ is so meta-magic that it is
				2649	* not in META.
				2650	*/
				2651	curchr = -1;
				2652	prev_at_start = at_start;
				2653	at_start = FALSE; /* be able to say "/\ptr" /
				2654	++regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2655	++after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2656	peekchr();
				2657	--regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2658	--after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2659	curchr = toggle_Magic(curchr);
				2660	}
				2661	else if (vim_strchr(REGEXP_ABBR, c))
				2662	{
				2663	/*
				2664	* Handle abbreviations, like "\t" for TAB -- webb
				2665	*/
				2666	curchr = backslash_trans(c);
				2667	}
				2668	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2669	curchr = toggle_Magic(c);
				2670	else
				2671	{
				2672	/*
				2673	* Next character can never be (made) magic?
				2674	* Then backslashing it won't do anything.
				2675	*/
				2676	#ifdef FEAT_MBYTE
				2677	if (has_mbyte)
				2678	curchr = (*mb_ptr2char)(regparse + 1);
				2679	else
				2680	#endif
				2681	curchr = c;
				2682	}
				2683	break;
				2684	}
				2685
				2686	#ifdef FEAT_MBYTE
				2687	default:
				2688	if (has_mbyte)
				2689	curchr = (*mb_ptr2char)(regparse);
				2690	#endif
				2691	}
				2692	}
				2693
				2694	return curchr;
				2695	}
				2696
				2697	/*
				2698	* Eat one lexed character. Do this in a way that we can undo it.
				2699	*/
				2700	static void
				2701	skipchr()
				2702	{
				2703	/* peekchr() eats a backslash, do the same here */
				2704	if (*regparse == '\\')
				2705	prevchr_len = 1;
				2706	else
				2707	prevchr_len = 0;
				2708	if (regparse[prevchr_len] != NUL)
				2709	{
				2710	#ifdef FEAT_MBYTE
				2711	if (has_mbyte)
				2712	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2713	else
				2714	#endif
				2715	++prevchr_len;
				2716	}
				2717	regparse += prevchr_len;
				2718	prev_at_start = at_start;
				2719	at_start = FALSE;
				2720	prevprevchr = prevchr;
				2721	prevchr = curchr;
				2722	curchr = nextchr; /* use previously unget char, or -1 */
				2723	nextchr = -1;
				2724	}
				2725
				2726	/*
				2727	* Skip a character while keeping the value of prev_at_start for at_start.
				2728	* prevchr and prevprevchr are also kept.
				2729	*/
				2730	static void
				2731	skipchr_keepstart()
				2732	{
				2733	int as = prev_at_start;
				2734	int pr = prevchr;
				2735	int prpr = prevprevchr;
				2736
				2737	skipchr();
				2738	at_start = as;
				2739	prevchr = pr;
				2740	prevprevchr = prpr;
				2741	}
				2742
				2743	static int
				2744	getchr()
				2745	{
				2746	int chr = peekchr();
				2747
				2748	skipchr();
				2749	return chr;
				2750	}
				2751
				2752	/*
				2753	* put character back. Works only once!
				2754	*/
				2755	static void
				2756	ungetchr()
				2757	{
				2758	nextchr = curchr;
				2759	curchr = prevchr;
				2760	prevchr = prevprevchr;
				2761	at_start = prev_at_start;
				2762	prev_at_start = FALSE;
				2763
				2764	/* Backup regparse, so that it's at the same position as before the
				2765	* getchr(). */
				2766	regparse -= prevchr_len;
				2767	}
				2768
				2769	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2770	* Get and return the value of the hex string at the current position.
				2771	* Return -1 if there is no valid hex number.
				2772	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2773	* blahblah\%x20asdf
				2774	* before-^ ^-after
				2775	* The parameter controls the maximum number of input characters. This will be
				2776	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2777	*/
				2778	static int
				2779	gethexchrs(maxinputlen)
				2780	int maxinputlen;
				2781	{
				2782	int nr = 0;
				2783	int c;
				2784	int i;
				2785
				2786	for (i = 0; i < maxinputlen; ++i)
				2787	{
				2788	c = regparse[0];
				2789	if (!vim_isxdigit(c))
				2790	break;
				2791	nr <<= 4;
				2792	nr \|= hex2nr(c);
				2793	++regparse;
				2794	}
				2795
				2796	if (i == 0)
				2797	return -1;
				2798	return nr;
				2799	}
				2800
				2801	/*
				2802	* get and return the value of the decimal string immediately after the
				2803	* current position. Return -1 for invalid. Consumes all digits.
				2804	*/
				2805	static int
				2806	getdecchrs()
				2807	{
				2808	int nr = 0;
				2809	int c;
				2810	int i;
				2811
				2812	for (i = 0; ; ++i)
				2813	{
				2814	c = regparse[0];
				2815	if (c < '0' \|\| c > '9')
				2816	break;
				2817	nr *= 10;
				2818	nr += c - '0';
				2819	++regparse;
				2820	}
				2821
				2822	if (i == 0)
				2823	return -1;
				2824	return nr;
				2825	}
				2826
				2827	/*
				2828	* get and return the value of the octal string immediately after the current
				2829	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2830	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2831	* treat 8 or 9 as recognised characters. Position is updated:
				2832	* blahblah\%o210asdf
				2833	* before-^ ^-after
				2834	*/
				2835	static int
				2836	getoctchrs()
				2837	{
				2838	int nr = 0;
				2839	int c;
				2840	int i;
				2841
				2842	for (i = 0; i < 3 && nr < 040; ++i)
				2843	{
				2844	c = regparse[0];
				2845	if (c < '0' \|\| c > '7')
				2846	break;
				2847	nr <<= 3;
				2848	nr \|= hex2nr(c);
				2849	++regparse;
				2850	}
				2851
				2852	if (i == 0)
				2853	return -1;
				2854	return nr;
				2855	}
				2856
				2857	/*
				2858	* Get a number after a backslash that is inside [].
				2859	* When nothing is recognized return a backslash.
				2860	*/
				2861	static int
				2862	coll_get_char()
				2863	{
				2864	int nr = -1;
				2865
				2866	switch (*regparse++)
				2867	{
				2868	case 'd': nr = getdecchrs(); break;
				2869	case 'o': nr = getoctchrs(); break;
				2870	case 'x': nr = gethexchrs(2); break;
				2871	case 'u': nr = gethexchrs(4); break;
				2872	case 'U': nr = gethexchrs(8); break;
				2873	}
				2874	if (nr < 0)
				2875	{
				2876	/* If getting the number fails be backwards compatible: the character
				2877	* is a backslash. */
				2878	--regparse;
				2879	nr = '\\';
				2880	}
				2881	return nr;
				2882	}
				2883
				2884	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2885	* read_limits - Read two integers to be taken as a minimum and maximum.
				2886	* If the first character is '-', then the range is reversed.
				2887	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2888	* missing, a very big number is the default.
				2889	*/
				2890	static int
				2891	read_limits(minval, maxval)
				2892	long *minval;
				2893	long *maxval;
				2894	{
				2895	int reverse = FALSE;
				2896	char_u *first_char;
				2897	long tmp;
				2898
				2899	if (*regparse == '-')
				2900	{
				2901	/* Starts with '-', so reverse the range later */
				2902	regparse++;
				2903	reverse = TRUE;
				2904	}
				2905	first_char = regparse;
				2906	*minval = getdigits(&regparse);
				2907	if (regparse == ',') / There is a comma */
				2908	{
				2909	if (vim_isdigit(*++regparse))
				2910	*maxval = getdigits(&regparse);
				2911	else
				2912	*maxval = MAX_LIMIT;
				2913	}
				2914	else if (VIM_ISDIGIT(*first_char))
				2915	maxval = minval; /* It was \{n} or \{-n} */
				2916	else
				2917	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2918	if (*regparse == '\\')
				2919	regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2920	if (*regparse != '}')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2921	{
				2922	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2923	reg_magic == MAGIC_ALL ? "" : "\\");
				2924	EMSG_RET_FAIL(IObuff);
				2925	}
				2926
				2927	/*
				2928	* Reverse the range if there was a '-', or make sure it is in the right
				2929	* order otherwise.
				2930	*/
				2931	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2932	{
				2933	tmp = *minval;
				2934	minval = maxval;
				2935	*maxval = tmp;
				2936	}
				2937	skipchr(); /* let's be friends with the lexer again */
				2938	return OK;
				2939	}
				2940
				2941	/*
				2942	* vim_regexec and friends
				2943	*/
				2944
				2945	/*
				2946	* Global work variables for vim_regexec().
				2947	*/
				2948
				2949	/* The current match-position is remembered with these variables: */
				2950	static linenr_T reglnum; /* line number, relative to first line */
				2951	static char_u regline; / start of current line */
				2952	static char_u reginput; / current input, points into "regline" */
				2953
				2954	static int need_clear_subexpr; /* subexpressions still need to be
				2955	* cleared */
				2956	#ifdef FEAT_SYN_HL
				2957	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2958	* still need to be cleared */
				2959	#endif
				2960
				2961	static int out_of_stack; /* TRUE when ran out of stack space */
				2962
				2963	/*
				2964	* Structure used to save the current input state, when it needs to be
				2965	* restored after trying a match. Used by reg_save() and reg_restore().
				2966	*/
				2967	typedef struct
				2968	{
				2969	union
				2970	{
				2971	char_u ptr; / reginput pointer, for single-line regexp */
				2972	lpos_T pos; /* reginput pos, for multi-line regexp */
				2973	} rs_u;
				2974	} regsave_T;
				2975
				2976	/* struct to save start/end pointer/position in for */
				2977	typedef struct
				2978	{
				2979	union
				2980	{
				2981	char_u *ptr;
				2982	lpos_T pos;
				2983	} se_u;
				2984	} save_se_T;
				2985
				2986	static char_u *reg_getline __ARGS((linenr_T lnum));
				2987	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2988	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2989	static void cleanup_subexpr __ARGS((void));
				2990	#ifdef FEAT_SYN_HL
				2991	static void cleanup_zsubexpr __ARGS((void));
				2992	#endif
				2993	static void reg_nextline __ARGS((void));
				2994	static void reg_save __ARGS((regsave_T *save));
				2995	static void reg_restore __ARGS((regsave_T *save));
				2996	static int reg_save_equal __ARGS((regsave_T *save));
				2997	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2998	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2999
				3000	/* Save the sub-expressions before attempting a match. */
				3001	#define save_se(savep, posp, pp) \
				3002	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				3003
				3004	/* After a failed match restore the sub-expressions. */
				3005	#define restore_se(savep, posp, pp) { \
				3006	if (REG_MULTI) \
				3007	*(posp) = (savep)->se_u.pos; \
				3008	else \
				3009	*(pp) = (savep)->se_u.ptr; }
				3010
				3011	static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3012	static int regmatch __ARGS((char_u prog, regsave_T startp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3013	static int regrepeat __ARGS((char_u *p, long maxcount));
				3014
				3015	#ifdef DEBUG
				3016	int regnarrate = 0;
				3017	#endif
				3018
				3019	/*
				3020	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				3021	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				3022	* contains '\c' or '\C' the value is overruled.
				3023	*/
				3024	static int ireg_ic;
				3025
				3026	#ifdef FEAT_MBYTE
				3027	/*
				3028	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				3029	* in the regexp. Defaults to false, always.
				3030	*/
				3031	static int ireg_icombine;
				3032	#endif
				3033
				3034	/*
				3035	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				3036	* slow, we keep one allocated piece of memory and only re-allocate it when
				3037	* it's too small. It's freed in vim_regexec_both() when finished.
				3038	*/
				3039	static char_u *reg_tofree;
				3040	static unsigned reg_tofreelen;
				3041
				3042	/*
				3043	* These variables are set when executing a regexp to speed up the execution.
				3044	* Which ones are set depends on whethere a single-line or multi-line match is
				3045	* done:
				3046	* single-line multi-line
				3047	* reg_match &regmatch_T NULL
				3048	* reg_mmatch NULL &regmmatch_T
				3049	* reg_startp reg_match->startp <invalid>
				3050	* reg_endp reg_match->endp <invalid>
				3051	* reg_startpos <invalid> reg_mmatch->startpos
				3052	* reg_endpos <invalid> reg_mmatch->endpos
				3053	* reg_win NULL window in which to search
				3054	* reg_buf <invalid> buffer in which to search
				3055	* reg_firstlnum <invalid> first line in which to search
				3056	* reg_maxline 0 last line nr
				3057	* reg_line_lbr FALSE or TRUE FALSE
				3058	*/
				3059	static regmatch_T *reg_match;
				3060	static regmmatch_T *reg_mmatch;
				3061	static char_u **reg_startp = NULL;
				3062	static char_u **reg_endp = NULL;
				3063	static lpos_T *reg_startpos = NULL;
				3064	static lpos_T *reg_endpos = NULL;
				3065	static win_T *reg_win;
				3066	static buf_T *reg_buf;
				3067	static linenr_T reg_firstlnum;
				3068	static linenr_T reg_maxline;
				3069	static int reg_line_lbr; /* "\n" in string is line break */
				3070
				3071	/*
				3072	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				3073	*/
				3074	static char_u *
				3075	reg_getline(lnum)
				3076	linenr_T lnum;
				3077	{
				3078	/* when looking behind for a match/no-match lnum is negative. But we
				3079	* can't go before line 1 */
				3080	if (reg_firstlnum + lnum < 1)
				3081	return NULL;
				3082	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				3083	}
				3084
				3085	static regsave_T behind_pos;
				3086
				3087	#ifdef FEAT_SYN_HL
				3088	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				3089	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				3090	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				3091	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				3092	#endif
				3093
				3094	/* TRUE if using multi-line regexp. */
				3095	#define REG_MULTI (reg_match == NULL)
				3096
				3097	/*
				3098	* Match a regexp against a string.
				3099	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3100	* Uses curbuf for line count and 'iskeyword'.
				3101	*
				3102	* Return TRUE if there is a match, FALSE if not.
				3103	*/
				3104	int
				3105	vim_regexec(rmp, line, col)
				3106	regmatch_T *rmp;
				3107	char_u line; / string to match against */
				3108	colnr_T col; /* column to start looking for match */
				3109	{
				3110	reg_match = rmp;
				3111	reg_mmatch = NULL;
				3112	reg_maxline = 0;
				3113	reg_line_lbr = FALSE;
				3114	reg_win = NULL;
				3115	ireg_ic = rmp->rm_ic;
				3116	#ifdef FEAT_MBYTE
				3117	ireg_icombine = FALSE;
				3118	#endif
				3119	return (vim_regexec_both(line, col) != 0);
				3120	}
				3121
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The only difference in the state that is set up is reg_line_lbr == TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;	/* string to match against */
    colnr_T	col;	/* column to start looking for match */
{
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    reg_win = NULL;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    return (vim_regexec_both(line, col) != 0);
}
#endif
				3145
				3146	/*
				3147	* Match a regexp against multiple lines.
				3148	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3149	* Uses curbuf for line count and 'iskeyword'.
				3150	*
				3151	* Return zero if there is no match. Return number of lines contained in the
				3152	* match otherwise.
				3153	*/
				3154	long
				3155	vim_regexec_multi(rmp, win, buf, lnum, col)
				3156	regmmatch_T *rmp;
				3157	win_T win; / window in which to search or NULL */
				3158	buf_T buf; / buffer in which to search */
				3159	linenr_T lnum; /* nr of line to start looking for match */
				3160	colnr_T col; /* column to start looking for match */
				3161	{
				3162	long r;
				3163	buf_T *save_curbuf = curbuf;
				3164
				3165	reg_match = NULL;
				3166	reg_mmatch = rmp;
				3167	reg_buf = buf;
				3168	reg_win = win;
				3169	reg_firstlnum = lnum;
				3170	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				3171	reg_line_lbr = FALSE;
				3172	ireg_ic = rmp->rmm_ic;
				3173	#ifdef FEAT_MBYTE
				3174	ireg_icombine = FALSE;
				3175	#endif
				3176
				3177	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				3178	curbuf = buf;
				3179	r = vim_regexec_both(NULL, col);
				3180	curbuf = save_curbuf;
				3181
				3182	return r;
				3183	}
				3184
				3185	/*
				3186	* Match a regexp against a string ("line" points to the string) or multiple
				3187	* lines ("line" is NULL, use reg_getline()).
				3188	*/
				3189	#ifdef HAVE_SETJMP_H
				3190	static long
				3191	vim_regexec_both(line_arg, col_arg)
				3192	char_u *line_arg;
				3193	colnr_T col_arg; /* column to start looking for match */
				3194	#else
				3195	static long
				3196	vim_regexec_both(line, col)
				3197	char_u *line;
				3198	colnr_T col; /* column to start looking for match */
				3199	#endif
				3200	{
				3201	regprog_T *prog;
				3202	char_u *s;
				3203	long retval;
				3204	#ifdef HAVE_SETJMP_H
				3205	char_u *line;
				3206	colnr_T col;
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3207	int did_mch_startjmp = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3208	#endif
				3209
				3210	reg_tofree = NULL;
				3211
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3212	#ifdef HAVE_SETJMP_H
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3213	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3214	line = line_arg;
				3215	col = col_arg;
				3216	#endif
				3217	retval = 0L;
				3218
				3219	if (REG_MULTI)
				3220	{
				3221	prog = reg_mmatch->regprog;
				3222	line = reg_getline((linenr_T)0);
				3223	reg_startpos = reg_mmatch->startpos;
				3224	reg_endpos = reg_mmatch->endpos;
				3225	}
				3226	else
				3227	{
				3228	prog = reg_match->regprog;
				3229	reg_startp = reg_match->startp;
				3230	reg_endp = reg_match->endp;
				3231	}
				3232
				3233	/* Be paranoid... */
				3234	if (prog == NULL \|\| line == NULL)
				3235	{
				3236	EMSG(_(e_null));
				3237	goto theend;
				3238	}
				3239
				3240	/* Check validity of program. */
				3241	if (prog_magic_wrong())
				3242	goto theend;
				3243
				3244	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3245	if (prog->regflags & RF_ICASE)
				3246	ireg_ic = TRUE;
				3247	else if (prog->regflags & RF_NOICASE)
				3248	ireg_ic = FALSE;
				3249
				3250	#ifdef FEAT_MBYTE
				3251	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3252	if (prog->regflags & RF_ICOMBINE)
				3253	ireg_icombine = TRUE;
				3254	#endif
				3255
				3256	/* If there is a "must appear" string, look for it. */
				3257	if (prog->regmust != NULL)
				3258	{
				3259	int c;
				3260
				3261	#ifdef FEAT_MBYTE
				3262	if (has_mbyte)
				3263	c = (*mb_ptr2char)(prog->regmust);
				3264	else
				3265	#endif
				3266	c = *prog->regmust;
				3267	s = line + col;
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3268
				3269	/*
				3270	* This is used very often, esp. for ":global". Use three versions of
				3271	* the loop to avoid overhead of conditions.
				3272	*/
				3273	if (!ireg_ic
				3274	#ifdef FEAT_MBYTE
				3275	&& !has_mbyte
				3276	#endif
				3277	)
				3278	while ((s = vim_strbyte(s, c)) != NULL)
				3279	{
				3280	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3281	break; /* Found it. */
				3282	++s;
				3283	}
				3284	#ifdef FEAT_MBYTE
				3285	else if (!ireg_ic \|\| (!enc_utf8 && mb_char2len(c) > 1))
				3286	while ((s = vim_strchr(s, c)) != NULL)
				3287	{
				3288	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3289	break; /* Found it. */
				3290	mb_ptr_adv(s);
				3291	}
				3292	#endif
				3293	else
				3294	while ((s = cstrchr(s, c)) != NULL)
				3295	{
				3296	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3297	break; /* Found it. */
				3298	mb_ptr_adv(s);
				3299	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3300	if (s == NULL) /* Not present. */
				3301	goto theend;
				3302	}
				3303
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3304	#ifdef HAVE_TRY_EXCEPT
				3305	__try
				3306	{
				3307	#endif
				3308
				3309	#ifdef HAVE_SETJMP_H
				3310	/*
				3311	* Matching with a regexp may cause a very deep recursive call of
				3312	* regmatch(). Vim will crash when running out of stack space. Catch
				3313	* this here if the system supports it.
				3314	* It's a bit slow, do it after the check for "regmust".
				3315	* Don't do it if the caller already set it up.
				3316	*/
				3317	if (!lc_active)
				3318	{
				3319	did_mch_startjmp = TRUE;
				3320	mch_startjmp();
				3321	if (SETJMP(lc_jump_env) != 0)
				3322	{
				3323	mch_didjmp();
				3324	# ifdef SIGHASARG
				3325	if (lc_signal != SIGINT)
				3326	# endif
				3327	EMSG(_(e_complex));
				3328	retval = 0L;
				3329	goto inner_end;
				3330	}
				3331	}
				3332	#endif
				3333
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3334	regline = line;
				3335	reglnum = 0;
				3336	out_of_stack = FALSE;
				3337
				3338	/* Simplest case: Anchored match need be tried only once. */
				3339	if (prog->reganch)
				3340	{
				3341	int c;
				3342
				3343	#ifdef FEAT_MBYTE
				3344	if (has_mbyte)
				3345	c = (*mb_ptr2char)(regline + col);
				3346	else
				3347	#endif
				3348	c = regline[col];
				3349	if (prog->regstart == NUL
				3350	\|\| prog->regstart == c
				3351	\|\| (ireg_ic && ((
				3352	#ifdef FEAT_MBYTE
				3353	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3354	\|\| (c < 255 && prog->regstart < 255 &&
				3355	#endif
				3356	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3357	retval = regtry(prog, col);
				3358	else
				3359	retval = 0;
				3360	}
				3361	else
				3362	{
				3363	/* Messy cases: unanchored match. */
				3364	while (!got_int && !out_of_stack)
				3365	{
				3366	if (prog->regstart != NUL)
				3367	{
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3368	/* Skip until the char we know it must start with.
				3369	* Used often, do some work to avoid call overhead. */
				3370	if (!ireg_ic
				3371	#ifdef FEAT_MBYTE
				3372	&& !has_mbyte
				3373	#endif
				3374	)
				3375	s = vim_strbyte(regline + col, prog->regstart);
				3376	else
				3377	s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3378	if (s == NULL)
				3379	{
				3380	retval = 0;
				3381	break;
				3382	}
				3383	col = (int)(s - regline);
				3384	}
				3385
				3386	retval = regtry(prog, col);
				3387	if (retval > 0)
				3388	break;
				3389
				3390	/* if not currently on the first line, get it again */
				3391	if (reglnum != 0)
				3392	{
				3393	regline = reg_getline((linenr_T)0);
				3394	reglnum = 0;
				3395	}
				3396	if (regline[col] == NUL)
				3397	break;
				3398	#ifdef FEAT_MBYTE
				3399	if (has_mbyte)
				3400	col += (*mb_ptr2len_check)(regline + col);
				3401	else
				3402	#endif
				3403	++col;
				3404	}
				3405	}
				3406
				3407	if (out_of_stack)
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3408	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3409
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3410	#ifdef HAVE_SETJMP_H
				3411	inner_end:
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3412	if (did_mch_startjmp)
				3413	mch_endjmp();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3414	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3415	#ifdef HAVE_TRY_EXCEPT
				3416	}
				3417	__except(EXCEPTION_EXECUTE_HANDLER)
				3418	{
				3419	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3420	{
				3421	RESETSTKOFLW();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3422	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3423	}
				3424	else
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3425	EMSG(_(e_complex));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3426	retval = 0L;
				3427	}
				3428	#endif
				3429
				3430	theend:
				3431	/* Didn't find a match. */
				3432	vim_free(reg_tofree);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3433	return retval;
				3434	}
				3435
				3436	#ifdef FEAT_SYN_HL
				3437	static reg_extmatch_T *make_extmatch __ARGS((void));
				3438
				3439	/*
				3440	* Create a new extmatch and mark it as referenced once.
				3441	*/
				3442	static reg_extmatch_T *
				3443	make_extmatch()
				3444	{
				3445	reg_extmatch_T *em;
				3446
				3447	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3448	if (em != NULL)
				3449	em->refcnt = 1;
				3450	return em;
				3451	}
				3452
				3453	/*
				3454	* Add a reference to an extmatch.
				3455	*/
				3456	reg_extmatch_T *
				3457	ref_extmatch(em)
				3458	reg_extmatch_T *em;
				3459	{
				3460	if (em != NULL)
				3461	em->refcnt++;
				3462	return em;
				3463	}
				3464
				3465	/*
				3466	* Remove a reference to an extmatch. If there are no references left, free
				3467	* the info.
				3468	*/
				3469	void
				3470	unref_extmatch(em)
				3471	reg_extmatch_T *em;
				3472	{
				3473	int i;
				3474
				3475	if (em != NULL && --em->refcnt <= 0)
				3476	{
				3477	for (i = 0; i < NSUBEXP; ++i)
				3478	vim_free(em->matches[i]);
				3479	vim_free(em);
				3480	}
				3481	}
				3482	#endif
				3483
				3484	/*
				3485	* regtry - try match of "prog" with at regline["col"].
				3486	* Returns 0 for failure, number of lines contained in the match otherwise.
				3487	*/
				3488	static long
				3489	regtry(prog, col)
				3490	regprog_T *prog;
				3491	colnr_T col;
				3492	{
				3493	reginput = regline + col;
				3494	need_clear_subexpr = TRUE;
				3495	#ifdef FEAT_SYN_HL
				3496	/* Clear the external match subpointers if necessary. */
				3497	if (prog->reghasz == REX_SET)
				3498	need_clear_zsubexpr = TRUE;
				3499	#endif
				3500
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3501	if (regmatch(prog->program + 1, NULL))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3502	{
				3503	cleanup_subexpr();
				3504	if (REG_MULTI)
				3505	{
				3506	if (reg_startpos[0].lnum < 0)
				3507	{
				3508	reg_startpos[0].lnum = 0;
				3509	reg_startpos[0].col = col;
				3510	}
				3511	if (reg_endpos[0].lnum < 0)
				3512	{
				3513	reg_endpos[0].lnum = reglnum;
				3514	reg_endpos[0].col = (int)(reginput - regline);
				3515	}
				3516	else
				3517	/* Use line number of "\ze". */
				3518	reglnum = reg_endpos[0].lnum;
				3519	}
				3520	else
				3521	{
				3522	if (reg_startp[0] == NULL)
				3523	reg_startp[0] = regline + col;
				3524	if (reg_endp[0] == NULL)
				3525	reg_endp[0] = reginput;
				3526	}
				3527	#ifdef FEAT_SYN_HL
				3528	/* Package any found \z(...\) matches for export. Default is none. */
				3529	unref_extmatch(re_extmatch_out);
				3530	re_extmatch_out = NULL;
				3531
				3532	if (prog->reghasz == REX_SET)
				3533	{
				3534	int i;
				3535
				3536	cleanup_zsubexpr();
				3537	re_extmatch_out = make_extmatch();
				3538	for (i = 0; i < NSUBEXP; i++)
				3539	{
				3540	if (REG_MULTI)
				3541	{
				3542	/* Only accept single line matches. */
				3543	if (reg_startzpos[i].lnum >= 0
				3544	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3545	re_extmatch_out->matches[i] =
				3546	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3547	+ reg_startzpos[i].col,
				3548	reg_endzpos[i].col - reg_startzpos[i].col);
				3549	}
				3550	else
				3551	{
				3552	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3553	re_extmatch_out->matches[i] =
				3554	vim_strnsave(reg_startzp[i],
				3555	(int)(reg_endzp[i] - reg_startzp[i]));
				3556	}
				3557	}
				3558	}
				3559	#endif
				3560	return 1 + reglnum;
				3561	}
				3562	return 0;
				3563	}
				3564
				3565	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3566	static int reg_prev_class __ARGS((void));
				3567
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3568	/*
				3569	* Get class of previous character.
				3570	*/
				3571	static int
				3572	reg_prev_class()
				3573	{
				3574	if (reginput > regline)
				3575	return mb_get_class(reginput - 1
				3576	- (*mb_head_off)(regline, reginput - 1));
				3577	return -1;
				3578	}
				3579
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3580	#endif
/* Move "reginput" forward with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Storage for the arguments from BRACE_LIMITS.  Logically these belong to
 * regmatch(); they are kept at file scope so that regmatch(), which can be
 * called recursively many times, uses less stack space.
 */
static long	bl_minval;
static long	bl_maxval;
				3590
				3591	/*
				3592	* regmatch - main matching routine
				3593	*
				3594	* Conceptually the strategy is simple: Check to see whether the current
				3595	* node matches, call self recursively to see whether the rest matches,
				3596	* and then act accordingly. In practice we make some effort to avoid
				3597	* recursion, in particular by going through "ordinary" nodes (that don't
				3598	* need to know whether the rest of the match failed) by a loop instead of
				3599	* by recursion.
				3600	*
				3601	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3602	* the last matched character.
				3603	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3604	* undefined state!
				3605	*/
				3606	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3607	regmatch(scan, startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3608	char_u scan; / Current node. */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3609	regsave_T startp; / start position for BACK */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3610	{
				3611	char_u next; / Next node. */
				3612	int op;
				3613	int c;
				3614
				3615	#ifdef HAVE_GETRLIMIT
				3616	/* Check if we are running out of stack space. Could be caused by
				3617	* recursively calling ourselves. */
				3618	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3619	{
				3620	out_of_stack = TRUE;
				3621	return FALSE;
				3622	}
				3623	#endif
				3624
				3625	/* Some patterns my cause a long time to match, even though they are not
				3626	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3627	fast_breakcheck();
				3628
				3629	#ifdef DEBUG
				3630	if (scan != NULL && regnarrate)
				3631	{
				3632	mch_errmsg(regprop(scan));
				3633	mch_errmsg("(\n");
				3634	}
				3635	#endif
				3636	while (scan != NULL)
				3637	{
				3638	if (got_int \|\| out_of_stack)
				3639	return FALSE;
				3640	#ifdef DEBUG
				3641	if (regnarrate)
				3642	{
				3643	mch_errmsg(regprop(scan));
				3644	mch_errmsg("...\n");
				3645	# ifdef FEAT_SYN_HL
				3646	if (re_extmatch_in != NULL)
				3647	{
				3648	int i;
				3649
				3650	mch_errmsg(_("External submatches:\n"));
				3651	for (i = 0; i < NSUBEXP; i++)
				3652	{
				3653	mch_errmsg(" \"");
				3654	if (re_extmatch_in->matches[i] != NULL)
				3655	mch_errmsg(re_extmatch_in->matches[i]);
				3656	mch_errmsg("\"\n");
				3657	}
				3658	}
				3659	# endif
				3660	}
				3661	#endif
				3662	next = regnext(scan);
				3663
				3664	op = OP(scan);
				3665	/* Check for character class with NL added. */
				3666	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3667	{
				3668	reg_nextline();
				3669	}
				3670	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3671	{
				3672	ADVANCE_REGINPUT();
				3673	}
				3674	else
				3675	{
				3676	if (WITH_NL(op))
				3677	op -= ADD_NL;
				3678	#ifdef FEAT_MBYTE
				3679	if (has_mbyte)
				3680	c = (*mb_ptr2char)(reginput);
				3681	else
				3682	#endif
				3683	c = *reginput;
				3684	switch (op)
				3685	{
				3686	case BOL:
				3687	if (reginput != regline)
				3688	return FALSE;
				3689	break;
				3690
				3691	case EOL:
				3692	if (c != NUL)
				3693	return FALSE;
				3694	break;
				3695
				3696	case RE_BOF:
				3697	/* Passing -1 to the getline() function provided for the search
				3698	* should always return NULL if the current line is the first
				3699	* line of the file. */
				3700	if (reglnum != 0 \|\| reginput != regline
				3701	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3702	return FALSE;
				3703	break;
				3704
				3705	case RE_EOF:
				3706	if (reglnum != reg_maxline \|\| c != NUL)
				3707	return FALSE;
				3708	break;
				3709
				3710	case CURSOR:
				3711	/* Check if the buffer is in a window and compare the
				3712	* reg_win->w_cursor position to the match position. */
				3713	if (reg_win == NULL
				3714	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3715	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3716	return FALSE;
				3717	break;
				3718
				3719	case RE_LNUM:
				3720	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3721	scan))
				3722	return FALSE;
				3723	break;
				3724
				3725	case RE_COL:
				3726	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3727	return FALSE;
				3728	break;
				3729
				3730	case RE_VCOL:
				3731	if (!re_num_cmp((long_u)win_linetabsize(
				3732	reg_win == NULL ? curwin : reg_win,
				3733	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3734	return FALSE;
				3735	break;
				3736
				3737	case BOW: /* \<word; reginput points to w */
				3738	if (c == NUL) /* Can't match at end of line */
				3739	return FALSE;
				3740	#ifdef FEAT_MBYTE
				3741	if (has_mbyte)
				3742	{
				3743	int this_class;
				3744
				3745	/* Get class of current and previous char (if it exists). */
				3746	this_class = mb_get_class(reginput);
				3747	if (this_class <= 1)
				3748	return FALSE; /* not on a word at all */
				3749	if (reg_prev_class() == this_class)
				3750	return FALSE; /* previous char is in same word */
				3751	}
				3752	#endif
				3753	else
				3754	{
				3755	if (!vim_iswordc(c)
				3756	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3757	return FALSE;
				3758	}
				3759	break;
				3760
				3761	case EOW: /* word\>; reginput points after d */
				3762	if (reginput == regline) /* Can't match at start of line */
				3763	return FALSE;
				3764	#ifdef FEAT_MBYTE
				3765	if (has_mbyte)
				3766	{
				3767	int this_class, prev_class;
				3768
				3769	/* Get class of current and previous char (if it exists). */
				3770	this_class = mb_get_class(reginput);
				3771	prev_class = reg_prev_class();
				3772	if (this_class == prev_class)
				3773	return FALSE;
				3774	if (prev_class == 0 \|\| prev_class == 1)
				3775	return FALSE;
				3776	}
				3777	else
				3778	#endif
				3779	{
				3780	if (!vim_iswordc(reginput[-1]))
				3781	return FALSE;
				3782	if (reginput[0] != NUL && vim_iswordc(c))
				3783	return FALSE;
				3784	}
				3785	break; /* Matched with EOW */
				3786
				3787	case ANY:
				3788	if (c == NUL)
				3789	return FALSE;
				3790	ADVANCE_REGINPUT();
				3791	break;
				3792
				3793	case IDENT:
				3794	if (!vim_isIDc(c))
				3795	return FALSE;
				3796	ADVANCE_REGINPUT();
				3797	break;
				3798
				3799	case SIDENT:
				3800	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3801	return FALSE;
				3802	ADVANCE_REGINPUT();
				3803	break;
				3804
				3805	case KWORD:
				3806	if (!vim_iswordp(reginput))
				3807	return FALSE;
				3808	ADVANCE_REGINPUT();
				3809	break;
				3810
				3811	case SKWORD:
				3812	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3813	return FALSE;
				3814	ADVANCE_REGINPUT();
				3815	break;
				3816
				3817	case FNAME:
				3818	if (!vim_isfilec(c))
				3819	return FALSE;
				3820	ADVANCE_REGINPUT();
				3821	break;
				3822
				3823	case SFNAME:
				3824	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3825	return FALSE;
				3826	ADVANCE_REGINPUT();
				3827	break;
				3828
				3829	case PRINT:
				3830	if (ptr2cells(reginput) != 1)
				3831	return FALSE;
				3832	ADVANCE_REGINPUT();
				3833	break;
				3834
				3835	case SPRINT:
				3836	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3837	return FALSE;
				3838	ADVANCE_REGINPUT();
				3839	break;
				3840
				3841	case WHITE:
				3842	if (!vim_iswhite(c))
				3843	return FALSE;
				3844	ADVANCE_REGINPUT();
				3845	break;
				3846
				3847	case NWHITE:
				3848	if (c == NUL \|\| vim_iswhite(c))
				3849	return FALSE;
				3850	ADVANCE_REGINPUT();
				3851	break;
				3852
				3853	case DIGIT:
				3854	if (!ri_digit(c))
				3855	return FALSE;
				3856	ADVANCE_REGINPUT();
				3857	break;
				3858
				3859	case NDIGIT:
				3860	if (c == NUL \|\| ri_digit(c))
				3861	return FALSE;
				3862	ADVANCE_REGINPUT();
				3863	break;
				3864
				3865	case HEX:
				3866	if (!ri_hex(c))
				3867	return FALSE;
				3868	ADVANCE_REGINPUT();
				3869	break;
				3870
				3871	case NHEX:
				3872	if (c == NUL \|\| ri_hex(c))
				3873	return FALSE;
				3874	ADVANCE_REGINPUT();
				3875	break;
				3876
				3877	case OCTAL:
				3878	if (!ri_octal(c))
				3879	return FALSE;
				3880	ADVANCE_REGINPUT();
				3881	break;
				3882
				3883	case NOCTAL:
				3884	if (c == NUL \|\| ri_octal(c))
				3885	return FALSE;
				3886	ADVANCE_REGINPUT();
				3887	break;
				3888
				3889	case WORD:
				3890	if (!ri_word(c))
				3891	return FALSE;
				3892	ADVANCE_REGINPUT();
				3893	break;
				3894
				3895	case NWORD:
				3896	if (c == NUL \|\| ri_word(c))
				3897	return FALSE;
				3898	ADVANCE_REGINPUT();
				3899	break;
				3900
				3901	case HEAD:
				3902	if (!ri_head(c))
				3903	return FALSE;
				3904	ADVANCE_REGINPUT();
				3905	break;
				3906
				3907	case NHEAD:
				3908	if (c == NUL \|\| ri_head(c))
				3909	return FALSE;
				3910	ADVANCE_REGINPUT();
				3911	break;
				3912
				3913	case ALPHA:
				3914	if (!ri_alpha(c))
				3915	return FALSE;
				3916	ADVANCE_REGINPUT();
				3917	break;
				3918
				3919	case NALPHA:
				3920	if (c == NUL \|\| ri_alpha(c))
				3921	return FALSE;
				3922	ADVANCE_REGINPUT();
				3923	break;
				3924
				3925	case LOWER:
				3926	if (!ri_lower(c))
				3927	return FALSE;
				3928	ADVANCE_REGINPUT();
				3929	break;
				3930
				3931	case NLOWER:
				3932	if (c == NUL \|\| ri_lower(c))
				3933	return FALSE;
				3934	ADVANCE_REGINPUT();
				3935	break;
				3936
				3937	case UPPER:
				3938	if (!ri_upper(c))
				3939	return FALSE;
				3940	ADVANCE_REGINPUT();
				3941	break;
				3942
				3943	case NUPPER:
				3944	if (c == NUL \|\| ri_upper(c))
				3945	return FALSE;
				3946	ADVANCE_REGINPUT();
				3947	break;
				3948
				3949	case EXACTLY:
				3950	{
				3951	int len;
				3952	char_u *opnd;
				3953
				3954	opnd = OPERAND(scan);
				3955	/* Inline the first byte, for speed. */
				3956	if (opnd != reginput
				3957	&& (!ireg_ic \|\| (
				3958	#ifdef FEAT_MBYTE
				3959	!enc_utf8 &&
				3960	#endif
				3961	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3962	return FALSE;
				3963	if (*opnd == NUL)
				3964	{
				3965	/* match empty string always works; happens when "~" is
				3966	* empty. */
				3967	}
				3968	else if (opnd[1] == NUL
				3969	#ifdef FEAT_MBYTE
				3970	&& !(enc_utf8 && ireg_ic)
				3971	#endif
				3972	)
				3973	++reginput; /* matched a single char */
				3974	else
				3975	{
				3976	len = (int)STRLEN(opnd);
				3977	/* Need to match first byte again for multi-byte. */
				3978	if (cstrncmp(opnd, reginput, &len) != 0)
				3979	return FALSE;
				3980	#ifdef FEAT_MBYTE
				3981	/* Check for following composing character. */
				3982	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3983	{
				3984	/* raaron: This code makes a composing character get
				3985	* ignored, which is the correct behavior (sometimes)
				3986	* for voweled Hebrew texts. */
				3987	if (!ireg_icombine)
				3988	return FALSE;
				3989	}
				3990	else
				3991	#endif
				3992	reginput += len;
				3993	}
				3994	}
				3995	break;
				3996
				3997	case ANYOF:
				3998	case ANYBUT:
				3999	if (c == NUL)
				4000	return FALSE;
				4001	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				4002	return FALSE;
				4003	ADVANCE_REGINPUT();
				4004	break;
				4005
				4006	#ifdef FEAT_MBYTE
				4007	case MULTIBYTECODE:
				4008	if (has_mbyte)
				4009	{
				4010	int i, len;
				4011	char_u *opnd;
				4012
				4013	opnd = OPERAND(scan);
				4014	/* Safety check (just in case 'encoding' was changed since
				4015	* compiling the program). */
				4016	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				4017	return FALSE;
				4018	for (i = 0; i < len; ++i)
				4019	if (opnd[i] != reginput[i])
				4020	return FALSE;
				4021	reginput += len;
				4022	}
				4023	else
				4024	return FALSE;
				4025	break;
				4026	#endif
				4027
				4028	case NOTHING:
				4029	break;
				4030
				4031	case BACK:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4032	/* When we run into BACK without matching something non-empty, we
				4033	* fail. */
				4034	if (startp != NULL && reg_save_equal(startp))
				4035	return FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4036	break;
				4037
				4038	case MOPEN + 0: /* Match start: \zs */
				4039	case MOPEN + 1: /* \( */
				4040	case MOPEN + 2:
				4041	case MOPEN + 3:
				4042	case MOPEN + 4:
				4043	case MOPEN + 5:
				4044	case MOPEN + 6:
				4045	case MOPEN + 7:
				4046	case MOPEN + 8:
				4047	case MOPEN + 9:
				4048	{
				4049	int no;
				4050	save_se_T save;
				4051
				4052	no = op - MOPEN;
				4053	cleanup_subexpr();
				4054	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				4055
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4056	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4057	return TRUE;
				4058
				4059	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				4060	return FALSE;
				4061	}
				4062	/* break; Not Reached */
				4063
				4064	case NOPEN: /* \%( */
				4065	case NCLOSE: /* \) after \%( */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4066	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4067	return TRUE;
				4068	return FALSE;
				4069	/* break; Not Reached */
				4070
				4071	#ifdef FEAT_SYN_HL
				4072	case ZOPEN + 1:
				4073	case ZOPEN + 2:
				4074	case ZOPEN + 3:
				4075	case ZOPEN + 4:
				4076	case ZOPEN + 5:
				4077	case ZOPEN + 6:
				4078	case ZOPEN + 7:
				4079	case ZOPEN + 8:
				4080	case ZOPEN + 9:
				4081	{
				4082	int no;
				4083	save_se_T save;
				4084
				4085	no = op - ZOPEN;
				4086	cleanup_zsubexpr();
				4087	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4088
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4089	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4090	return TRUE;
				4091
				4092	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4093	return FALSE;
				4094	}
				4095	/* break; Not Reached */
				4096	#endif
				4097
				4098	case MCLOSE + 0: /* Match end: \ze */
				4099	case MCLOSE + 1: /* \) */
				4100	case MCLOSE + 2:
				4101	case MCLOSE + 3:
				4102	case MCLOSE + 4:
				4103	case MCLOSE + 5:
				4104	case MCLOSE + 6:
				4105	case MCLOSE + 7:
				4106	case MCLOSE + 8:
				4107	case MCLOSE + 9:
				4108	{
				4109	int no;
				4110	save_se_T save;
				4111
				4112	no = op - MCLOSE;
				4113	cleanup_subexpr();
				4114	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				4115
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4116	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4117	return TRUE;
				4118
				4119	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				4120	return FALSE;
				4121	}
				4122	/* break; Not Reached */
				4123
				4124	#ifdef FEAT_SYN_HL
				4125	case ZCLOSE + 1: /* \) after \z( */
				4126	case ZCLOSE + 2:
				4127	case ZCLOSE + 3:
				4128	case ZCLOSE + 4:
				4129	case ZCLOSE + 5:
				4130	case ZCLOSE + 6:
				4131	case ZCLOSE + 7:
				4132	case ZCLOSE + 8:
				4133	case ZCLOSE + 9:
				4134	{
				4135	int no;
				4136	save_se_T save;
				4137
				4138	no = op - ZCLOSE;
				4139	cleanup_zsubexpr();
				4140	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4141
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4142	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4143	return TRUE;
				4144
				4145	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4146	return FALSE;
				4147	}
				4148	/* break; Not Reached */
				4149	#endif
				4150
				4151	case BACKREF + 1:
				4152	case BACKREF + 2:
				4153	case BACKREF + 3:
				4154	case BACKREF + 4:
				4155	case BACKREF + 5:
				4156	case BACKREF + 6:
				4157	case BACKREF + 7:
				4158	case BACKREF + 8:
				4159	case BACKREF + 9:
				4160	{
				4161	int no;
				4162	int len;
				4163	linenr_T clnum;
				4164	colnr_T ccol;
				4165	char_u *p;
				4166
				4167	no = op - BACKREF;
				4168	cleanup_subexpr();
				4169	if (!REG_MULTI) /* Single-line regexp */
				4170	{
				4171	if (reg_endp[no] == NULL)
				4172	{
				4173	/* Backref was not set: Match an empty string. */
				4174	len = 0;
				4175	}
				4176	else
				4177	{
				4178	/* Compare current input with back-ref in the same
				4179	* line. */
				4180	len = (int)(reg_endp[no] - reg_startp[no]);
				4181	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				4182	return FALSE;
				4183	}
				4184	}
				4185	else /* Multi-line regexp */
				4186	{
				4187	if (reg_endpos[no].lnum < 0)
				4188	{
				4189	/* Backref was not set: Match an empty string. */
				4190	len = 0;
				4191	}
				4192	else
				4193	{
				4194	if (reg_startpos[no].lnum == reglnum
				4195	&& reg_endpos[no].lnum == reglnum)
				4196	{
				4197	/* Compare back-ref within the current line. */
				4198	len = reg_endpos[no].col - reg_startpos[no].col;
				4199	if (cstrncmp(regline + reg_startpos[no].col,
				4200	reginput, &len) != 0)
				4201	return FALSE;
				4202	}
				4203	else
				4204	{
				4205	/* Messy situation: Need to compare between two
				4206	* lines. */
				4207	ccol = reg_startpos[no].col;
				4208	clnum = reg_startpos[no].lnum;
				4209	for (;;)
				4210	{
				4211	/* Since getting one line may invalidate
				4212	* the other, need to make copy. Slow! */
				4213	if (regline != reg_tofree)
				4214	{
				4215	len = (int)STRLEN(regline);
				4216	if (reg_tofree == NULL
				4217	\|\| len >= (int)reg_tofreelen)
				4218	{
				4219	len += 50; /* get some extra */
				4220	vim_free(reg_tofree);
				4221	reg_tofree = alloc(len);
				4222	if (reg_tofree == NULL)
				4223	return FALSE; /* out of memory! */
				4224	reg_tofreelen = len;
				4225	}
				4226	STRCPY(reg_tofree, regline);
				4227	reginput = reg_tofree
				4228	+ (reginput - regline);
				4229	regline = reg_tofree;
				4230	}
				4231
				4232	/* Get the line to compare with. */
				4233	p = reg_getline(clnum);
				4234	if (clnum == reg_endpos[no].lnum)
				4235	len = reg_endpos[no].col - ccol;
				4236	else
				4237	len = (int)STRLEN(p + ccol);
				4238
				4239	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4240	return FALSE; /* doesn't match */
				4241	if (clnum == reg_endpos[no].lnum)
				4242	break; /* match and at end! */
				4243	if (reglnum == reg_maxline)
				4244	return FALSE; /* text too short */
				4245
				4246	/* Advance to next line. */
				4247	reg_nextline();
				4248	++clnum;
				4249	ccol = 0;
				4250	if (got_int \|\| out_of_stack)
				4251	return FALSE;
				4252	}
				4253
				4254	/* found a match! Note that regline may now point
				4255	* to a copy of the line, that should not matter. */
				4256	}
				4257	}
				4258	}
				4259
				4260	/* Matched the backref, skip over it. */
				4261	reginput += len;
				4262	}
				4263	break;
				4264
				4265	#ifdef FEAT_SYN_HL
				4266	case ZREF + 1:
				4267	case ZREF + 2:
				4268	case ZREF + 3:
				4269	case ZREF + 4:
				4270	case ZREF + 5:
				4271	case ZREF + 6:
				4272	case ZREF + 7:
				4273	case ZREF + 8:
				4274	case ZREF + 9:
				4275	{
				4276	int no;
				4277	int len;
				4278
				4279	cleanup_zsubexpr();
				4280	no = op - ZREF;
				4281	if (re_extmatch_in != NULL
				4282	&& re_extmatch_in->matches[no] != NULL)
				4283	{
				4284	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4285	if (cstrncmp(re_extmatch_in->matches[no],
				4286	reginput, &len) != 0)
				4287	return FALSE;
				4288	reginput += len;
				4289	}
				4290	else
				4291	{
				4292	/* Backref was not set: Match an empty string. */
				4293	}
				4294	}
				4295	break;
				4296	#endif
				4297
				4298	case BRANCH:
				4299	{
				4300	if (OP(next) != BRANCH) /* No choice. */
				4301	next = OPERAND(scan); /* Avoid recursion. */
				4302	else
				4303	{
				4304	regsave_T save;
				4305
				4306	do
				4307	{
				4308	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4309	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4310	return TRUE;
				4311	reg_restore(&save);
				4312	scan = regnext(scan);
				4313	} while (scan != NULL && OP(scan) == BRANCH);
				4314	return FALSE;
				4315	/* NOTREACHED */
				4316	}
				4317	}
				4318	break;
				4319
				4320	case BRACE_LIMITS:
				4321	{
				4322	int no;
				4323
				4324	if (OP(next) == BRACE_SIMPLE)
				4325	{
				4326	bl_minval = OPERAND_MIN(scan);
				4327	bl_maxval = OPERAND_MAX(scan);
				4328	}
				4329	else if (OP(next) >= BRACE_COMPLEX
				4330	&& OP(next) < BRACE_COMPLEX + 10)
				4331	{
				4332	no = OP(next) - BRACE_COMPLEX;
				4333	brace_min[no] = OPERAND_MIN(scan);
				4334	brace_max[no] = OPERAND_MAX(scan);
				4335	brace_count[no] = 0;
				4336	}
				4337	else
				4338	{
				4339	EMSG(_(e_internal)); /* Shouldn't happen */
				4340	return FALSE;
				4341	}
				4342	}
				4343	break;
				4344
				4345	case BRACE_COMPLEX + 0:
				4346	case BRACE_COMPLEX + 1:
				4347	case BRACE_COMPLEX + 2:
				4348	case BRACE_COMPLEX + 3:
				4349	case BRACE_COMPLEX + 4:
				4350	case BRACE_COMPLEX + 5:
				4351	case BRACE_COMPLEX + 6:
				4352	case BRACE_COMPLEX + 7:
				4353	case BRACE_COMPLEX + 8:
				4354	case BRACE_COMPLEX + 9:
				4355	{
				4356	int no;
				4357	regsave_T save;
				4358
				4359	no = op - BRACE_COMPLEX;
				4360	++brace_count[no];
				4361
				4362	/* If not matched enough times yet, try one more */
				4363	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4364	? brace_min[no] : brace_max[no]))
				4365	{
				4366	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4367	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4368	return TRUE;
				4369	reg_restore(&save);
				4370	--brace_count[no]; /* failed, decrement match count */
				4371	return FALSE;
				4372	}
				4373
				4374	/* If matched enough times, may try matching some more */
				4375	if (brace_min[no] <= brace_max[no])
				4376	{
				4377	/* Range is the normal way around, use longest match */
				4378	if (brace_count[no] <= brace_max[no])
				4379	{
				4380	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4381	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4382	return TRUE; /* matched some more times */
				4383	reg_restore(&save);
				4384	--brace_count[no]; /* matched just enough times */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4385	/* { continue with the items after \{} */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4386	}
				4387	}
				4388	else
				4389	{
				4390	/* Range is backwards, use shortest match first */
				4391	if (brace_count[no] <= brace_min[no])
				4392	{
				4393	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4394	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4395	return TRUE;
				4396	reg_restore(&save);
				4397	next = OPERAND(scan);
				4398	/* must try to match one more item */
				4399	}
				4400	}
				4401	}
				4402	break;
				4403
				4404	case BRACE_SIMPLE:
				4405	case STAR:
				4406	case PLUS:
				4407	{
				4408	int nextb; /* next byte */
				4409	int nextb_ic; /* next byte reverse case */
				4410	long count;
				4411	regsave_T save;
				4412	long minval;
				4413	long maxval;
				4414
				4415	/*
				4416	* Lookahead to avoid useless match attempts when we know
				4417	* what character comes next.
				4418	*/
				4419	if (OP(next) == EXACTLY)
				4420	{
				4421	nextb = *OPERAND(next);
				4422	if (ireg_ic)
				4423	{
				4424	if (isupper(nextb))
				4425	nextb_ic = TOLOWER_LOC(nextb);
				4426	else
				4427	nextb_ic = TOUPPER_LOC(nextb);
				4428	}
				4429	else
				4430	nextb_ic = nextb;
				4431	}
				4432	else
				4433	{
				4434	nextb = NUL;
				4435	nextb_ic = NUL;
				4436	}
				4437	if (op != BRACE_SIMPLE)
				4438	{
				4439	minval = (op == STAR) ? 0 : 1;
				4440	maxval = MAX_LIMIT;
				4441	}
				4442	else
				4443	{
				4444	minval = bl_minval;
				4445	maxval = bl_maxval;
				4446	}
				4447
				4448	/*
				4449	* When maxval > minval, try matching as much as possible, up
				4450	* to maxval. When maxval < minval, try matching at least the
				4451	* minimal number (since the range is backwards, that's also
				4452	* maxval!).
				4453	*/
				4454	count = regrepeat(OPERAND(scan), maxval);
				4455	if (got_int)
				4456	return FALSE;
				4457	if (minval <= maxval)
				4458	{
				4459	/* Range is the normal way around, use longest match */
				4460	while (count >= minval)
				4461	{
				4462	/* If it could match, try it. */
				4463	if (nextb == NUL \|\| *reginput == nextb
				4464	\|\| *reginput == nextb_ic)
				4465	{
				4466	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4467	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4468	return TRUE;
				4469	reg_restore(&save);
				4470	}
				4471	/* Couldn't or didn't match -- back up one char. */
				4472	if (--count < minval)
				4473	break;
				4474	if (reginput == regline)
				4475	{
				4476	/* backup to last char of previous line */
				4477	--reglnum;
				4478	regline = reg_getline(reglnum);
				4479	/* Just in case regrepeat() didn't count right. */
				4480	if (regline == NULL)
				4481	return FALSE;
				4482	reginput = regline + STRLEN(regline);
				4483	fast_breakcheck();
				4484	if (got_int \|\| out_of_stack)
				4485	return FALSE;
				4486	}
				4487	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4488	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4489	}
				4490	}
				4491	else
				4492	{
				4493	/* Range is backwards, use shortest match first.
				4494	* Careful: maxval and minval are exchanged! */
				4495	if (count < maxval)
				4496	return FALSE;
				4497	for (;;)
				4498	{
				4499	/* If it could work, try it. */
				4500	if (nextb == NUL \|\| *reginput == nextb
				4501	\|\| *reginput == nextb_ic)
				4502	{
				4503	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4504	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4505	return TRUE;
				4506	reg_restore(&save);
				4507	}
				4508	/* Couldn't or didn't match: try advancing one char. */
				4509	if (count == minval
				4510	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4511	break;
				4512	++count;
				4513	if (got_int \|\| out_of_stack)
				4514	return FALSE;
				4515	}
				4516	}
				4517	return FALSE;
				4518	}
				4519	/* break; Not Reached */
				4520
				4521	case NOMATCH:
				4522	{
				4523	regsave_T save;
				4524
				4525	/* If the operand matches, we fail. Otherwise backup and
				4526	* continue with the next item. */
				4527	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4528	if (regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4529	return FALSE;
				4530	reg_restore(&save);
				4531	}
				4532	break;
				4533
				4534	case MATCH:
				4535	case SUBPAT:
				4536	{
				4537	regsave_T save;
				4538
				4539	/* If the operand doesn't match, we fail. Otherwise backup
				4540	* and continue with the next item. */
				4541	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4542	if (!regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4543	return FALSE;
				4544	if (op == MATCH) /* zero-width */
				4545	reg_restore(&save);
				4546	}
				4547	break;
				4548
				4549	case BEHIND:
				4550	case NOBEHIND:
				4551	{
				4552	regsave_T save_after, save_start;
				4553	regsave_T save_behind_pos;
				4554	int needmatch = (op == BEHIND);
				4555
				4556	/*
				4557	* Look back in the input of the operand matches or not. This
				4558	* must be done at every position in the input and checking if
				4559	* the match ends at the current position.
				4560	* First check if the next item matches, that's probably
				4561	* faster.
				4562	*/
				4563	reg_save(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4564	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4565	{
				4566	/* save the position after the found match for next */
				4567	reg_save(&save_after);
				4568
				4569	/* start looking for a match with operand at the current
				4570	* postion. Go back one character until we find the
				4571	* result, hitting the start of the line or the previous
				4572	* line (for multi-line matching).
				4573	* Set behind_pos to where the match should end, BHPOS
				4574	* will match it. */
				4575	save_behind_pos = behind_pos;
				4576	behind_pos = save_start;
				4577	for (;;)
				4578	{
				4579	reg_restore(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4580	if (regmatch(OPERAND(scan), startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4581	&& reg_save_equal(&behind_pos))
				4582	{
				4583	behind_pos = save_behind_pos;
				4584	/* found a match that ends where "next" started */
				4585	if (needmatch)
				4586	{
				4587	reg_restore(&save_after);
				4588	return TRUE;
				4589	}
				4590	return FALSE;
				4591	}
				4592	/*
				4593	* No match: Go back one character. May go to
				4594	* previous line once.
				4595	*/
				4596	if (REG_MULTI)
				4597	{
				4598	if (save_start.rs_u.pos.col == 0)
				4599	{
				4600	if (save_start.rs_u.pos.lnum
				4601	< behind_pos.rs_u.pos.lnum
				4602	\|\| reg_getline(
				4603	--save_start.rs_u.pos.lnum) == NULL)
				4604	break;
				4605	reg_restore(&save_start);
				4606	save_start.rs_u.pos.col =
				4607	(colnr_T)STRLEN(regline);
				4608	}
				4609	else
				4610	--save_start.rs_u.pos.col;
				4611	}
				4612	else
				4613	{
				4614	if (save_start.rs_u.ptr == regline)
				4615	break;
				4616	--save_start.rs_u.ptr;
				4617	}
				4618	}
				4619
				4620	/* NOBEHIND succeeds when no match was found */
				4621	behind_pos = save_behind_pos;
				4622	if (!needmatch)
				4623	{
				4624	reg_restore(&save_after);
				4625	return TRUE;
				4626	}
				4627	}
				4628	return FALSE;
				4629	}
				4630
				4631	case BHPOS:
				4632	if (REG_MULTI)
				4633	{
				4634	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4635	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4636	return FALSE;
				4637	}
				4638	else if (behind_pos.rs_u.ptr != reginput)
				4639	return FALSE;
				4640	break;
				4641
				4642	case NEWL:
				4643	if ((c != NUL \|\| reglnum == reg_maxline)
				4644	&& (c != '\n' \|\| !reg_line_lbr))
				4645	return FALSE;
				4646	if (reg_line_lbr)
				4647	ADVANCE_REGINPUT();
				4648	else
				4649	reg_nextline();
				4650	break;
				4651
				4652	case END:
				4653	return TRUE; /* Success! */
				4654
				4655	default:
				4656	EMSG(_(e_re_corr));
				4657	#ifdef DEBUG
				4658	printf("Illegal op code %d\n", op);
				4659	#endif
				4660	return FALSE;
				4661	}
				4662	}
				4663
				4664	scan = next;
				4665	}
				4666
				4667	/*
				4668	* We get here only if there's trouble -- normally "case END" is the
				4669	* terminating point.
				4670	*/
				4671	EMSG(_(e_re_corr));
				4672	#ifdef DEBUG
				4673	printf("Premature EOL\n");
				4674	#endif
				4675	return FALSE;
				4676	}
				4677
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4678	/*
				4679	* regrepeat - repeatedly match something simple, return how many.
				4680	* Advances reginput (and reglnum) to just after the matched chars.
				4681	*/
				4682	static int
				4683	regrepeat(p, maxcount)
				4684	char_u *p;
				4685	long maxcount; /* maximum number of matches allowed */
				4686	{
				4687	long count = 0;
				4688	char_u *scan;
				4689	char_u *opnd;
				4690	int mask;
				4691	int testval = 0;
				4692
				4693	scan = reginput; /* Make local copy of reginput for speed. */
				4694	opnd = OPERAND(p);
				4695	switch (OP(p))
				4696	{
				4697	case ANY:
				4698	case ANY + ADD_NL:
				4699	while (count < maxcount)
				4700	{
				4701	/* Matching anything means we continue until end-of-line (or
				4702	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4703	while (*scan != NUL && count < maxcount)
				4704	{
				4705	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4706	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4707	}
				4708	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4709	break;
				4710	++count; /* count the line-break */
				4711	reg_nextline();
				4712	scan = reginput;
				4713	if (got_int)
				4714	break;
				4715	}
				4716	break;
				4717
				4718	case IDENT:
				4719	case IDENT + ADD_NL:
				4720	testval = TRUE;
				4721	/FALLTHROUGH/
				4722	case SIDENT:
				4723	case SIDENT + ADD_NL:
				4724	while (count < maxcount)
				4725	{
				4726	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4727	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4728	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4729	}
				4730	else if (*scan == NUL)
				4731	{
				4732	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4733	break;
				4734	reg_nextline();
				4735	scan = reginput;
				4736	if (got_int)
				4737	break;
				4738	}
				4739	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4740	++scan;
				4741	else
				4742	break;
				4743	++count;
				4744	}
				4745	break;
				4746
				4747	case KWORD:
				4748	case KWORD + ADD_NL:
				4749	testval = TRUE;
				4750	/FALLTHROUGH/
				4751	case SKWORD:
				4752	case SKWORD + ADD_NL:
				4753	while (count < maxcount)
				4754	{
				4755	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4756	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4757	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4758	}
				4759	else if (*scan == NUL)
				4760	{
				4761	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4762	break;
				4763	reg_nextline();
				4764	scan = reginput;
				4765	if (got_int)
				4766	break;
				4767	}
				4768	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4769	++scan;
				4770	else
				4771	break;
				4772	++count;
				4773	}
				4774	break;
				4775
				4776	case FNAME:
				4777	case FNAME + ADD_NL:
				4778	testval = TRUE;
				4779	/FALLTHROUGH/
				4780	case SFNAME:
				4781	case SFNAME + ADD_NL:
				4782	while (count < maxcount)
				4783	{
				4784	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4785	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4786	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4787	}
				4788	else if (*scan == NUL)
				4789	{
				4790	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4791	break;
				4792	reg_nextline();
				4793	scan = reginput;
				4794	if (got_int)
				4795	break;
				4796	}
				4797	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4798	++scan;
				4799	else
				4800	break;
				4801	++count;
				4802	}
				4803	break;
				4804
				4805	case PRINT:
				4806	case PRINT + ADD_NL:
				4807	testval = TRUE;
				4808	/FALLTHROUGH/
				4809	case SPRINT:
				4810	case SPRINT + ADD_NL:
				4811	while (count < maxcount)
				4812	{
				4813	if (*scan == NUL)
				4814	{
				4815	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4816	break;
				4817	reg_nextline();
				4818	scan = reginput;
				4819	if (got_int)
				4820	break;
				4821	}
				4822	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4823	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4824	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4825	}
				4826	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4827	++scan;
				4828	else
				4829	break;
				4830	++count;
				4831	}
				4832	break;
				4833
				4834	case WHITE:
				4835	case WHITE + ADD_NL:
				4836	testval = mask = RI_WHITE;
				4837	do_class:
				4838	while (count < maxcount)
				4839	{
				4840	#ifdef FEAT_MBYTE
				4841	int l;
				4842	#endif
				4843	if (*scan == NUL)
				4844	{
				4845	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4846	break;
				4847	reg_nextline();
				4848	scan = reginput;
				4849	if (got_int)
				4850	break;
				4851	}
				4852	#ifdef FEAT_MBYTE
				4853	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4854	{
				4855	if (testval != 0)
				4856	break;
				4857	scan += l;
				4858	}
				4859	#endif
				4860	else if ((class_tab[*scan] & mask) == testval)
				4861	++scan;
				4862	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4863	++scan;
				4864	else
				4865	break;
				4866	++count;
				4867	}
				4868	break;
				4869
				4870	case NWHITE:
				4871	case NWHITE + ADD_NL:
				4872	mask = RI_WHITE;
				4873	goto do_class;
				4874	case DIGIT:
				4875	case DIGIT + ADD_NL:
				4876	testval = mask = RI_DIGIT;
				4877	goto do_class;
				4878	case NDIGIT:
				4879	case NDIGIT + ADD_NL:
				4880	mask = RI_DIGIT;
				4881	goto do_class;
				4882	case HEX:
				4883	case HEX + ADD_NL:
				4884	testval = mask = RI_HEX;
				4885	goto do_class;
				4886	case NHEX:
				4887	case NHEX + ADD_NL:
				4888	mask = RI_HEX;
				4889	goto do_class;
				4890	case OCTAL:
				4891	case OCTAL + ADD_NL:
				4892	testval = mask = RI_OCTAL;
				4893	goto do_class;
				4894	case NOCTAL:
				4895	case NOCTAL + ADD_NL:
				4896	mask = RI_OCTAL;
				4897	goto do_class;
				4898	case WORD:
				4899	case WORD + ADD_NL:
				4900	testval = mask = RI_WORD;
				4901	goto do_class;
				4902	case NWORD:
				4903	case NWORD + ADD_NL:
				4904	mask = RI_WORD;
				4905	goto do_class;
				4906	case HEAD:
				4907	case HEAD + ADD_NL:
				4908	testval = mask = RI_HEAD;
				4909	goto do_class;
				4910	case NHEAD:
				4911	case NHEAD + ADD_NL:
				4912	mask = RI_HEAD;
				4913	goto do_class;
				4914	case ALPHA:
				4915	case ALPHA + ADD_NL:
				4916	testval = mask = RI_ALPHA;
				4917	goto do_class;
				4918	case NALPHA:
				4919	case NALPHA + ADD_NL:
				4920	mask = RI_ALPHA;
				4921	goto do_class;
				4922	case LOWER:
				4923	case LOWER + ADD_NL:
				4924	testval = mask = RI_LOWER;
				4925	goto do_class;
				4926	case NLOWER:
				4927	case NLOWER + ADD_NL:
				4928	mask = RI_LOWER;
				4929	goto do_class;
				4930	case UPPER:
				4931	case UPPER + ADD_NL:
				4932	testval = mask = RI_UPPER;
				4933	goto do_class;
				4934	case NUPPER:
				4935	case NUPPER + ADD_NL:
				4936	mask = RI_UPPER;
				4937	goto do_class;
				4938
				4939	case EXACTLY:
				4940	{
				4941	int cu, cl;
				4942
				4943	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4944	* would have been used for it. */
				4945	if (ireg_ic)
				4946	{
				4947	cu = TOUPPER_LOC(*opnd);
				4948	cl = TOLOWER_LOC(*opnd);
				4949	while (count < maxcount && (scan == cu \|\| scan == cl))
				4950	{
				4951	count++;
				4952	scan++;
				4953	}
				4954	}
				4955	else
				4956	{
				4957	cu = *opnd;
				4958	while (count < maxcount && *scan == cu)
				4959	{
				4960	count++;
				4961	scan++;
				4962	}
				4963	}
				4964	break;
				4965	}
				4966
				4967	#ifdef FEAT_MBYTE
				4968	case MULTIBYTECODE:
				4969	{
				4970	int i, len, cf = 0;
				4971
				4972	/* Safety check (just in case 'encoding' was changed since
				4973	* compiling the program). */
				4974	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4975	{
				4976	if (ireg_ic && enc_utf8)
				4977	cf = utf_fold(utf_ptr2char(opnd));
				4978	while (count < maxcount)
				4979	{
				4980	for (i = 0; i < len; ++i)
				4981	if (opnd[i] != scan[i])
				4982	break;
				4983	if (i < len && (!ireg_ic \|\| !enc_utf8
				4984	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4985	break;
				4986	scan += len;
				4987	++count;
				4988	}
				4989	}
				4990	}
				4991	break;
				4992	#endif
				4993
				4994	case ANYOF:
				4995	case ANYOF + ADD_NL:
				4996	testval = TRUE;
				4997	/FALLTHROUGH/
				4998
				4999	case ANYBUT:
				5000	case ANYBUT + ADD_NL:
				5001	while (count < maxcount)
				5002	{
				5003	#ifdef FEAT_MBYTE
				5004	int len;
				5005	#endif
				5006	if (*scan == NUL)
				5007	{
				5008	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				5009	break;
				5010	reg_nextline();
				5011	scan = reginput;
				5012	if (got_int)
				5013	break;
				5014	}
				5015	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				5016	++scan;
				5017	#ifdef FEAT_MBYTE
				5018	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				5019	{
				5020	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				5021	break;
				5022	scan += len;
				5023	}
				5024	#endif
				5025	else
				5026	{
				5027	if ((cstrchr(opnd, *scan) == NULL) == testval)
				5028	break;
				5029	++scan;
				5030	}
				5031	++count;
				5032	}
				5033	break;
				5034
				5035	case NEWL:
				5036	while (count < maxcount
				5037	&& ((*scan == NUL && reglnum < reg_maxline)
				5038	\|\| (*scan == '\n' && reg_line_lbr)))
				5039	{
				5040	count++;
				5041	if (reg_line_lbr)
				5042	ADVANCE_REGINPUT();
				5043	else
				5044	reg_nextline();
				5045	scan = reginput;
				5046	if (got_int)
				5047	break;
				5048	}
				5049	break;
				5050
				5051	default: /* Oh dear. Called inappropriately. */
				5052	EMSG(_(e_re_corr));
				5053	#ifdef DEBUG
				5054	printf("Called regrepeat with op code %d\n", OP(p));
				5055	#endif
				5056	break;
				5057	}
				5058
				5059	reginput = scan;
				5060
				5061	return (int)count;
				5062	}
				5063
				5064	/*
				5065	* regnext - dig the "next" pointer out of a node
				5066	*/
				5067	static char_u *
				5068	regnext(p)
				5069	char_u *p;
				5070	{
				5071	int offset;
				5072
				5073	if (p == JUST_CALC_SIZE)
				5074	return NULL;
				5075
				5076	offset = NEXT(p);
				5077	if (offset == 0)
				5078	return NULL;
				5079
				5080	if (OP(p) == BACK)
				5081	return p - offset;
				5082	else
				5083	return p + offset;
				5084	}
				5085
				5086	/*
				5087	* Check the regexp program for its magic number.
				5088	* Return TRUE if it's wrong.
				5089	*/
				5090	static int
				5091	prog_magic_wrong()
				5092	{
				5093	if (UCHARAT(REG_MULTI
				5094	? reg_mmatch->regprog->program
				5095	: reg_match->regprog->program) != REGMAGIC)
				5096	{
				5097	EMSG(_(e_re_corr));
				5098	return TRUE;
				5099	}
				5100	return FALSE;
				5101	}
				5102
				5103	/*
				5104	* Cleanup the subexpressions, if this wasn't done yet.
				5105	* This construction is used to clear the subexpressions only when they are
				5106	* used (to increase speed).
				5107	*/
				5108	static void
				5109	cleanup_subexpr()
				5110	{
				5111	if (need_clear_subexpr)
				5112	{
				5113	if (REG_MULTI)
				5114	{
				5115	/* Use 0xff to set lnum to -1 */
				5116	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5117	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5118	}
				5119	else
				5120	{
				5121	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				5122	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				5123	}
				5124	need_clear_subexpr = FALSE;
				5125	}
				5126	}
				5127
				5128	#ifdef FEAT_SYN_HL
				5129	static void
				5130	cleanup_zsubexpr()
				5131	{
				5132	if (need_clear_zsubexpr)
				5133	{
				5134	if (REG_MULTI)
				5135	{
				5136	/* Use 0xff to set lnum to -1 */
				5137	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5138	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5139	}
				5140	else
				5141	{
				5142	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				5143	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				5144	}
				5145	need_clear_zsubexpr = FALSE;
				5146	}
				5147	}
				5148	#endif
				5149
				5150	/*
				5151	* Advance reglnum, regline and reginput to the next line.
				5152	*/
				5153	static void
				5154	reg_nextline()
				5155	{
				5156	regline = reg_getline(++reglnum);
				5157	reginput = regline;
				5158	fast_breakcheck();
				5159	}
				5160
				5161	/*
				5162	* Save the input line and position in a regsave_T.
				5163	*/
				5164	static void
				5165	reg_save(save)
				5166	regsave_T *save;
				5167	{
				5168	if (REG_MULTI)
				5169	{
				5170	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				5171	save->rs_u.pos.lnum = reglnum;
				5172	}
				5173	else
				5174	save->rs_u.ptr = reginput;
				5175	}
				5176
				5177	/*
				5178	* Restore the input line and position from a regsave_T.
				5179	*/
				5180	static void
				5181	reg_restore(save)
				5182	regsave_T *save;
				5183	{
				5184	if (REG_MULTI)
				5185	{
				5186	if (reglnum != save->rs_u.pos.lnum)
				5187	{
				5188	/* only call reg_getline() when the line number changed to save
				5189	* a bit of time */
				5190	reglnum = save->rs_u.pos.lnum;
				5191	regline = reg_getline(reglnum);
				5192	}
				5193	reginput = regline + save->rs_u.pos.col;
				5194	}
				5195	else
				5196	reginput = save->rs_u.ptr;
				5197	}
				5198
				5199	/*
				5200	* Return TRUE if current position is equal to saved position.
				5201	*/
				5202	static int
				5203	reg_save_equal(save)
				5204	regsave_T *save;
				5205	{
				5206	if (REG_MULTI)
				5207	return reglnum == save->rs_u.pos.lnum
				5208	&& reginput == regline + save->rs_u.pos.col;
				5209	return reginput == save->rs_u.ptr;
				5210	}
				5211
				5212	/*
				5213	* Tentatively set the sub-expression start to the current position (after
				5214	* calling regmatch() they will have changed). Need to save the existing
				5215	* values for when there is no match.
				5216	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				5217	* depending on REG_MULTI.
				5218	*/
				5219	static void
				5220	save_se_multi(savep, posp)
				5221	save_se_T *savep;
				5222	lpos_T *posp;
				5223	{
				5224	savep->se_u.pos = *posp;
				5225	posp->lnum = reglnum;
				5226	posp->col = (colnr_T)(reginput - regline);
				5227	}
				5228
				5229	static void
				5230	save_se_one(savep, pp)
				5231	save_se_T *savep;
				5232	char_u **pp;
				5233	{
				5234	savep->se_u.ptr = *pp;
				5235	*pp = reginput;
				5236	}
				5237
				5238	/*
				5239	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5240	*/
				5241	static int
				5242	re_num_cmp(val, scan)
				5243	long_u val;
				5244	char_u *scan;
				5245	{
				5246	long_u n = OPERAND_MIN(scan);
				5247
				5248	if (OPERAND_CMP(scan) == '>')
				5249	return val > n;
				5250	if (OPERAND_CMP(scan) == '<')
				5251	return val < n;
				5252	return val == n;
				5253	}
				5254
				5255
				5256	#ifdef DEBUG
				5257
				5258	/*
				5259	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5260	*/
				5261	static void
				5262	regdump(pattern, r)
				5263	char_u *pattern;
				5264	regprog_T *r;
				5265	{
				5266	char_u *s;
				5267	int op = EXACTLY; /* Arbitrary non-END op. */
				5268	char_u *next;
				5269	char_u *end = NULL;
				5270
				5271	printf("\r\nregcomp(%s):\r\n", pattern);
				5272
				5273	s = r->program + 1;
				5274	/*
				5275	* Loop until we find the END that isn't before a referred next (an END
				5276	* can also appear in a NOMATCH operand).
				5277	*/
				5278	while (op != END \|\| s <= end)
				5279	{
				5280	op = OP(s);
				5281	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5282	next = regnext(s);
				5283	if (next == NULL) /* Next ptr. */
				5284	printf("(0)");
				5285	else
				5286	printf("(%d)", (int)((s - r->program) + (next - s)));
				5287	if (end < next)
				5288	end = next;
				5289	if (op == BRACE_LIMITS)
				5290	{
				5291	/* Two short ints */
				5292	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5293	s += 8;
				5294	}
				5295	s += 3;
				5296	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5297	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5298	\|\| op == EXACTLY)
				5299	{
				5300	/* Literal string, where present. */
				5301	while (*s != NUL)
				5302	printf("%c", *s++);
				5303	s++;
				5304	}
				5305	printf("\r\n");
				5306	}
				5307
				5308	/* Header fields of interest. */
				5309	if (r->regstart != NUL)
				5310	printf("start `%s' 0x%x; ", r->regstart < 256
				5311	? (char *)transchar(r->regstart)
				5312	: "multibyte", r->regstart);
				5313	if (r->reganch)
				5314	printf("anchored; ");
				5315	if (r->regmust != NULL)
				5316	printf("must have \"%s\"", r->regmust);
				5317	printf("\r\n");
				5318	}
				5319
				5320	/*
				5321	* regprop - printable representation of opcode
				5322	*/
				5323	static char_u *
				5324	regprop(op)
				5325	char_u *op;
				5326	{
				5327	char_u *p;
				5328	static char_u buf[50];
				5329
				5330	(void) strcpy(buf, ":");
				5331
				5332	switch (OP(op))
				5333	{
				5334	case BOL:
				5335	p = "BOL";
				5336	break;
				5337	case EOL:
				5338	p = "EOL";
				5339	break;
				5340	case RE_BOF:
				5341	p = "BOF";
				5342	break;
				5343	case RE_EOF:
				5344	p = "EOF";
				5345	break;
				5346	case CURSOR:
				5347	p = "CURSOR";
				5348	break;
				5349	case RE_LNUM:
				5350	p = "RE_LNUM";
				5351	break;
				5352	case RE_COL:
				5353	p = "RE_COL";
				5354	break;
				5355	case RE_VCOL:
				5356	p = "RE_VCOL";
				5357	break;
				5358	case BOW:
				5359	p = "BOW";
				5360	break;
				5361	case EOW:
				5362	p = "EOW";
				5363	break;
				5364	case ANY:
				5365	p = "ANY";
				5366	break;
				5367	case ANY + ADD_NL:
				5368	p = "ANY+NL";
				5369	break;
				5370	case ANYOF:
				5371	p = "ANYOF";
				5372	break;
				5373	case ANYOF + ADD_NL:
				5374	p = "ANYOF+NL";
				5375	break;
				5376	case ANYBUT:
				5377	p = "ANYBUT";
				5378	break;
				5379	case ANYBUT + ADD_NL:
				5380	p = "ANYBUT+NL";
				5381	break;
				5382	case IDENT:
				5383	p = "IDENT";
				5384	break;
				5385	case IDENT + ADD_NL:
				5386	p = "IDENT+NL";
				5387	break;
				5388	case SIDENT:
				5389	p = "SIDENT";
				5390	break;
				5391	case SIDENT + ADD_NL:
				5392	p = "SIDENT+NL";
				5393	break;
				5394	case KWORD:
				5395	p = "KWORD";
				5396	break;
				5397	case KWORD + ADD_NL:
				5398	p = "KWORD+NL";
				5399	break;
				5400	case SKWORD:
				5401	p = "SKWORD";
				5402	break;
				5403	case SKWORD + ADD_NL:
				5404	p = "SKWORD+NL";
				5405	break;
				5406	case FNAME:
				5407	p = "FNAME";
				5408	break;
				5409	case FNAME + ADD_NL:
				5410	p = "FNAME+NL";
				5411	break;
				5412	case SFNAME:
				5413	p = "SFNAME";
				5414	break;
				5415	case SFNAME + ADD_NL:
				5416	p = "SFNAME+NL";
				5417	break;
				5418	case PRINT:
				5419	p = "PRINT";
				5420	break;
				5421	case PRINT + ADD_NL:
				5422	p = "PRINT+NL";
				5423	break;
				5424	case SPRINT:
				5425	p = "SPRINT";
				5426	break;
				5427	case SPRINT + ADD_NL:
				5428	p = "SPRINT+NL";
				5429	break;
				5430	case WHITE:
				5431	p = "WHITE";
				5432	break;
				5433	case WHITE + ADD_NL:
				5434	p = "WHITE+NL";
				5435	break;
				5436	case NWHITE:
				5437	p = "NWHITE";
				5438	break;
				5439	case NWHITE + ADD_NL:
				5440	p = "NWHITE+NL";
				5441	break;
				5442	case DIGIT:
				5443	p = "DIGIT";
				5444	break;
				5445	case DIGIT + ADD_NL:
				5446	p = "DIGIT+NL";
				5447	break;
				5448	case NDIGIT:
				5449	p = "NDIGIT";
				5450	break;
				5451	case NDIGIT + ADD_NL:
				5452	p = "NDIGIT+NL";
				5453	break;
				5454	case HEX:
				5455	p = "HEX";
				5456	break;
				5457	case HEX + ADD_NL:
				5458	p = "HEX+NL";
				5459	break;
				5460	case NHEX:
				5461	p = "NHEX";
				5462	break;
				5463	case NHEX + ADD_NL:
				5464	p = "NHEX+NL";
				5465	break;
				5466	case OCTAL:
				5467	p = "OCTAL";
				5468	break;
				5469	case OCTAL + ADD_NL:
				5470	p = "OCTAL+NL";
				5471	break;
				5472	case NOCTAL:
				5473	p = "NOCTAL";
				5474	break;
				5475	case NOCTAL + ADD_NL:
				5476	p = "NOCTAL+NL";
				5477	break;
				5478	case WORD:
				5479	p = "WORD";
				5480	break;
				5481	case WORD + ADD_NL:
				5482	p = "WORD+NL";
				5483	break;
				5484	case NWORD:
				5485	p = "NWORD";
				5486	break;
				5487	case NWORD + ADD_NL:
				5488	p = "NWORD+NL";
				5489	break;
				5490	case HEAD:
				5491	p = "HEAD";
				5492	break;
				5493	case HEAD + ADD_NL:
				5494	p = "HEAD+NL";
				5495	break;
				5496	case NHEAD:
				5497	p = "NHEAD";
				5498	break;
				5499	case NHEAD + ADD_NL:
				5500	p = "NHEAD+NL";
				5501	break;
				5502	case ALPHA:
				5503	p = "ALPHA";
				5504	break;
				5505	case ALPHA + ADD_NL:
				5506	p = "ALPHA+NL";
				5507	break;
				5508	case NALPHA:
				5509	p = "NALPHA";
				5510	break;
				5511	case NALPHA + ADD_NL:
				5512	p = "NALPHA+NL";
				5513	break;
				5514	case LOWER:
				5515	p = "LOWER";
				5516	break;
				5517	case LOWER + ADD_NL:
				5518	p = "LOWER+NL";
				5519	break;
				5520	case NLOWER:
				5521	p = "NLOWER";
				5522	break;
				5523	case NLOWER + ADD_NL:
				5524	p = "NLOWER+NL";
				5525	break;
				5526	case UPPER:
				5527	p = "UPPER";
				5528	break;
				5529	case UPPER + ADD_NL:
				5530	p = "UPPER+NL";
				5531	break;
				5532	case NUPPER:
				5533	p = "NUPPER";
				5534	break;
				5535	case NUPPER + ADD_NL:
				5536	p = "NUPPER+NL";
				5537	break;
				5538	case BRANCH:
				5539	p = "BRANCH";
				5540	break;
				5541	case EXACTLY:
				5542	p = "EXACTLY";
				5543	break;
				5544	case NOTHING:
				5545	p = "NOTHING";
				5546	break;
				5547	case BACK:
				5548	p = "BACK";
				5549	break;
				5550	case END:
				5551	p = "END";
				5552	break;
				5553	case MOPEN + 0:
				5554	p = "MATCH START";
				5555	break;
				5556	case MOPEN + 1:
				5557	case MOPEN + 2:
				5558	case MOPEN + 3:
				5559	case MOPEN + 4:
				5560	case MOPEN + 5:
				5561	case MOPEN + 6:
				5562	case MOPEN + 7:
				5563	case MOPEN + 8:
				5564	case MOPEN + 9:
				5565	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5566	p = NULL;
				5567	break;
				5568	case MCLOSE + 0:
				5569	p = "MATCH END";
				5570	break;
				5571	case MCLOSE + 1:
				5572	case MCLOSE + 2:
				5573	case MCLOSE + 3:
				5574	case MCLOSE + 4:
				5575	case MCLOSE + 5:
				5576	case MCLOSE + 6:
				5577	case MCLOSE + 7:
				5578	case MCLOSE + 8:
				5579	case MCLOSE + 9:
				5580	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5581	p = NULL;
				5582	break;
				5583	case BACKREF + 1:
				5584	case BACKREF + 2:
				5585	case BACKREF + 3:
				5586	case BACKREF + 4:
				5587	case BACKREF + 5:
				5588	case BACKREF + 6:
				5589	case BACKREF + 7:
				5590	case BACKREF + 8:
				5591	case BACKREF + 9:
				5592	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5593	p = NULL;
				5594	break;
				5595	case NOPEN:
				5596	p = "NOPEN";
				5597	break;
				5598	case NCLOSE:
				5599	p = "NCLOSE";
				5600	break;
				5601	#ifdef FEAT_SYN_HL
				5602	case ZOPEN + 1:
				5603	case ZOPEN + 2:
				5604	case ZOPEN + 3:
				5605	case ZOPEN + 4:
				5606	case ZOPEN + 5:
				5607	case ZOPEN + 6:
				5608	case ZOPEN + 7:
				5609	case ZOPEN + 8:
				5610	case ZOPEN + 9:
				5611	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5612	p = NULL;
				5613	break;
				5614	case ZCLOSE + 1:
				5615	case ZCLOSE + 2:
				5616	case ZCLOSE + 3:
				5617	case ZCLOSE + 4:
				5618	case ZCLOSE + 5:
				5619	case ZCLOSE + 6:
				5620	case ZCLOSE + 7:
				5621	case ZCLOSE + 8:
				5622	case ZCLOSE + 9:
				5623	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5624	p = NULL;
				5625	break;
				5626	case ZREF + 1:
				5627	case ZREF + 2:
				5628	case ZREF + 3:
				5629	case ZREF + 4:
				5630	case ZREF + 5:
				5631	case ZREF + 6:
				5632	case ZREF + 7:
				5633	case ZREF + 8:
				5634	case ZREF + 9:
				5635	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5636	p = NULL;
				5637	break;
				5638	#endif
				5639	case STAR:
				5640	p = "STAR";
				5641	break;
				5642	case PLUS:
				5643	p = "PLUS";
				5644	break;
				5645	case NOMATCH:
				5646	p = "NOMATCH";
				5647	break;
				5648	case MATCH:
				5649	p = "MATCH";
				5650	break;
				5651	case BEHIND:
				5652	p = "BEHIND";
				5653	break;
				5654	case NOBEHIND:
				5655	p = "NOBEHIND";
				5656	break;
				5657	case SUBPAT:
				5658	p = "SUBPAT";
				5659	break;
				5660	case BRACE_LIMITS:
				5661	p = "BRACE_LIMITS";
				5662	break;
				5663	case BRACE_SIMPLE:
				5664	p = "BRACE_SIMPLE";
				5665	break;
				5666	case BRACE_COMPLEX + 0:
				5667	case BRACE_COMPLEX + 1:
				5668	case BRACE_COMPLEX + 2:
				5669	case BRACE_COMPLEX + 3:
				5670	case BRACE_COMPLEX + 4:
				5671	case BRACE_COMPLEX + 5:
				5672	case BRACE_COMPLEX + 6:
				5673	case BRACE_COMPLEX + 7:
				5674	case BRACE_COMPLEX + 8:
				5675	case BRACE_COMPLEX + 9:
				5676	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5677	p = NULL;
				5678	break;
				5679	#ifdef FEAT_MBYTE
				5680	case MULTIBYTECODE:
				5681	p = "MULTIBYTECODE";
				5682	break;
				5683	#endif
				5684	case NEWL:
				5685	p = "NEWL";
				5686	break;
				5687	default:
				5688	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5689	p = NULL;
				5690	break;
				5691	}
				5692	if (p != NULL)
				5693	(void) strcat(buf, p);
				5694	return buf;
				5695	}
				5696	#endif
				5697
				5698	#ifdef FEAT_MBYTE
				5699	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5700
				5701	typedef struct
				5702	{
				5703	int a, b, c;
				5704	} decomp_T;
				5705
				5706
				5707	/* 0xfb20 - 0xfb4f */
				5708	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5709	{
				5710	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5711	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5712	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5713	{0x5d4,0,0}, /* 0xfb23 alt he */
				5714	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5715	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5716	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5717	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5718	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5719	{'+', 0, 0}, /* 0xfb29 alt plus */
				5720	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5721	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5722	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5723	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5724	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5725	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5726	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5727	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5728	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5729	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5730	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5731	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5732	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5733	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5734	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5735	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5736	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5737	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5738	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5739	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5740	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5741	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5742	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5743	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5744	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5745	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5746	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5747	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5748	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5749	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5750	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5751	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5752	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5753	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5754	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5755	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5756	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5757	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5758	};
				5759
				5760	static void
				5761	mb_decompose(c, c1, c2, c3)
				5762	int c, c1, c2, *c3;
				5763	{
				5764	decomp_T d;
				5765
				5766	if (c >= 0x4b20 && c <= 0xfb4f)
				5767	{
				5768	d = decomp_table[c - 0xfb20];
				5769	*c1 = d.a;
				5770	*c2 = d.b;
				5771	*c3 = d.c;
				5772	}
				5773	else
				5774	{
				5775	*c1 = c;
				5776	c2 = c3 = 0;
				5777	}
				5778	}
				5779	#endif
				5780
				5781	/*
				5782	* Compare two strings, ignore case if ireg_ic set.
				5783	* Return 0 if strings match, non-zero otherwise.
				5784	* Correct the length "*n" when composing characters are ignored.
				5785	*/
				5786	static int
				5787	cstrncmp(s1, s2, n)
				5788	char_u s1, s2;
				5789	int *n;
				5790	{
				5791	int result;
				5792
				5793	if (!ireg_ic)
				5794	result = STRNCMP(s1, s2, *n);
				5795	else
				5796	result = MB_STRNICMP(s1, s2, *n);
				5797
				5798	#ifdef FEAT_MBYTE
				5799	/* if it failed and it's utf8 and we want to combineignore: */
				5800	if (result != 0 && enc_utf8 && ireg_icombine)
				5801	{
				5802	char_u str1, str2;
				5803	int c1, c2, c11, c12;
				5804	int ix;
				5805	int junk;
				5806
				5807	/* we have to handle the strcmp ourselves, since it is necessary to
				5808	* deal with the composing characters by ignoring them: */
				5809	str1 = s1;
				5810	str2 = s2;
				5811	c1 = c2 = 0;
				5812	for (ix = 0; ix < *n; )
				5813	{
				5814	c1 = mb_ptr2char_adv(&str1);
				5815	c2 = mb_ptr2char_adv(&str2);
				5816	ix += utf_char2len(c1);
				5817
				5818	/* decompose the character if necessary, into 'base' characters
				5819	* because I don't care about Arabic, I will hard-code the Hebrew
				5820	* which I do care about! So sue me... */
				5821	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5822	{
				5823	/* decomposition necessary? */
				5824	mb_decompose(c1, &c11, &junk, &junk);
				5825	mb_decompose(c2, &c12, &junk, &junk);
				5826	c1 = c11;
				5827	c2 = c12;
				5828	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5829	break;
				5830	}
				5831	}
				5832	result = c2 - c1;
				5833	if (result == 0)
				5834	*n = (int)(str2 - s2);
				5835	}
				5836	#endif
				5837
				5838	return result;
				5839	}
				5840
				5841	/*
				5842	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5843	*/
				5844	static char_u *
				5845	cstrchr(s, c)
				5846	char_u *s;
				5847	int c;
				5848	{
				5849	char_u *p;
				5850	int cc;
				5851
				5852	if (!ireg_ic
				5853	#ifdef FEAT_MBYTE
				5854	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5855	#endif
				5856	)
				5857	return vim_strchr(s, c);
				5858
				5859	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5860	* faster (esp. when using MS Visual C++!).
				5861	* For UTF-8 need to use folded case. */
				5862	#ifdef FEAT_MBYTE
				5863	if (enc_utf8 && c > 0x80)
				5864	cc = utf_fold(c);
				5865	else
				5866	#endif
				5867	if (isupper(c))
				5868	cc = TOLOWER_LOC(c);
				5869	else if (islower(c))
				5870	cc = TOUPPER_LOC(c);
				5871	else
				5872	return vim_strchr(s, c);
				5873
				5874	#ifdef FEAT_MBYTE
				5875	if (has_mbyte)
				5876	{
				5877	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5878	{
				5879	if (enc_utf8 && c > 0x80)
				5880	{
				5881	if (utf_fold(utf_ptr2char(p)) == cc)
				5882	return p;
				5883	}
				5884	else if (p == c \|\| p == cc)
				5885	return p;
				5886	}
				5887	}
				5888	else
				5889	#endif
				5890	/* Faster version for when there are no multi-byte characters. */
				5891	for (p = s; *p != NUL; ++p)
				5892	if (p == c \|\| p == cc)
				5893	return p;
				5894
				5895	return NULL;
				5896	}
				5897
				5898	/***************************************************************
				5899	* regsub stuff *
				5900	***************************************************************/
				5901
				5902	/* This stuff below really confuses cc on an SGI -- webb */
				5903	#ifdef __sgi
				5904	# undef __ARGS
				5905	# define __ARGS(x) ()
				5906	#endif
				5907
				5908	/*
				5909	* We should define ftpr as a pointer to a function returning a pointer to
				5910	* a function returning a pointer to a function ...
				5911	* This is impossible, so we declare a pointer to a function returning a
				5912	* pointer to a function returning void. This should work for all compilers.
				5913	*/
				5914	typedef void ((fptr) __ARGS((char_u *, int)))();
				5915
				5916	static fptr do_upper __ARGS((char_u *, int));
				5917	static fptr do_Upper __ARGS((char_u *, int));
				5918	static fptr do_lower __ARGS((char_u *, int));
				5919	static fptr do_Lower __ARGS((char_u *, int));
				5920
				5921	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5922
				5923	static fptr
				5924	do_upper(d, c)
				5925	char_u *d;
				5926	int c;
				5927	{
				5928	*d = TOUPPER_LOC(c);
				5929
				5930	return (fptr)NULL;
				5931	}
				5932
				5933	static fptr
				5934	do_Upper(d, c)
				5935	char_u *d;
				5936	int c;
				5937	{
				5938	*d = TOUPPER_LOC(c);
				5939
				5940	return (fptr)do_Upper;
				5941	}
				5942
				5943	static fptr
				5944	do_lower(d, c)
				5945	char_u *d;
				5946	int c;
				5947	{
				5948	*d = TOLOWER_LOC(c);
				5949
				5950	return (fptr)NULL;
				5951	}
				5952
				5953	static fptr
				5954	do_Lower(d, c)
				5955	char_u *d;
				5956	int c;
				5957	{
				5958	*d = TOLOWER_LOC(c);
				5959
				5960	return (fptr)do_Lower;
				5961	}
				5962
				5963	/*
				5964	* regtilde(): Replace tildes in the pattern by the old pattern.
				5965	*
				5966	* Short explanation of the tilde: It stands for the previous replacement
				5967	* pattern. If that previous pattern also contains a ~ we should go back a
				5968	* step further... But we insert the previous pattern into the current one
				5969	* and remember that.
				5970	* This still does not handle the case where "magic" changes. TODO?
				5971	*
				5972	* The tildes are parsed once before the first call to vim_regsub().
				5973	*/
				5974	char_u *
				5975	regtilde(source, magic)
				5976	char_u *source;
				5977	int magic;
				5978	{
				5979	char_u *newsub = source;
				5980	char_u *tmpsub;
				5981	char_u *p;
				5982	int len;
				5983	int prevlen;
				5984
				5985	for (p = newsub; *p; ++p)
				5986	{
				5987	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5988	{
				5989	if (reg_prev_sub != NULL)
				5990	{
				5991	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5992	prevlen = (int)STRLEN(reg_prev_sub);
				5993	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5994	if (tmpsub != NULL)
				5995	{
				5996	/* copy prefix */
				5997	len = (int)(p - newsub); /* not including ~ */
				5998	mch_memmove(tmpsub, newsub, (size_t)len);
				5999	/* interpretate tilde */
				6000	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				6001	/* copy postfix */
				6002	if (!magic)
				6003	++p; /* back off \ */
				6004	STRCPY(tmpsub + len + prevlen, p + 1);
				6005
				6006	if (newsub != source) /* already allocated newsub */
				6007	vim_free(newsub);
				6008	newsub = tmpsub;
				6009	p = newsub + len + prevlen;
				6010	}
				6011	}
				6012	else if (magic)
				6013	STRCPY(p, p + 1); /* remove '~' */
				6014	else
				6015	STRCPY(p, p + 2); /* remove '\~' */
				6016	--p;
				6017	}
				6018	else
				6019	{
				6020	if (p == '\\' && p[1]) / skip escaped characters */
				6021	++p;
				6022	#ifdef FEAT_MBYTE
				6023	if (has_mbyte)
				6024	p += (*mb_ptr2len_check)(p) - 1;
				6025	#endif
				6026	}
				6027	}
				6028
				6029	vim_free(reg_prev_sub);
				6030	if (newsub != source) /* newsub was allocated, just keep it */
				6031	reg_prev_sub = newsub;
				6032	else /* no ~ found, need to save newsub */
				6033	reg_prev_sub = vim_strsave(newsub);
				6034	return newsub;
				6035	}
				6036
				6037	#ifdef FEAT_EVAL
				6038	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				6039
				6040	/* These pointers are used instead of reg_match and reg_mmatch for
				6041	* reg_submatch(). Needed for when the substitution string is an expression
				6042	* that contains a call to substitute() and submatch(). */
				6043	static regmatch_T *submatch_match;
				6044	static regmmatch_T *submatch_mmatch;
				6045	#endif
				6046
				6047	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				6048	/*
				6049	* vim_regsub() - perform substitutions after a vim_regexec() or
				6050	* vim_regexec_multi() match.
				6051	*
				6052	* If "copy" is TRUE really copy into "dest".
				6053	* If "copy" is FALSE nothing is copied, this is just to find out the length
				6054	* of the result.
				6055	*
				6056	* If "backslash" is TRUE, a backslash will be removed later, need to double
				6057	* them to keep them, and insert a backslash before a CR to avoid it being
				6058	* replaced with a line break later.
				6059	*
				6060	* Note: The matched text must not change between the call of
				6061	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				6062	* references invalid!
				6063	*
				6064	* Returns the size of the replacement, including terminating NUL.
				6065	*/
				6066	int
				6067	vim_regsub(rmp, source, dest, copy, magic, backslash)
				6068	regmatch_T *rmp;
				6069	char_u *source;
				6070	char_u *dest;
				6071	int copy;
				6072	int magic;
				6073	int backslash;
				6074	{
				6075	reg_match = rmp;
				6076	reg_mmatch = NULL;
				6077	reg_maxline = 0;
				6078	return vim_regsub_both(source, dest, copy, magic, backslash);
				6079	}
				6080	#endif
				6081
				6082	int
				6083	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				6084	regmmatch_T *rmp;
				6085	linenr_T lnum;
				6086	char_u *source;
				6087	char_u *dest;
				6088	int copy;
				6089	int magic;
				6090	int backslash;
				6091	{
				6092	reg_match = NULL;
				6093	reg_mmatch = rmp;
				6094	reg_buf = curbuf; /* always works on the current buffer! */
				6095	reg_firstlnum = lnum;
				6096	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				6097	return vim_regsub_both(source, dest, copy, magic, backslash);
				6098	}
				6099
				6100	static int
				6101	vim_regsub_both(source, dest, copy, magic, backslash)
				6102	char_u *source;
				6103	char_u *dest;
				6104	int copy;
				6105	int magic;
				6106	int backslash;
				6107	{
				6108	char_u *src;
				6109	char_u *dst;
				6110	char_u *s;
				6111	int c;
				6112	int no = -1;
				6113	fptr func = (fptr)NULL;
				6114	linenr_T clnum = 0; /* init for GCC */
				6115	int len = 0; /* init for GCC */
				6116	#ifdef FEAT_EVAL
				6117	static char_u *eval_result = NULL;
				6118	#endif
				6119	#ifdef FEAT_MBYTE
				6120	int l;
				6121	#endif
				6122
				6123
				6124	/* Be paranoid... */
				6125	if (source == NULL \|\| dest == NULL)
				6126	{
				6127	EMSG(_(e_null));
				6128	return 0;
				6129	}
				6130	if (prog_magic_wrong())
				6131	return 0;
				6132	src = source;
				6133	dst = dest;
				6134
				6135	/*
				6136	* When the substitute part starts with "\=" evaluate it as an expression.
				6137	*/
				6138	if (source[0] == '\\' && source[1] == '='
				6139	#ifdef FEAT_EVAL
				6140	&& !can_f_submatch /* can't do this recursively */
				6141	#endif
				6142	)
				6143	{
				6144	#ifdef FEAT_EVAL
				6145	/* To make sure that the length doesn't change between checking the
				6146	* length and copying the string, and to speed up things, the
				6147	* resulting string is saved from the call with "copy" == FALSE to the
				6148	* call with "copy" == TRUE. */
				6149	if (copy)
				6150	{
				6151	if (eval_result != NULL)
				6152	{
				6153	STRCPY(dest, eval_result);
				6154	dst += STRLEN(eval_result);
				6155	vim_free(eval_result);
				6156	eval_result = NULL;
				6157	}
				6158	}
				6159	else
				6160	{
				6161	linenr_T save_reg_maxline;
				6162	win_T *save_reg_win;
				6163	int save_ireg_ic;
				6164
				6165	vim_free(eval_result);
				6166
				6167	/* The expression may contain substitute(), which calls us
				6168	* recursively. Make sure submatch() gets the text from the first
				6169	* level. Don't need to save "reg_buf", because
				6170	* vim_regexec_multi() can't be called recursively. */
				6171	submatch_match = reg_match;
				6172	submatch_mmatch = reg_mmatch;
				6173	save_reg_maxline = reg_maxline;
				6174	save_reg_win = reg_win;
				6175	save_ireg_ic = ireg_ic;
				6176	can_f_submatch = TRUE;
				6177
				6178	eval_result = eval_to_string(source + 2, NULL);
				6179	if (eval_result != NULL)
				6180	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	6181	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6182	{
				6183	/* Change NL to CR, so that it becomes a line break.
				6184	* Skip over a backslashed character. */
				6185	if (*s == NL)
				6186	*s = CAR;
				6187	else if (*s == '\\' && s[1] != NUL)
				6188	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6189	}
				6190
				6191	dst += STRLEN(eval_result);
				6192	}
				6193
				6194	reg_match = submatch_match;
				6195	reg_mmatch = submatch_mmatch;
				6196	reg_maxline = save_reg_maxline;
				6197	reg_win = save_reg_win;
				6198	ireg_ic = save_ireg_ic;
				6199	can_f_submatch = FALSE;
				6200	}
				6201	#endif
				6202	}
				6203	else
				6204	while ((c = *src++) != NUL)
				6205	{
				6206	if (c == '&' && magic)
				6207	no = 0;
				6208	else if (c == '\\' && *src != NUL)
				6209	{
				6210	if (*src == '&' && !magic)
				6211	{
				6212	++src;
				6213	no = 0;
				6214	}
				6215	else if ('0' <= src && src <= '9')
				6216	{
				6217	no = *src++ - '0';
				6218	}
				6219	else if (vim_strchr((char_u )"uUlLeE", src))
				6220	{
				6221	switch (*src++)
				6222	{
				6223	case 'u': func = (fptr)do_upper;
				6224	continue;
				6225	case 'U': func = (fptr)do_Upper;
				6226	continue;
				6227	case 'l': func = (fptr)do_lower;
				6228	continue;
				6229	case 'L': func = (fptr)do_Lower;
				6230	continue;
				6231	case 'e':
				6232	case 'E': func = (fptr)NULL;
				6233	continue;
				6234	}
				6235	}
				6236	}
				6237	if (no < 0) /* Ordinary character. */
				6238	{
				6239	if (c == '\\' && *src != NUL)
				6240	{
				6241	/* Check for abbreviations -- webb */
				6242	switch (*src)
				6243	{
				6244	case 'r': c = CAR; ++src; break;
				6245	case 'n': c = NL; ++src; break;
				6246	case 't': c = TAB; ++src; break;
				6247	/* Oh no! \e already has meaning in subst pat :-( */
				6248	/* case 'e': c = ESC; ++src; break; */
				6249	case 'b': c = Ctrl_H; ++src; break;
				6250
				6251	/* If "backslash" is TRUE the backslash will be removed
				6252	* later. Used to insert a literal CR. */
				6253	default: if (backslash)
				6254	{
				6255	if (copy)
				6256	*dst = '\\';
				6257	++dst;
				6258	}
				6259	c = *src++;
				6260	}
				6261	}
				6262
				6263	/* Write to buffer, if copy is set. */
				6264	#ifdef FEAT_MBYTE
				6265	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6266	{
				6267	/* TODO: should use "func" here. */
				6268	if (copy)
				6269	mch_memmove(dst, src - 1, l);
				6270	dst += l - 1;
				6271	src += l - 1;
				6272	}
				6273	else
				6274	{
				6275	#endif
				6276	if (copy)
				6277	{
				6278	if (func == (fptr)NULL) /* just copy */
				6279	*dst = c;
				6280	else /* change case */
				6281	func = (fptr)(func(dst, c));
				6282	/* Turbo C complains without the typecast */
				6283	}
				6284	#ifdef FEAT_MBYTE
				6285	}
				6286	#endif
				6287	dst++;
				6288	}
				6289	else
				6290	{
				6291	if (REG_MULTI)
				6292	{
				6293	clnum = reg_mmatch->startpos[no].lnum;
				6294	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6295	s = NULL;
				6296	else
				6297	{
				6298	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6299	if (reg_mmatch->endpos[no].lnum == clnum)
				6300	len = reg_mmatch->endpos[no].col
				6301	- reg_mmatch->startpos[no].col;
				6302	else
				6303	len = (int)STRLEN(s);
				6304	}
				6305	}
				6306	else
				6307	{
				6308	s = reg_match->startp[no];
				6309	if (reg_match->endp[no] == NULL)
				6310	s = NULL;
				6311	else
				6312	len = (int)(reg_match->endp[no] - s);
				6313	}
				6314	if (s != NULL)
				6315	{
				6316	for (;;)
				6317	{
				6318	if (len == 0)
				6319	{
				6320	if (REG_MULTI)
				6321	{
				6322	if (reg_mmatch->endpos[no].lnum == clnum)
				6323	break;
				6324	if (copy)
				6325	*dst = CAR;
				6326	++dst;
				6327	s = reg_getline(++clnum);
				6328	if (reg_mmatch->endpos[no].lnum == clnum)
				6329	len = reg_mmatch->endpos[no].col;
				6330	else
				6331	len = (int)STRLEN(s);
				6332	}
				6333	else
				6334	break;
				6335	}
				6336	else if (s == NUL) / we hit NUL. */
				6337	{
				6338	if (copy)
				6339	EMSG(_(e_re_damg));
				6340	goto exit;
				6341	}
				6342	else
				6343	{
				6344	if (backslash && (s == CAR \|\| s == '\\'))
				6345	{
				6346	/*
				6347	* Insert a backslash in front of a CR, otherwise
				6348	* it will be replaced by a line break.
				6349	* Number of backslashes will be halved later,
				6350	* double them here.
				6351	*/
				6352	if (copy)
				6353	{
				6354	dst[0] = '\\';
				6355	dst[1] = *s;
				6356	}
				6357	dst += 2;
				6358	}
				6359	#ifdef FEAT_MBYTE
				6360	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6361	{
				6362	/* TODO: should use "func" here. */
				6363	if (copy)
				6364	mch_memmove(dst, s, l);
				6365	dst += l;
				6366	s += l - 1;
				6367	len -= l - 1;
				6368	}
				6369	#endif
				6370	else
				6371	{
				6372	if (copy)
				6373	{
				6374	if (func == (fptr)NULL) /* just copy */
				6375	dst = s;
				6376	else /* change case */
				6377	func = (fptr)(func(dst, *s));
				6378	/* Turbo C complains without the typecast */
				6379	}
				6380	++dst;
				6381	}
				6382	++s;
				6383	--len;
				6384	}
				6385	}
				6386	}
				6387	no = -1;
				6388	}
				6389	}
				6390	if (copy)
				6391	*dst = NUL;
				6392
				6393	exit:
				6394	return (int)((dst - dest) + 1);
				6395	}
				6396
				6397	#ifdef FEAT_EVAL
				6398	/*
				6399	* Used for the submatch() function: get the string from tne n'th submatch in
				6400	* allocated memory.
				6401	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6402	*/
				6403	char_u *
				6404	reg_submatch(no)
				6405	int no;
				6406	{
				6407	char_u *retval = NULL;
				6408	char_u *s;
				6409	int len;
				6410	int round;
				6411	linenr_T lnum;
				6412
				6413	if (!can_f_submatch)
				6414	return NULL;
				6415
				6416	if (submatch_match == NULL)
				6417	{
				6418	/*
				6419	* First round: compute the length and allocate memory.
				6420	* Second round: copy the text.
				6421	*/
				6422	for (round = 1; round <= 2; ++round)
				6423	{
				6424	lnum = submatch_mmatch->startpos[no].lnum;
				6425	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6426	return NULL;
				6427
				6428	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6429	if (s == NULL) /* anti-crash check, cannot happen? */
				6430	break;
				6431	if (submatch_mmatch->endpos[no].lnum == lnum)
				6432	{
				6433	/* Within one line: take form start to end col. */
				6434	len = submatch_mmatch->endpos[no].col
				6435	- submatch_mmatch->startpos[no].col;
				6436	if (round == 2)
				6437	{
				6438	STRNCPY(retval, s, len);
				6439	retval[len] = NUL;
				6440	}
				6441	++len;
				6442	}
				6443	else
				6444	{
				6445	/* Multiple lines: take start line from start col, middle
				6446	* lines completely and end line up to end col. */
				6447	len = (int)STRLEN(s);
				6448	if (round == 2)
				6449	{
				6450	STRCPY(retval, s);
				6451	retval[len] = '\n';
				6452	}
				6453	++len;
				6454	++lnum;
				6455	while (lnum < submatch_mmatch->endpos[no].lnum)
				6456	{
				6457	s = reg_getline(lnum++);
				6458	if (round == 2)
				6459	STRCPY(retval + len, s);
				6460	len += (int)STRLEN(s);
				6461	if (round == 2)
				6462	retval[len] = '\n';
				6463	++len;
				6464	}
				6465	if (round == 2)
				6466	STRNCPY(retval + len, reg_getline(lnum),
				6467	submatch_mmatch->endpos[no].col);
				6468	len += submatch_mmatch->endpos[no].col;
				6469	if (round == 2)
				6470	retval[len] = NUL;
				6471	++len;
				6472	}
				6473
				6474	if (round == 1)
				6475	{
				6476	retval = lalloc((long_u)len, TRUE);
				6477	if (s == NULL)
				6478	return NULL;
				6479	}
				6480	}
				6481	}
				6482	else
				6483	{
				6484	if (submatch_match->endp[no] == NULL)
				6485	retval = NULL;
				6486	else
				6487	{
				6488	s = submatch_match->startp[no];
				6489	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6490	}
				6491	}
				6492
				6493	return retval;
				6494	}
				6495	#endif