Blame - src/regexp.c - android_external_vim

blob: 23e31af22611851e36d90f03f89b1d93e5f92484 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
				77	* node points to the node after the stuff to be repeated. The operand of some
				78	* types of node is a literal string; for others, it is a node leading into a
				79	* sub-FSM. In particular, the operand of a BRANCH node is the first node of
				80	* the branch. (NB this is not a tree structure: the tail of the branch
				81	* connects to the thing following the set of BRANCHes.)
				82	*
				83	* pattern       is coded like:
				84	*
				85	*                        +-----------------+
				86	*                        |                 V
				87	* <aa>\|<bb>   BRANCH <aa> BRANCH <bb> --> END
				88	*                   |      ^    |          ^
				89	*                   +------+    +----------+
				90	*
				91	*
				92	*                     +------------------+
				93	*                     V                  |
				94	* <aa>*        BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				95	*                   |      |               ^                      ^
				96	*                   |      +---------------+                      |
				97	*                   +---------------------------------------------+
				98	*
				99	*
				100	*                                      +-------------------------+
				101	*                                      V                         |
				102	* <aa>\{}      BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
				103	*                   |                              |                ^
				104	*                   |                              +----------------+
				105	*                   +-----------------------------------------------+
				106	*
				107	*
				108	* <aa>\@!<bb>  BRANCH NOMATCH <aa> --> END  <bb> --> END
				109	*                   |       |                ^       ^
				110	*                   |       +----------------+       |
				111	*                   +--------------------------------+
				112	*
				113	*                                                    +---------+
				114	*                                                    |         V
				115	* \z[abc]      BRANCH BRANCH  a  BRANCH  b  BRANCH  c  BRANCH  NOTHING --> END
				116	*                   |      |          |          |     ^                   ^
				117	*                   |      |          |          +-----+                   |
				118	*                   |      |          +----------------+                   |
				119	*                   |      +---------------------------+                   |
				120	*                   +------------------------------------------------------+
				121	*
				122	* They all start with a BRANCH for "\|" alternatives, even when there is only
				123	* one alternative.
				124	*/
				125
				126	/*
				127	* The opcodes are:
				128	*/
				129
				130	/* definition number opnd? meaning */
				131	#define END 0 /* End of program or NOMATCH operand. */
				132	#define BOL 1 /* Match "" at beginning of line. */
				133	#define EOL 2 /* Match "" at end of line. */
				134	#define BRANCH 3 /* node Match this alternative, or the
				135	* next... */
				136	#define BACK 4 /* Match "", "next" ptr points backward. */
				137	#define EXACTLY 5 /* str Match this string. */
				138	#define NOTHING 6 /* Match empty string. */
				139	#define STAR 7 /* node Match this (simple) thing 0 or more
				140	* times. */
				141	#define PLUS 8 /* node Match this (simple) thing 1 or more
				142	* times. */
				143	#define MATCH 9 /* node match the operand zero-width */
				144	#define NOMATCH 10 /* node check for no match with operand */
				145	#define BEHIND 11 /* node look behind for a match with operand */
				146	#define NOBEHIND 12 /* node look behind for no match with operand */
				147	#define SUBPAT 13 /* node match the operand here */
				148	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				149	* n times (\{m,n\}). */
				150	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				151	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				152	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				153	* and BRACE_COMPLEX. */
				154	#define NEWL 18 /* Match line-break */
				155	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				156
				157
				158	/* character classes: 20-48 normal, 50-78 include a line-break */
				159	#define ADD_NL 30
				160	#define FIRST_NL ANY + ADD_NL
				161	#define ANY 20 /* Match any one character. */
				162	#define ANYOF 21 /* str Match any character in this string. */
				163	#define ANYBUT 22 /* str Match any character not in this
				164	* string. */
				165	#define IDENT 23 /* Match identifier char */
				166	#define SIDENT 24 /* Match identifier char but no digit */
				167	#define KWORD 25 /* Match keyword char */
				168	#define SKWORD 26 /* Match word char but no digit */
				169	#define FNAME 27 /* Match file name char */
				170	#define SFNAME 28 /* Match file name char but no digit */
				171	#define PRINT 29 /* Match printable char */
				172	#define SPRINT 30 /* Match printable char but no digit */
				173	#define WHITE 31 /* Match whitespace char */
				174	#define NWHITE 32 /* Match non-whitespace char */
				175	#define DIGIT 33 /* Match digit char */
				176	#define NDIGIT 34 /* Match non-digit char */
				177	#define HEX 35 /* Match hex char */
				178	#define NHEX 36 /* Match non-hex char */
				179	#define OCTAL 37 /* Match octal char */
				180	#define NOCTAL 38 /* Match non-octal char */
				181	#define WORD 39 /* Match word char */
				182	#define NWORD 40 /* Match non-word char */
				183	#define HEAD 41 /* Match head char */
				184	#define NHEAD 42 /* Match non-head char */
				185	#define ALPHA 43 /* Match alpha char */
				186	#define NALPHA 44 /* Match non-alpha char */
				187	#define LOWER 45 /* Match lowercase char */
				188	#define NLOWER 46 /* Match non-lowercase char */
				189	#define UPPER 47 /* Match uppercase char */
				190	#define NUPPER 48 /* Match non-uppercase char */
				191	#define LAST_NL NUPPER + ADD_NL
				192	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				193
				194	#define MOPEN 80 /* -89 Mark this point in input as start of
				195	* \( subexpr. MOPEN + 0 marks start of
				196	* match. */
				197	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				198	* end of match. */
				199	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				200
				201	#ifdef FEAT_SYN_HL
				202	# define ZOPEN 110 /* -119 Mark this point in input as start of
				203	* \z( subexpr. */
				204	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				205	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				206	#endif
				207
				208	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				209
				210	#define NOPEN 150 /* Mark this point in input as start of
				211	\%( subexpr. */
				212	#define NCLOSE 151 /* Analogous to NOPEN. */
				213
				214	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				215	#define RE_BOF 201 /* Match "" at beginning of file. */
				216	#define RE_EOF 202 /* Match "" at end of file. */
				217	#define CURSOR 203 /* Match location of cursor. */
				218
				219	#define RE_LNUM 204 /* nr cmp Match line number */
				220	#define RE_COL 205 /* nr cmp Match column number */
				221	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				222
				223	/*
				224	* Magic characters have a special meaning, they don't match literally.
				225	* Magic characters are negative. This separates them from literal characters
				226	* (possibly multi-byte). Only ASCII characters can be Magic.
				227	*/
				228	#define Magic(x) ((int)(x) - 256)
				229	#define un_Magic(x) ((x) + 256)
				230	#define is_Magic(x) ((x) < 0)
				231
				232	static int no_Magic __ARGS((int x));
				233	static int toggle_Magic __ARGS((int x));
				234
				235	static int
				236	no_Magic(x)
				237	int x;
				238	{
				239	if (is_Magic(x))
				240	return un_Magic(x);
				241	return x;
				242	}
				243
				244	static int
				245	toggle_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return Magic(x);
				251	}
				252
				253	/*
				254	* The first byte of the regexp internal "program" is actually this magic
				255	* number; the start node begins in the second byte. It's used to catch the
				256	* most severe mutilation of the program by the caller.
				257	*/
				258
				259	#define REGMAGIC 0234
				260
				261	/*
				262	* Opcode notes:
				263	*
				264	* BRANCH The set of branches constituting a single choice are hooked
				265	* together with their "next" pointers, since precedence prevents
				266	* anything being concatenated to any individual branch. The
				267	* "next" pointer of the last BRANCH in a choice points to the
				268	* thing following the whole choice. This is also where the
				269	* final "next" pointer of each individual branch points; each
				270	* branch starts with the operand node of a BRANCH node.
				271	*
				272	* BACK Normal "next" pointers all implicitly point forward; BACK
				273	* exists to make loop structures possible.
				274	*
				275	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				276	* BRANCH structures using BACK. Simple cases (one character
				277	* per match) are implemented with STAR and PLUS for speed
				278	* and to minimize recursive plunges.
				279	*
				280	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				281	* node, and defines the min and max limits to be used for that
				282	* node.
				283	*
				284	* MOPEN,MCLOSE ...are numbered at compile time.
				285	* ZOPEN,ZCLOSE ...ditto
				286	*/
				287
				288	/*
				289	* A node is one char of opcode followed by two chars of "next" pointer.
				290	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				291	* value is a positive offset from the opcode of the node containing it.
				292	* An operand, if any, simply follows the node. (Note that much of the
				293	* code generation knows about this implicit relationship.)
				294	*
				295	* Using two bytes for the "next" pointer is vast overkill for most things,
				296	* but allows patterns to get big without disasters.
				297	*/
				298	#define OP(p) ((int)*(p))
				299	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				300	#define OPERAND(p) ((p) + 3)
				301	/* Obtain an operand that was stored as four bytes, MSB first. */
				302	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				303	+ ((long)(p)[5] << 8) + (long)(p)[6])
				304	/* Obtain a second operand stored as four bytes. */
				305	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				306	/* Obtain a second single-byte operand stored after a four bytes operand. */
				307	#define OPERAND_CMP(p) (p)[7]
				308
				309	/*
				310	* Utility definitions.
				311	*/
				312	#define UCHARAT(p) ((int)(char_u )(p))
				313
				314	/* Used for an error (down from) vim_regcomp(): give the error message, set
				315	* rc_did_emsg and return NULL */
				316	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				317	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				318	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				319	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				320
				321	#define MAX_LIMIT (32767L << 16L)
				322
				323	static int re_multi_type __ARGS((int));
				324	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				325	static char_u cstrchr __ARGS((char_u , int));
				326
				327	#ifdef DEBUG
				328	static void regdump __ARGS((char_u , regprog_T ));
				329	static char_u regprop __ARGS((char_u ));
				330	#endif
				331
				332	#define NOT_MULTI 0
				333	#define MULTI_ONE 1
				334	#define MULTI_MULT 2
				335	/*
				336	* Return NOT_MULTI if c is not a "multi" operator.
				337	* Return MULTI_ONE if c is a single "multi" operator.
				338	* Return MULTI_MULT if c is a multi "multi" operator.
				339	*/
				340	static int
				341	re_multi_type(c)
				342	int c;
				343	{
				344	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				345	return MULTI_ONE;
				346	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				347	return MULTI_MULT;
				348	return NOT_MULTI;
				349	}
				350
				351	/*
				352	* Flags to be passed up and down.
				353	*/
				354	#define HASWIDTH 0x1 /* Known never to match null string. */
				355	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				356	#define SPSTART 0x4 /* Starts with * or +. */
				357	#define HASNL 0x8 /* Contains some \n. */
				358	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				359	#define WORST 0 /* Worst case. */
				360
				361	/*
				362	* When regcode is set to this value, code is not emitted and size is computed
				363	* instead.
				364	*/
				365	#define JUST_CALC_SIZE ((char_u *) -1)
				366
				367	static char_u *reg_prev_sub;
				368
				369	/*
				370	* REGEXP_INRANGE contains all characters which are always special in a []
				371	* range after '\'.
				372	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				373	* These are:
				374	* \n - New line (NL).
				375	* \r - Carriage Return (CR).
				376	* \t - Tab (TAB).
				377	* \e - Escape (ESC).
				378	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	379	* \d - Character code in decimal, eg \d123
				380	* \o - Character code in octal, eg \o120
				381	* \x - Character code in hex, eg \x4a
				382	* \u - Multibyte character code, eg \u20ac
				383	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	384	*/
				385	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	386	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	387
				388	static int backslash_trans __ARGS((int c));
				389	static int skip_class_name __ARGS((char_u **pp));
				390	static char_u skip_anyof __ARGS((char_u p));
				391	static void init_class_tab __ARGS((void));
				392
				393	/*
				394	* Translate '\x' to its control character, except "\n", which is Magic.
				395	*/
				396	static int
				397	backslash_trans(c)
				398	int c;
				399	{
				400	switch (c)
				401	{
				402	case 'r': return CAR;
				403	case 't': return TAB;
				404	case 'e': return ESC;
				405	case 'b': return BS;
				406	}
				407	return c;
				408	}
				409
				410	/*
				411	* Check for a character class name. "pp" points to the '['.
				412	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				413	* recognized. Otherwise "pp" is advanced to after the item.
				414	*/
				415	static int
				416	skip_class_name(pp)
				417	char_u **pp;
				418	{
				419	static const char *(class_names[]) =
				420	{
				421	"alnum:]",
				422	#define CLASS_ALNUM 0
				423	"alpha:]",
				424	#define CLASS_ALPHA 1
				425	"blank:]",
				426	#define CLASS_BLANK 2
				427	"cntrl:]",
				428	#define CLASS_CNTRL 3
				429	"digit:]",
				430	#define CLASS_DIGIT 4
				431	"graph:]",
				432	#define CLASS_GRAPH 5
				433	"lower:]",
				434	#define CLASS_LOWER 6
				435	"print:]",
				436	#define CLASS_PRINT 7
				437	"punct:]",
				438	#define CLASS_PUNCT 8
				439	"space:]",
				440	#define CLASS_SPACE 9
				441	"upper:]",
				442	#define CLASS_UPPER 10
				443	"xdigit:]",
				444	#define CLASS_XDIGIT 11
				445	"tab:]",
				446	#define CLASS_TAB 12
				447	"return:]",
				448	#define CLASS_RETURN 13
				449	"backspace:]",
				450	#define CLASS_BACKSPACE 14
				451	"escape:]",
				452	#define CLASS_ESCAPE 15
				453	};
				454	#define CLASS_NONE 99
				455	int i;
				456
				457	if ((*pp)[1] == ':')
				458	{
				459	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				460	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				461	{
				462	*pp += STRLEN(class_names[i]) + 2;
				463	return i;
				464	}
				465	}
				466	return CLASS_NONE;
				467	}
				468
				469	/*
				470	* Skip over a "[]" range.
				471	* "p" must point to the character after the '['.
				472	* The returned pointer is on the matching ']', or the terminating NUL.
				473	*/
				474	static char_u *
				475	skip_anyof(p)
				476	char_u *p;
				477	{
				478	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				479	#ifdef FEAT_MBYTE
				480	int l;
				481	#endif
				482
				483	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				484
				485	if (p == '^') / Complement of range. */
				486	++p;
				487	if (p == ']' \|\| p == '-')
				488	++p;
				489	while (p != NUL && p != ']')
				490	{
				491	#ifdef FEAT_MBYTE
				492	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				493	p += l;
				494	else
				495	#endif
				496	if (*p == '-')
				497	{
				498	++p;
				499	if (p != ']' && p != NUL)
				500	{
				501	#ifdef FEAT_MBYTE
				502	if (has_mbyte)
				503	p += (*mb_ptr2len_check)(p);
				504	else
				505	#endif
				506	++p;
				507	}
				508	}
				509	else if (*p == '\\'
				510	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				511	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				512	p += 2;
				513	else if (*p == '[')
				514	{
				515	if (skip_class_name(&p) == CLASS_NONE)
				516	++p; /* It was not a class name */
				517	}
				518	else
				519	++p;
				520	}
				521
				522	return p;
				523	}
				524
				525	/*
				526	* Specific version of character class functions.
				527	* Using a table to keep this fast.
				528	*/
				529	static short class_tab[256];
				530
				531	#define RI_DIGIT 0x01
				532	#define RI_HEX 0x02
				533	#define RI_OCTAL 0x04
				534	#define RI_WORD 0x08
				535	#define RI_HEAD 0x10
				536	#define RI_ALPHA 0x20
				537	#define RI_LOWER 0x40
				538	#define RI_UPPER 0x80
				539	#define RI_WHITE 0x100
				540
				541	static void
				542	init_class_tab()
				543	{
				544	int i;
				545	static int done = FALSE;
				546
				547	if (done)
				548	return;
				549
				550	for (i = 0; i < 256; ++i)
				551	{
				552	if (i >= '0' && i <= '7')
				553	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				554	else if (i >= '8' && i <= '9')
				555	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				556	else if (i >= 'a' && i <= 'f')
				557	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				558	#ifdef EBCDIC
				559	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				560	\|\| (i >= 's' && i <= 'z'))
				561	#else
				562	else if (i >= 'g' && i <= 'z')
				563	#endif
				564	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				565	else if (i >= 'A' && i <= 'F')
				566	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				567	#ifdef EBCDIC
				568	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				569	\|\| (i >= 'S' && i <= 'Z'))
				570	#else
				571	else if (i >= 'G' && i <= 'Z')
				572	#endif
				573	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				574	else if (i == '_')
				575	class_tab[i] = RI_WORD + RI_HEAD;
				576	else
				577	class_tab[i] = 0;
				578	}
				579	class_tab[' '] \|= RI_WHITE;
				580	class_tab['\t'] \|= RI_WHITE;
				581	done = TRUE;
				582	}
				583
				584	#ifdef FEAT_MBYTE
				585	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				586	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				587	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				588	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				589	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				590	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				591	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				592	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				593	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				594	#else
				595	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				596	# define ri_hex(c) (class_tab[c] & RI_HEX)
				597	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				598	# define ri_word(c) (class_tab[c] & RI_WORD)
				599	# define ri_head(c) (class_tab[c] & RI_HEAD)
				600	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				601	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				602	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				603	# define ri_white(c) (class_tab[c] & RI_WHITE)
				604	#endif
				605
				606	/* flags for regflags */
				607	#define RF_ICASE 1 /* ignore case */
				608	#define RF_NOICASE 2 /* don't ignore case */
				609	#define RF_HASNL 4 /* can match a NL */
				610	#define RF_ICOMBINE 8 /* ignore combining characters */
				611	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				612
				613	/*
				614	* Global work variables for vim_regcomp().
				615	*/
				616
				617	static char_u regparse; / Input-scan pointer. */
				618	static int prevchr_len; /* byte length of previous char */
				619	static int num_complex_braces; /* Complex \{...} count */
				620	static int regnpar; /* () count. */
				621	#ifdef FEAT_SYN_HL
				622	static int regnzpar; /* \z() count. */
				623	static int re_has_z; /* \z item detected */
				624	#endif
				625	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				626	static long regsize; /* Code size. */
				627	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				628	static unsigned regflags; /* RF_ flags for prog */
				629	static long brace_min[10]; /* Minimums for complex brace repeats */
				630	static long brace_max[10]; /* Maximums for complex brace repeats */
				631	static int brace_count[10]; /* Current counts for complex brace repeats */
				632	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				633	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				634	#endif
				635	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				636
				637	static int reg_magic; /* magicness of the pattern: */
				638	#define MAGIC_NONE 1 /* "\V" very unmagic */
				639	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				640	#define MAGIC_ON 3 /* "\m" or 'magic' */
				641	#define MAGIC_ALL 4 /* "\v" very magic */
				642
				643	static int reg_string; /* matching with a string instead of a buffer
				644	line */
				645
				646	/*
				647	* META contains all characters that may be magic, except '^' and '$'.
				648	*/
				649
				650	#ifdef EBCDIC
				651	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				652	#else
				653	/* META[] is used often enough to justify turning it into a table. */
				654	static char_u META_flags[] = {
				655	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				656	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				657	/* % & ( ) * + . */
				658	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				659	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				660	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				661	/* @ A C D F H I K L M O */
				662	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				663	/* P S U V W X Z [ _ */
				664	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				665	/* a c d f h i k l m n o */
				666	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				667	/* p s u v w x z { \| ~ */
				668	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				669	};
				670	#endif
				671
				672	static int curchr;
				673
				674	/* arguments for reg() */
				675	#define REG_NOPAREN 0 /* toplevel reg() */
				676	#define REG_PAREN 1 /* */
				677	#define REG_ZPAREN 2 /* \z(\) */
				678	#define REG_NPAREN 3 /* \%(\) */
				679
				680	/*
				681	* Forward declarations for vim_regcomp()'s friends.
				682	*/
				683	static void initchr __ARGS((char_u *));
				684	static int getchr __ARGS((void));
				685	static void skipchr_keepstart __ARGS((void));
				686	static int peekchr __ARGS((void));
				687	static void skipchr __ARGS((void));
				688	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	689	static int gethexchrs __ARGS((int maxinputlen));
				690	static int getoctchrs __ARGS((void));
				691	static int getdecchrs __ARGS((void));
				692	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	693	static void regcomp_start __ARGS((char_u *expr, int flags));
				694	static char_u reg __ARGS((int, int ));
				695	static char_u regbranch __ARGS((int flagp));
				696	static char_u regconcat __ARGS((int flagp));
				697	static char_u regpiece __ARGS((int ));
				698	static char_u regatom __ARGS((int ));
				699	static char_u *regnode __ARGS((int));
				700	static int prog_magic_wrong __ARGS((void));
				701	static char_u regnext __ARGS((char_u ));
				702	static void regc __ARGS((int b));
				703	#ifdef FEAT_MBYTE
				704	static void regmbc __ARGS((int c));
				705	#endif
				706	static void reginsert __ARGS((int, char_u *));
				707	static void reginsert_limits __ARGS((int, long, long, char_u *));
				708	static char_u re_put_long __ARGS((char_u pr, long_u val));
				709	static int read_limits __ARGS((long , long ));
				710	static void regtail __ARGS((char_u , char_u ));
				711	static void regoptail __ARGS((char_u , char_u ));
				712
				713	/*
				714	* Return TRUE if compiled regular expression "prog" can match a line break.
				715	*/
				716	int
				717	re_multiline(prog)
				718	regprog_T *prog;
				719	{
				720	return (prog->regflags & RF_HASNL);
				721	}
				722
				723	/*
				724	* Return TRUE if compiled regular expression "prog" looks before the start
				725	* position (pattern contains "\@<=" or "\@<!").
				726	*/
				727	int
				728	re_lookbehind(prog)
				729	regprog_T *prog;
				730	{
				731	return (prog->regflags & RF_LOOKBH);
				732	}
				733
				734	/*
				735	* Skip past regular expression.
				736	* Stop at end of 'p' of where 'dirc' is found ('/', '?', etc).
				737	* Take care of characters with a backslash in front of it.
				738	* Skip strings inside [ and ].
				739	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				740	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				741	* is changed in-place.
				742	*/
				743	char_u *
				744	skip_regexp(startp, dirc, magic, newp)
				745	char_u *startp;
				746	int dirc;
				747	int magic;
				748	char_u **newp;
				749	{
				750	int mymagic;
				751	char_u *p = startp;
				752
				753	if (magic)
				754	mymagic = MAGIC_ON;
				755	else
				756	mymagic = MAGIC_OFF;
				757
				758	for (; p[0] != NUL; ++p)
				759	{
				760	if (p[0] == dirc) /* found end of regexp */
				761	break;
				762	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				763	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				764	{
				765	p = skip_anyof(p + 1);
				766	if (p[0] == NUL)
				767	break;
				768	}
				769	else if (p[0] == '\\' && p[1] != NUL)
				770	{
				771	if (dirc == '?' && newp != NULL && p[1] == '?')
				772	{
				773	/* change "\?" to "?", make a copy first. */
				774	if (*newp == NULL)
				775	{
				776	*newp = vim_strsave(startp);
				777	if (*newp != NULL)
				778	p = *newp + (p - startp);
				779	}
				780	if (*newp != NULL)
				781	mch_memmove(p, p + 1, STRLEN(p));
				782	else
				783	++p;
				784	}
				785	else
				786	++p; /* skip next character */
				787	if (*p == 'v')
				788	mymagic = MAGIC_ALL;
				789	else if (*p == 'V')
				790	mymagic = MAGIC_NONE;
				791	}
				792	#ifdef FEAT_MBYTE
				793	else if (has_mbyte)
				794	p += (*mb_ptr2len_check)(p) - 1;
				795	#endif
				796	}
				797	return p;
				798	}
				799
				800	/*
				801	* vim_regcomp - compile a regular expression into internal code
				802	*
				803	* We can't allocate space until we know how big the compiled form will be,
				804	* but we can't compile it (and thus know how big it is) until we've got a
				805	* place to put the code. So we cheat: we compile it twice, once with code
				806	* generation turned off and size counting turned on, and once "for real".
				807	* This also means that we don't allocate space until we are sure that the
				808	* thing really will compile successfully, and we never have to move the
				809	* code and thus invalidate pointers into it. (Note that it has to be in
				810	* one piece because vim_free() must be able to free it all.)
				811	*
				812	* Whether upper/lower case is to be ignored is decided when executing the
				813	* program, it does not matter here.
				814	*
				815	* Beware that the optimization-preparation code in here knows about some
				816	* of the structure of the compiled regexp.
				817	* "re_flags": RE_MAGIC and/or RE_STRING.
				818	*/
				819	regprog_T *
				820	vim_regcomp(expr, re_flags)
				821	char_u *expr;
				822	int re_flags;
				823	{
				824	regprog_T *r;
				825	char_u *scan;
				826	char_u *longest;
				827	int len;
				828	int flags;
				829
				830	if (expr == NULL)
				831	EMSG_RET_NULL(_(e_null));
				832
				833	init_class_tab();
				834
				835	/*
				836	* First pass: determine size, legality.
				837	*/
				838	regcomp_start(expr, re_flags);
				839	regcode = JUST_CALC_SIZE;
				840	regc(REGMAGIC);
				841	if (reg(REG_NOPAREN, &flags) == NULL)
				842	return NULL;
				843
				844	/* Small enough for pointer-storage convention? */
				845	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				846	if (regsize >= 65536L - 256L)
				847	EMSG_RET_NULL(_("E339: Pattern too long"));
				848	#endif
				849
				850	/* Allocate space. */
				851	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				852	if (r == NULL)
				853	return NULL;
				854
				855	/*
				856	* Second pass: emit code.
				857	*/
				858	regcomp_start(expr, re_flags);
				859	regcode = r->program;
				860	regc(REGMAGIC);
				861	if (reg(REG_NOPAREN, &flags) == NULL)
				862	{
				863	vim_free(r);
				864	return NULL;
				865	}
				866
				867	/* Dig out information for optimizations. */
				868	r->regstart = NUL; /* Worst-case defaults. */
				869	r->reganch = 0;
				870	r->regmust = NULL;
				871	r->regmlen = 0;
				872	r->regflags = regflags;
				873	if (flags & HASNL)
				874	r->regflags \|= RF_HASNL;
				875	if (flags & HASLOOKBH)
				876	r->regflags \|= RF_LOOKBH;
				877	#ifdef FEAT_SYN_HL
				878	/* Remember whether this pattern has any \z specials in it. */
				879	r->reghasz = re_has_z;
				880	#endif
				881	scan = r->program + 1; /* First BRANCH. */
				882	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				883	{
				884	scan = OPERAND(scan);
				885
				886	/* Starting-point info. */
				887	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				888	{
				889	r->reganch++;
				890	scan = regnext(scan);
				891	}
				892
				893	if (OP(scan) == EXACTLY)
				894	{
				895	#ifdef FEAT_MBYTE
				896	if (has_mbyte)
				897	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				898	else
				899	#endif
				900	r->regstart = *OPERAND(scan);
				901	}
				902	else if ((OP(scan) == BOW
				903	\|\| OP(scan) == EOW
				904	\|\| OP(scan) == NOTHING
				905	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				906	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				907	&& OP(regnext(scan)) == EXACTLY)
				908	{
				909	#ifdef FEAT_MBYTE
				910	if (has_mbyte)
				911	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				912	else
				913	#endif
				914	r->regstart = *OPERAND(regnext(scan));
				915	}
				916
				917	/*
				918	* If there's something expensive in the r.e., find the longest
				919	* literal string that must appear and make it the regmust. Resolve
				920	* ties in favor of later strings, since the regstart check works
				921	* with the beginning of the r.e. and avoiding duplication
				922	* strengthens checking. Not a strong reason, but sufficient in the
				923	* absence of others.
				924	*/
				925	/*
				926	* When the r.e. starts with BOW, it is faster to look for a regmust
				927	* first. Used a lot for "#" and "*" commands. (Added by mool).
				928	*/
				929	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				930	&& !(flags & HASNL))
				931	{
				932	longest = NULL;
				933	len = 0;
				934	for (; scan != NULL; scan = regnext(scan))
				935	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				936	{
				937	longest = OPERAND(scan);
				938	len = (int)STRLEN(OPERAND(scan));
				939	}
				940	r->regmust = longest;
				941	r->regmlen = len;
				942	}
				943	}
				944	#ifdef DEBUG
				945	regdump(expr, r);
				946	#endif
				947	return r;
				948	}
				949
				950	/*
				951	* Setup to parse the regexp. Used once to get the length and once to do it.
				952	*/
				953	static void
				954	regcomp_start(expr, re_flags)
				955	char_u *expr;
				956	int re_flags; /* see vim_regcomp() */
				957	{
				958	initchr(expr);
				959	if (re_flags & RE_MAGIC)
				960	reg_magic = MAGIC_ON;
				961	else
				962	reg_magic = MAGIC_OFF;
				963	reg_string = (re_flags & RE_STRING);
				964
				965	num_complex_braces = 0;
				966	regnpar = 1;
				967	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				968	#ifdef FEAT_SYN_HL
				969	regnzpar = 1;
				970	re_has_z = 0;
				971	#endif
				972	regsize = 0L;
				973	regflags = 0;
				974	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				975	had_eol = FALSE;
				976	#endif
				977	}
				978
				979	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				980	/*
				981	* Check if during the previous call to vim_regcomp the EOL item "$" has been
				982	* found. This is messy, but it works fine.
				983	*/
				984	int
				985	vim_regcomp_had_eol()
				986	{
				987	return had_eol;
				988	}
				989	#endif
				990
				991	/*
				992	* reg - regular expression, i.e. main body or parenthesized thing
				993	*
				994	* Caller must absorb opening parenthesis.
				995	*
				996	* Combining parenthesis handling with the base level of regular expression
				997	* is a trifle forced, but the need to tie the tails of the branches to what
				998	* follows makes it hard to avoid.
				999	*/
				1000	static char_u *
				1001	reg(paren, flagp)
				1002	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1003	int *flagp;
				1004	{
				1005	char_u *ret;
				1006	char_u *br;
				1007	char_u *ender;
				1008	int parno = 0;
				1009	int flags;
				1010
				1011	flagp = HASWIDTH; / Tentatively. */
				1012
				1013	#ifdef FEAT_SYN_HL
				1014	if (paren == REG_ZPAREN)
				1015	{
				1016	/* Make a ZOPEN node. */
				1017	if (regnzpar >= NSUBEXP)
				1018	EMSG_RET_NULL(_("E50: Too many \\z("));
				1019	parno = regnzpar;
				1020	regnzpar++;
				1021	ret = regnode(ZOPEN + parno);
				1022	}
				1023	else
				1024	#endif
				1025	if (paren == REG_PAREN)
				1026	{
				1027	/* Make a MOPEN node. */
				1028	if (regnpar >= NSUBEXP)
				1029	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1030	parno = regnpar;
				1031	++regnpar;
				1032	ret = regnode(MOPEN + parno);
				1033	}
				1034	else if (paren == REG_NPAREN)
				1035	{
				1036	/* Make a NOPEN node. */
				1037	ret = regnode(NOPEN);
				1038	}
				1039	else
				1040	ret = NULL;
				1041
				1042	/* Pick up the branches, linking them together. */
				1043	br = regbranch(&flags);
				1044	if (br == NULL)
				1045	return NULL;
				1046	if (ret != NULL)
				1047	regtail(ret, br); /* [MZ]OPEN -> first. */
				1048	else
				1049	ret = br;
				1050	/* If one of the branches can be zero-width, the whole thing can.
				1051	* If one of the branches has * at start or matches a line-break, the
				1052	* whole thing can. */
				1053	if (!(flags & HASWIDTH))
				1054	*flagp &= ~HASWIDTH;
				1055	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1056	while (peekchr() == Magic('\|'))
				1057	{
				1058	skipchr();
				1059	br = regbranch(&flags);
				1060	if (br == NULL)
				1061	return NULL;
				1062	regtail(ret, br); /* BRANCH -> BRANCH. */
				1063	if (!(flags & HASWIDTH))
				1064	*flagp &= ~HASWIDTH;
				1065	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1066	}
				1067
				1068	/* Make a closing node, and hook it on the end. */
				1069	ender = regnode(
				1070	#ifdef FEAT_SYN_HL
				1071	paren == REG_ZPAREN ? ZCLOSE + parno :
				1072	#endif
				1073	paren == REG_PAREN ? MCLOSE + parno :
				1074	paren == REG_NPAREN ? NCLOSE : END);
				1075	regtail(ret, ender);
				1076
				1077	/* Hook the tails of the branches to the closing node. */
				1078	for (br = ret; br != NULL; br = regnext(br))
				1079	regoptail(br, ender);
				1080
				1081	/* Check for proper termination. */
				1082	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1083	{
				1084	#ifdef FEAT_SYN_HL
				1085	if (paren == REG_ZPAREN)
				1086	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1087	else
				1088	#endif
				1089	if (paren == REG_NPAREN)
				1090	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1091	else
				1092	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1093	}
				1094	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1095	{
				1096	if (curchr == Magic(')'))
				1097	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1098	else
				1099	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1100	/* NOTREACHED */
				1101	}
				1102	/*
				1103	* Here we set the flag allowing back references to this set of
				1104	* parentheses.
				1105	*/
				1106	if (paren == REG_PAREN)
				1107	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1108	return ret;
				1109	}
				1110
				1111	/*
				1112	* regbranch - one alternative of an \| operator
				1113	*
				1114	* Implements the & operator.
				1115	*/
				1116	static char_u *
				1117	regbranch(flagp)
				1118	int *flagp;
				1119	{
				1120	char_u *ret;
				1121	char_u *chain = NULL;
				1122	char_u *latest;
				1123	int flags;
				1124
				1125	flagp = WORST \| HASNL; / Tentatively. */
				1126
				1127	ret = regnode(BRANCH);
				1128	for (;;)
				1129	{
				1130	latest = regconcat(&flags);
				1131	if (latest == NULL)
				1132	return NULL;
				1133	/* If one of the branches has width, the whole thing has. If one of
				1134	* the branches anchors at start-of-line, the whole thing does.
				1135	* If one of the branches uses look-behind, the whole thing does. */
				1136	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1137	/* If one of the branches doesn't match a line-break, the whole thing
				1138	* doesn't. */
				1139	*flagp &= ~HASNL \| (flags & HASNL);
				1140	if (chain != NULL)
				1141	regtail(chain, latest);
				1142	if (peekchr() != Magic('&'))
				1143	break;
				1144	skipchr();
				1145	regtail(latest, regnode(END)); /* operand ends */
				1146	reginsert(MATCH, latest);
				1147	chain = latest;
				1148	}
				1149
				1150	return ret;
				1151	}
				1152
				1153	/*
				1154	* regbranch - one alternative of an \| or & operator
				1155	*
				1156	* Implements the concatenation operator.
				1157	*/
				1158	static char_u *
				1159	regconcat(flagp)
				1160	int *flagp;
				1161	{
				1162	char_u *first = NULL;
				1163	char_u *chain = NULL;
				1164	char_u *latest;
				1165	int flags;
				1166	int cont = TRUE;
				1167
				1168	flagp = WORST; / Tentatively. */
				1169
				1170	while (cont)
				1171	{
				1172	switch (peekchr())
				1173	{
				1174	case NUL:
				1175	case Magic('\|'):
				1176	case Magic('&'):
				1177	case Magic(')'):
				1178	cont = FALSE;
				1179	break;
				1180	case Magic('Z'):
				1181	#ifdef FEAT_MBYTE
				1182	regflags \|= RF_ICOMBINE;
				1183	#endif
				1184	skipchr_keepstart();
				1185	break;
				1186	case Magic('c'):
				1187	regflags \|= RF_ICASE;
				1188	skipchr_keepstart();
				1189	break;
				1190	case Magic('C'):
				1191	regflags \|= RF_NOICASE;
				1192	skipchr_keepstart();
				1193	break;
				1194	case Magic('v'):
				1195	reg_magic = MAGIC_ALL;
				1196	skipchr_keepstart();
				1197	curchr = -1;
				1198	break;
				1199	case Magic('m'):
				1200	reg_magic = MAGIC_ON;
				1201	skipchr_keepstart();
				1202	curchr = -1;
				1203	break;
				1204	case Magic('M'):
				1205	reg_magic = MAGIC_OFF;
				1206	skipchr_keepstart();
				1207	curchr = -1;
				1208	break;
				1209	case Magic('V'):
				1210	reg_magic = MAGIC_NONE;
				1211	skipchr_keepstart();
				1212	curchr = -1;
				1213	break;
				1214	default:
				1215	latest = regpiece(&flags);
				1216	if (latest == NULL)
				1217	return NULL;
				1218	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1219	if (chain == NULL) /* First piece. */
				1220	*flagp \|= flags & SPSTART;
				1221	else
				1222	regtail(chain, latest);
				1223	chain = latest;
				1224	if (first == NULL)
				1225	first = latest;
				1226	break;
				1227	}
				1228	}
				1229	if (first == NULL) /* Loop ran zero times. */
				1230	first = regnode(NOTHING);
				1231	return first;
				1232	}
				1233
				1234	/*
				1235	* regpiece - something followed by possible [*+=]
				1236	*
				1237	* Note that the branching code sequences used for = and the general cases
				1238	* of * and + are somewhat optimized: they use the same NOTHING node as
				1239	* both the endmarker for their branch list and the body of the last branch.
				1240	* It might seem that this node could be dispensed with entirely, but the
				1241	* endmarker role is not redundant.
				1242	*/
				1243	static char_u *
				1244	regpiece(flagp)
				1245	int *flagp;
				1246	{
				1247	char_u *ret;
				1248	int op;
				1249	char_u *next;
				1250	int flags;
				1251	long minval;
				1252	long maxval;
				1253
				1254	ret = regatom(&flags);
				1255	if (ret == NULL)
				1256	return NULL;
				1257
				1258	op = peekchr();
				1259	if (re_multi_type(op) == NOT_MULTI)
				1260	{
				1261	*flagp = flags;
				1262	return ret;
				1263	}
				1264	if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
				1265	{
				1266	if (op == Magic('*'))
				1267	EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
				1268	reg_magic >= MAGIC_ON);
				1269	if (op == Magic('+'))
				1270	EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
				1271	reg_magic == MAGIC_ALL);
				1272	/* "\{}" is checked below, it's allowed when there is an upper limit */
				1273	}
				1274	/* default flags */
				1275	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1276
				1277	skipchr();
				1278	switch (op)
				1279	{
				1280	case Magic('*'):
				1281	if (flags & SIMPLE)
				1282	reginsert(STAR, ret);
				1283	else
				1284	{
				1285	/* Emit x* as (x&\|), where & means "self". */
				1286	reginsert(BRANCH, ret); /* Either x */
				1287	regoptail(ret, regnode(BACK)); /* and loop */
				1288	regoptail(ret, ret); /* back */
				1289	regtail(ret, regnode(BRANCH)); /* or */
				1290	regtail(ret, regnode(NOTHING)); /* null. */
				1291	}
				1292	break;
				1293
				1294	case Magic('+'):
				1295	if (flags & SIMPLE)
				1296	reginsert(PLUS, ret);
				1297	else
				1298	{
				1299	/* Emit x+ as x(&\|), where & means "self". */
				1300	next = regnode(BRANCH); /* Either */
				1301	regtail(ret, next);
				1302	regtail(regnode(BACK), ret); /* loop back */
				1303	regtail(next, regnode(BRANCH)); /* or */
				1304	regtail(ret, regnode(NOTHING)); /* null. */
				1305	}
				1306	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1307	break;
				1308
				1309	case Magic('@'):
				1310	{
				1311	int lop = END;
				1312
				1313	switch (no_Magic(getchr()))
				1314	{
				1315	case '=': lop = MATCH; break; /* \@= */
				1316	case '!': lop = NOMATCH; break; /* \@! */
				1317	case '>': lop = SUBPAT; break; /* \@> */
				1318	case '<': switch (no_Magic(getchr()))
				1319	{
				1320	case '=': lop = BEHIND; break; /* \@<= */
				1321	case '!': lop = NOBEHIND; break; /* \@<! */
				1322	}
				1323	}
				1324	if (lop == END)
				1325	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1326	reg_magic == MAGIC_ALL);
				1327	/* Look behind must match with behind_pos. */
				1328	if (lop == BEHIND \|\| lop == NOBEHIND)
				1329	{
				1330	regtail(ret, regnode(BHPOS));
				1331	*flagp \|= HASLOOKBH;
				1332	}
				1333	regtail(ret, regnode(END)); /* operand ends */
				1334	reginsert(lop, ret);
				1335	break;
				1336	}
				1337
				1338	case Magic('?'):
				1339	case Magic('='):
				1340	/* Emit x= as (x\|) */
				1341	reginsert(BRANCH, ret); /* Either x */
				1342	regtail(ret, regnode(BRANCH)); /* or */
				1343	next = regnode(NOTHING); /* null. */
				1344	regtail(ret, next);
				1345	regoptail(ret, next);
				1346	break;
				1347
				1348	case Magic('{'):
				1349	if (!read_limits(&minval, &maxval))
				1350	return NULL;
				1351	if (!(flags & HASWIDTH) && (maxval > minval
				1352	? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
				1353	EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
				1354	reg_magic == MAGIC_ALL);
				1355	if (flags & SIMPLE)
				1356	{
				1357	reginsert(BRACE_SIMPLE, ret);
				1358	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1359	}
				1360	else
				1361	{
				1362	if (num_complex_braces >= 10)
				1363	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1364	reg_magic == MAGIC_ALL);
				1365	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1366	regoptail(ret, regnode(BACK));
				1367	regoptail(ret, ret);
				1368	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1369	++num_complex_braces;
				1370	}
				1371	if (minval > 0 && maxval > 0)
				1372	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1373	break;
				1374	}
				1375	if (re_multi_type(peekchr()) != NOT_MULTI)
				1376	{
				1377	/* Can't have a multi follow a multi. */
				1378	if (peekchr() == Magic('*'))
				1379	sprintf((char )IObuff, _("E61: Nested %s"),
				1380	reg_magic >= MAGIC_ON ? "" : "\\");
				1381	else
				1382	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1383	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1384	EMSG_RET_NULL(IObuff);
				1385	}
				1386
				1387	return ret;
				1388	}
				1389
				1390	/*
				1391	* regatom - the lowest level
				1392	*
				1393	* Optimization: gobbles an entire sequence of ordinary characters so that
				1394	* it can turn them into a single node, which is smaller to store and
				1395	* faster to run. Don't do this when one_exactly is set.
				1396	*/
				1397	static char_u *
				1398	regatom(flagp)
				1399	int *flagp;
				1400	{
				1401	char_u *ret;
				1402	int flags;
				1403	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				1404	int c;
				1405	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1406	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1407	FNAME, SFNAME, PRINT, SPRINT,
				1408	WHITE, NWHITE, DIGIT, NDIGIT,
				1409	HEX, NHEX, OCTAL, NOCTAL,
				1410	WORD, NWORD, HEAD, NHEAD,
				1411	ALPHA, NALPHA, LOWER, NLOWER,
				1412	UPPER, NUPPER
				1413	};
				1414	char_u *p;
				1415	int extra = 0;
				1416
				1417	flagp = WORST; / Tentatively. */
				1418	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				1419
				1420	c = getchr();
				1421	switch (c)
				1422	{
				1423	case Magic('^'):
				1424	ret = regnode(BOL);
				1425	break;
				1426
				1427	case Magic('$'):
				1428	ret = regnode(EOL);
				1429	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1430	had_eol = TRUE;
				1431	#endif
				1432	break;
				1433
				1434	case Magic('<'):
				1435	ret = regnode(BOW);
				1436	break;
				1437
				1438	case Magic('>'):
				1439	ret = regnode(EOW);
				1440	break;
				1441
				1442	case Magic('_'):
				1443	c = no_Magic(getchr());
				1444	if (c == '^') /* "\_^" is start-of-line */
				1445	{
				1446	ret = regnode(BOL);
				1447	break;
				1448	}
				1449	if (c == '$') /* "\_$" is end-of-line */
				1450	{
				1451	ret = regnode(EOL);
				1452	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1453	had_eol = TRUE;
				1454	#endif
				1455	break;
				1456	}
				1457
				1458	extra = ADD_NL;
				1459	*flagp \|= HASNL;
				1460
				1461	/* "\_[" is character range plus newline */
				1462	if (c == '[')
				1463	goto collection;
				1464
				1465	/* "\_x" is character class plus newline */
				1466	/FALLTHROUGH/
				1467
				1468	/*
				1469	* Character classes.
				1470	*/
				1471	case Magic('.'):
				1472	case Magic('i'):
				1473	case Magic('I'):
				1474	case Magic('k'):
				1475	case Magic('K'):
				1476	case Magic('f'):
				1477	case Magic('F'):
				1478	case Magic('p'):
				1479	case Magic('P'):
				1480	case Magic('s'):
				1481	case Magic('S'):
				1482	case Magic('d'):
				1483	case Magic('D'):
				1484	case Magic('x'):
				1485	case Magic('X'):
				1486	case Magic('o'):
				1487	case Magic('O'):
				1488	case Magic('w'):
				1489	case Magic('W'):
				1490	case Magic('h'):
				1491	case Magic('H'):
				1492	case Magic('a'):
				1493	case Magic('A'):
				1494	case Magic('l'):
				1495	case Magic('L'):
				1496	case Magic('u'):
				1497	case Magic('U'):
				1498	p = vim_strchr(classchars, no_Magic(c));
				1499	if (p == NULL)
				1500	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1501	ret = regnode(classcodes[p - classchars] + extra);
				1502	*flagp \|= HASWIDTH \| SIMPLE;
				1503	break;
				1504
				1505	case Magic('n'):
				1506	if (reg_string)
				1507	{
				1508	/* In a string "\n" matches a newline character. */
				1509	ret = regnode(EXACTLY);
				1510	regc(NL);
				1511	regc(NUL);
				1512	*flagp \|= HASWIDTH \| SIMPLE;
				1513	}
				1514	else
				1515	{
				1516	/* In buffer text "\n" matches the end of a line. */
				1517	ret = regnode(NEWL);
				1518	*flagp \|= HASWIDTH \| HASNL;
				1519	}
				1520	break;
				1521
				1522	case Magic('('):
				1523	if (one_exactly)
				1524	EMSG_ONE_RET_NULL;
				1525	ret = reg(REG_PAREN, &flags);
				1526	if (ret == NULL)
				1527	return NULL;
				1528	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1529	break;
				1530
				1531	case NUL:
				1532	case Magic('\|'):
				1533	case Magic('&'):
				1534	case Magic(')'):
				1535	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1536	/* NOTREACHED */
				1537
				1538	case Magic('='):
				1539	case Magic('?'):
				1540	case Magic('+'):
				1541	case Magic('@'):
				1542	case Magic('{'):
				1543	case Magic('*'):
				1544	c = no_Magic(c);
				1545	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1546	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1547	? "" : "\\", c);
				1548	EMSG_RET_NULL(IObuff);
				1549	/* NOTREACHED */
				1550
				1551	case Magic('~'): /* previous substitute pattern */
				1552	if (reg_prev_sub)
				1553	{
				1554	char_u *lp;
				1555
				1556	ret = regnode(EXACTLY);
				1557	lp = reg_prev_sub;
				1558	while (*lp != NUL)
				1559	regc(*lp++);
				1560	regc(NUL);
				1561	if (*reg_prev_sub != NUL)
				1562	{
				1563	*flagp \|= HASWIDTH;
				1564	if ((lp - reg_prev_sub) == 1)
				1565	*flagp \|= SIMPLE;
				1566	}
				1567	}
				1568	else
				1569	EMSG_RET_NULL(_(e_nopresub));
				1570	break;
				1571
				1572	case Magic('1'):
				1573	case Magic('2'):
				1574	case Magic('3'):
				1575	case Magic('4'):
				1576	case Magic('5'):
				1577	case Magic('6'):
				1578	case Magic('7'):
				1579	case Magic('8'):
				1580	case Magic('9'):
				1581	{
				1582	int refnum;
				1583
				1584	refnum = c - Magic('0');
				1585	/*
				1586	* Check if the back reference is legal. We must have seen the
				1587	* close brace.
				1588	* TODO: Should also check that we don't refer to something
				1589	* that is repeated (+*=): what instance of the repetition
				1590	* should we match?
				1591	*/
				1592	if (!had_endbrace[refnum])
				1593	{
				1594	/* Trick: check if "@<=" or "@<!" follows, in which case
				1595	* the \1 can appear before the referenced match. */
				1596	for (p = regparse; *p != NUL; ++p)
				1597	if (p[0] == '@' && p[1] == '<'
				1598	&& (p[2] == '!' \|\| p[2] == '='))
				1599	break;
				1600	if (*p == NUL)
				1601	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1602	}
				1603	ret = regnode(BACKREF + refnum);
				1604	}
				1605	break;
				1606
				1607	#ifdef FEAT_SYN_HL
				1608	case Magic('z'):
				1609	{
				1610	c = no_Magic(getchr());
				1611	switch (c)
				1612	{
				1613	case '(': if (reg_do_extmatch != REX_SET)
				1614	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1615	if (one_exactly)
				1616	EMSG_ONE_RET_NULL;
				1617	ret = reg(REG_ZPAREN, &flags);
				1618	if (ret == NULL)
				1619	return NULL;
				1620	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1621	re_has_z = REX_SET;
				1622	break;
				1623
				1624	case '1':
				1625	case '2':
				1626	case '3':
				1627	case '4':
				1628	case '5':
				1629	case '6':
				1630	case '7':
				1631	case '8':
				1632	case '9': if (reg_do_extmatch != REX_USE)
				1633	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1634	ret = regnode(ZREF + c - '0');
				1635	re_has_z = REX_USE;
				1636	break;
				1637
				1638	case 's': ret = regnode(MOPEN + 0);
				1639	break;
				1640
				1641	case 'e': ret = regnode(MCLOSE + 0);
				1642	break;
				1643
				1644	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1645	}
				1646	}
				1647	break;
				1648	#endif
				1649
				1650	case Magic('%'):
				1651	{
				1652	c = no_Magic(getchr());
				1653	switch (c)
				1654	{
				1655	/* () without a back reference */
				1656	case '(':
				1657	if (one_exactly)
				1658	EMSG_ONE_RET_NULL;
				1659	ret = reg(REG_NPAREN, &flags);
				1660	if (ret == NULL)
				1661	return NULL;
				1662	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1663	break;
				1664
				1665	/* Catch \%^ and \%$ regardless of where they appear in the
				1666	* pattern -- regardless of whether or not it makes sense. */
				1667	case '^':
				1668	ret = regnode(RE_BOF);
				1669	break;
				1670
				1671	case '$':
				1672	ret = regnode(RE_EOF);
				1673	break;
				1674
				1675	case '#':
				1676	ret = regnode(CURSOR);
				1677	break;
				1678
				1679	/* \%[abc]: Emit as a list of branches, all ending at the last
				1680	* branch which matches nothing. */
				1681	case '[':
				1682	if (one_exactly) /* doesn't nest */
				1683	EMSG_ONE_RET_NULL;
				1684	{
				1685	char_u *lastbranch;
				1686	char_u *lastnode = NULL;
				1687	char_u *br;
				1688
				1689	ret = NULL;
				1690	while ((c = getchr()) != ']')
				1691	{
				1692	if (c == NUL)
				1693	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1694	reg_magic == MAGIC_ALL);
				1695	br = regnode(BRANCH);
				1696	if (ret == NULL)
				1697	ret = br;
				1698	else
				1699	regtail(lastnode, br);
				1700
				1701	ungetchr();
				1702	one_exactly = TRUE;
				1703	lastnode = regatom(flagp);
				1704	one_exactly = FALSE;
				1705	if (lastnode == NULL)
				1706	return NULL;
				1707	}
				1708	if (ret == NULL)
				1709	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1710	reg_magic == MAGIC_ALL);
				1711	lastbranch = regnode(BRANCH);
				1712	br = regnode(NOTHING);
				1713	if (ret != JUST_CALC_SIZE)
				1714	{
				1715	regtail(lastnode, br);
				1716	regtail(lastbranch, br);
				1717	/* connect all branches to the NOTHING
				1718	* branch at the end */
				1719	for (br = ret; br != lastnode; )
				1720	{
				1721	if (OP(br) == BRANCH)
				1722	{
				1723	regtail(br, lastbranch);
				1724	br = OPERAND(br);
				1725	}
				1726	else
				1727	br = regnext(br);
				1728	}
				1729	}
				1730	*flagp &= ~HASWIDTH;
				1731	break;
				1732	}
				1733
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1734	case 'd': /* %d123 decimal */
				1735	case 'o': /* %o123 octal */
				1736	case 'x': /* %xab hex 2 */
				1737	case 'u': /* %uabcd hex 4 */
				1738	case 'U': /* %U1234abcd hex 8 */
				1739	{
				1740	int i;
				1741
				1742	switch (c)
				1743	{
				1744	case 'd': i = getdecchrs(); break;
				1745	case 'o': i = getoctchrs(); break;
				1746	case 'x': i = gethexchrs(2); break;
				1747	case 'u': i = gethexchrs(4); break;
				1748	case 'U': i = gethexchrs(8); break;
				1749	default: i = -1; break;
				1750	}
				1751
				1752	if (i < 0)
				1753	EMSG_M_RET_NULL(
				1754	_("E678: Invalid character after %s%%[dxouU]"),
				1755	reg_magic == MAGIC_ALL);
				1756	ret = regnode(EXACTLY);
				1757	if (i == 0)
				1758	regc(0x0a);
				1759	else
				1760	#ifdef FEAT_MBYTE
				1761	regmbc(i);
				1762	#else
				1763	regc(i);
				1764	#endif
				1765	regc(NUL);
				1766	*flagp \|= HASWIDTH;
				1767	break;
				1768	}
				1769
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1770	default:
				1771	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1772	{
				1773	long_u n = 0;
				1774	int cmp;
				1775
				1776	cmp = c;
				1777	if (cmp == '<' \|\| cmp == '>')
				1778	c = getchr();
				1779	while (VIM_ISDIGIT(c))
				1780	{
				1781	n = n * 10 + (c - '0');
				1782	c = getchr();
				1783	}
				1784	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1785	{
				1786	if (c == 'l')
				1787	ret = regnode(RE_LNUM);
				1788	else if (c == 'c')
				1789	ret = regnode(RE_COL);
				1790	else
				1791	ret = regnode(RE_VCOL);
				1792	if (ret == JUST_CALC_SIZE)
				1793	regsize += 5;
				1794	else
				1795	{
				1796	/* put the number and the optional
				1797	* comparator after the opcode */
				1798	regcode = re_put_long(regcode, n);
				1799	*regcode++ = cmp;
				1800	}
				1801	break;
				1802	}
				1803	}
				1804
				1805	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1806	reg_magic == MAGIC_ALL);
				1807	}
				1808	}
				1809	break;
				1810
				1811	case Magic('['):
				1812	collection:
				1813	{
				1814	char_u *lp;
				1815
				1816	/*
				1817	* If there is no matching ']', we assume the '[' is a normal
				1818	* character. This makes 'incsearch' and ":help [" work.
				1819	*/
				1820	lp = skip_anyof(regparse);
				1821	if (lp == ']') / there is a matching ']' */
				1822	{
				1823	int startc = -1; /* > 0 when next '-' is a range */
				1824	int endc;
				1825
				1826	/*
				1827	* In a character class, different parsing rules apply.
				1828	* Not even \ is special anymore, nothing is.
				1829	*/
				1830	if (regparse == '^') / Complement of range. */
				1831	{
				1832	ret = regnode(ANYBUT + extra);
				1833	regparse++;
				1834	}
				1835	else
				1836	ret = regnode(ANYOF + extra);
				1837
				1838	/* At the start ']' and '-' mean the literal character. */
				1839	if (regparse == ']' \|\| regparse == '-')
				1840	regc(*regparse++);
				1841
				1842	while (regparse != NUL && regparse != ']')
				1843	{
				1844	if (*regparse == '-')
				1845	{
				1846	++regparse;
				1847	/* The '-' is not used for a range at the end and
				1848	* after or before a '\n'. */
				1849	if (regparse == ']' \|\| regparse == NUL
				1850	\|\| startc == -1
				1851	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				1852	{
				1853	regc('-');
				1854	startc = '-'; /* [--x] is a range */
				1855	}
				1856	else
				1857	{
				1858	#ifdef FEAT_MBYTE
				1859	if (has_mbyte)
				1860	endc = mb_ptr2char_adv(&regparse);
				1861	else
				1862	#endif
				1863	endc = *regparse++;
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1864
				1865	/* Handle \o40, \x20 and \u20AC style sequences */
				1866	if (endc == '\\' && !cpo_lit)
				1867	endc = coll_get_char();
				1868
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1869	if (startc > endc)
				1870	EMSG_RET_NULL(_(e_invrange));
				1871	#ifdef FEAT_MBYTE
				1872	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1873	\|\| (*mb_char2len)(endc) > 1))
				1874	{
				1875	/* Limit to a range of 256 chars */
				1876	if (endc > startc + 256)
				1877	EMSG_RET_NULL(_(e_invrange));
				1878	while (++startc <= endc)
				1879	regmbc(startc);
				1880	}
				1881	else
				1882	#endif
				1883	{
				1884	#ifdef EBCDIC
				1885	int alpha_only = FALSE;
				1886
				1887	/* for alphabetical range skip the gaps
				1888	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1889	if (isalpha(startc) && isalpha(endc))
				1890	alpha_only = TRUE;
				1891	#endif
				1892	while (++startc <= endc)
				1893	#ifdef EBCDIC
				1894	if (!alpha_only \|\| isalpha(startc))
				1895	#endif
				1896	regc(startc);
				1897	}
				1898	startc = -1;
				1899	}
				1900	}
				1901	/*
				1902	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1903	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1904	* 'cpoptions' is not included.
				1905	*/
				1906	else if (*regparse == '\\'
				1907	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1908	\|\| (!cpo_lit
				1909	&& vim_strchr(REGEXP_ABBR,
				1910	regparse[1]) != NULL)))
				1911	{
				1912	regparse++;
				1913	if (*regparse == 'n')
				1914	{
				1915	/* '\n' in range: also match NL */
				1916	if (ret != JUST_CALC_SIZE)
				1917	{
				1918	if (*ret == ANYBUT)
				1919	*ret = ANYBUT + ADD_NL;
				1920	else if (*ret == ANYOF)
				1921	*ret = ANYOF + ADD_NL;
				1922	/* else: must have had a \n already */
				1923	}
				1924	*flagp \|= HASNL;
				1925	regparse++;
				1926	startc = -1;
				1927	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1928	else if (*regparse == 'd'
				1929	\|\| *regparse == 'o'
				1930	\|\| *regparse == 'x'
				1931	\|\| *regparse == 'u'
				1932	\|\| *regparse == 'U')
				1933	{
				1934	startc = coll_get_char();
				1935	if (startc == 0)
				1936	regc(0x0a);
				1937	else
				1938	#ifdef FEAT_MBYTE
				1939	regmbc(startc);
				1940	#else
				1941	regc(startc);
				1942	#endif
				1943	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1944	else
				1945	{
				1946	startc = backslash_trans(*regparse++);
				1947	regc(startc);
				1948	}
				1949	}
				1950	else if (*regparse == '[')
				1951	{
				1952	int c_class;
				1953	int cu;
				1954
				1955	c_class = skip_class_name(&regparse);
				1956	startc = -1;
				1957	/* Characters assumed to be 8 bits! */
				1958	switch (c_class)
				1959	{
				1960	case CLASS_NONE:
				1961	/* literal '[', allow [[-x] as a range */
				1962	startc = *regparse++;
				1963	regc(startc);
				1964	break;
				1965	case CLASS_ALNUM:
				1966	for (cu = 1; cu <= 255; cu++)
				1967	if (isalnum(cu))
				1968	regc(cu);
				1969	break;
				1970	case CLASS_ALPHA:
				1971	for (cu = 1; cu <= 255; cu++)
				1972	if (isalpha(cu))
				1973	regc(cu);
				1974	break;
				1975	case CLASS_BLANK:
				1976	regc(' ');
				1977	regc('\t');
				1978	break;
				1979	case CLASS_CNTRL:
				1980	for (cu = 1; cu <= 255; cu++)
				1981	if (iscntrl(cu))
				1982	regc(cu);
				1983	break;
				1984	case CLASS_DIGIT:
				1985	for (cu = 1; cu <= 255; cu++)
				1986	if (VIM_ISDIGIT(cu))
				1987	regc(cu);
				1988	break;
				1989	case CLASS_GRAPH:
				1990	for (cu = 1; cu <= 255; cu++)
				1991	if (isgraph(cu))
				1992	regc(cu);
				1993	break;
				1994	case CLASS_LOWER:
				1995	for (cu = 1; cu <= 255; cu++)
				1996	if (islower(cu))
				1997	regc(cu);
				1998	break;
				1999	case CLASS_PRINT:
				2000	for (cu = 1; cu <= 255; cu++)
				2001	if (vim_isprintc(cu))
				2002	regc(cu);
				2003	break;
				2004	case CLASS_PUNCT:
				2005	for (cu = 1; cu <= 255; cu++)
				2006	if (ispunct(cu))
				2007	regc(cu);
				2008	break;
				2009	case CLASS_SPACE:
				2010	for (cu = 9; cu <= 13; cu++)
				2011	regc(cu);
				2012	regc(' ');
				2013	break;
				2014	case CLASS_UPPER:
				2015	for (cu = 1; cu <= 255; cu++)
				2016	if (isupper(cu))
				2017	regc(cu);
				2018	break;
				2019	case CLASS_XDIGIT:
				2020	for (cu = 1; cu <= 255; cu++)
				2021	if (vim_isxdigit(cu))
				2022	regc(cu);
				2023	break;
				2024	case CLASS_TAB:
				2025	regc('\t');
				2026	break;
				2027	case CLASS_RETURN:
				2028	regc('\r');
				2029	break;
				2030	case CLASS_BACKSPACE:
				2031	regc('\b');
				2032	break;
				2033	case CLASS_ESCAPE:
				2034	regc('\033');
				2035	break;
				2036	}
				2037	}
				2038	else
				2039	{
				2040	#ifdef FEAT_MBYTE
				2041	if (has_mbyte)
				2042	{
				2043	int len;
				2044
				2045	/* produce a multibyte character, including any
				2046	* following composing characters */
				2047	startc = mb_ptr2char(regparse);
				2048	len = (*mb_ptr2len_check)(regparse);
				2049	if (enc_utf8 && utf_char2len(startc) != len)
				2050	startc = -1; /* composing chars */
				2051	while (--len >= 0)
				2052	regc(*regparse++);
				2053	}
				2054	else
				2055	#endif
				2056	{
				2057	startc = *regparse++;
				2058	regc(startc);
				2059	}
				2060	}
				2061	}
				2062	regc(NUL);
				2063	prevchr_len = 1; /* last char was the ']' */
				2064	if (*regparse != ']')
				2065	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2066	skipchr(); /* let's be friends with the lexer again */
				2067	*flagp \|= HASWIDTH \| SIMPLE;
				2068	break;
				2069	}
				2070	}
				2071	/* FALLTHROUGH */
				2072
				2073	default:
				2074	{
				2075	int len;
				2076
				2077	#ifdef FEAT_MBYTE
				2078	/* A multi-byte character is handled as a separate atom if it's
				2079	* before a multi. */
				2080	if (has_mbyte && (*mb_char2len)(c) > 1
				2081	&& re_multi_type(peekchr()) != NOT_MULTI)
				2082	{
				2083	ret = regnode(MULTIBYTECODE);
				2084	regmbc(c);
				2085	*flagp \|= HASWIDTH \| SIMPLE;
				2086	break;
				2087	}
				2088	#endif
				2089
				2090	ret = regnode(EXACTLY);
				2091
				2092	/*
				2093	* Append characters as long as:
				2094	* - there is no following multi, we then need the character in
				2095	* front of it as a single character operand
				2096	* - not running into a Magic character
				2097	* - "one_exactly" is not set
				2098	* But always emit at least one character. Might be a Multi,
				2099	* e.g., a "[" without matching "]".
				2100	*/
				2101	for (len = 0; c != NUL && (len == 0
				2102	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2103	&& !one_exactly
				2104	&& !is_Magic(c))); ++len)
				2105	{
				2106	c = no_Magic(c);
				2107	#ifdef FEAT_MBYTE
				2108	if (has_mbyte)
				2109	{
				2110	regmbc(c);
				2111	if (enc_utf8)
				2112	{
				2113	int off;
				2114	int l;
				2115
				2116	/* Need to get composing character too, directly
				2117	* access regparse for that, because skipchr() skips
				2118	* over composing chars. */
				2119	ungetchr();
				2120	if (*regparse == '\\' && regparse[1] != NUL)
				2121	off = 1;
				2122	else
				2123	off = 0;
				2124	for (;;)
				2125	{
				2126	l = utf_ptr2len_check(regparse + off);
				2127	if (!UTF_COMPOSINGLIKE(regparse + off,
				2128	regparse + off + l))
				2129	break;
				2130	off += l;
				2131	regmbc(utf_ptr2char(regparse + off));
				2132	}
				2133	skipchr();
				2134	}
				2135	}
				2136	else
				2137	#endif
				2138	regc(c);
				2139	c = getchr();
				2140	}
				2141	ungetchr();
				2142
				2143	regc(NUL);
				2144	*flagp \|= HASWIDTH;
				2145	if (len == 1)
				2146	*flagp \|= SIMPLE;
				2147	}
				2148	break;
				2149	}
				2150
				2151	return ret;
				2152	}
				2153
				2154	/*
				2155	* emit a node
				2156	* Return pointer to generated code.
				2157	*/
				2158	static char_u *
				2159	regnode(op)
				2160	int op;
				2161	{
				2162	char_u *ret;
				2163
				2164	ret = regcode;
				2165	if (ret == JUST_CALC_SIZE)
				2166	regsize += 3;
				2167	else
				2168	{
				2169	*regcode++ = op;
				2170	regcode++ = NUL; / Null "next" pointer. */
				2171	*regcode++ = NUL;
				2172	}
				2173	return ret;
				2174	}
				2175
				2176	/*
				2177	* Emit (if appropriate) a byte of code
				2178	*/
				2179	static void
				2180	regc(b)
				2181	int b;
				2182	{
				2183	if (regcode == JUST_CALC_SIZE)
				2184	regsize++;
				2185	else
				2186	*regcode++ = b;
				2187	}
				2188
				2189	#ifdef FEAT_MBYTE
				2190	/*
				2191	* Emit (if appropriate) a multi-byte character of code
				2192	*/
				2193	static void
				2194	regmbc(c)
				2195	int c;
				2196	{
				2197	if (regcode == JUST_CALC_SIZE)
				2198	regsize += (*mb_char2len)(c);
				2199	else
				2200	regcode += (*mb_char2bytes)(c, regcode);
				2201	}
				2202	#endif
				2203
				2204	/*
				2205	* reginsert - insert an operator in front of already-emitted operand
				2206	*
				2207	* Means relocating the operand.
				2208	*/
				2209	static void
				2210	reginsert(op, opnd)
				2211	int op;
				2212	char_u *opnd;
				2213	{
				2214	char_u *src;
				2215	char_u *dst;
				2216	char_u *place;
				2217
				2218	if (regcode == JUST_CALC_SIZE)
				2219	{
				2220	regsize += 3;
				2221	return;
				2222	}
				2223	src = regcode;
				2224	regcode += 3;
				2225	dst = regcode;
				2226	while (src > opnd)
				2227	--dst = --src;
				2228
				2229	place = opnd; /* Op node, where operand used to be. */
				2230	*place++ = op;
				2231	*place++ = NUL;
				2232	*place = NUL;
				2233	}
				2234
				2235	/*
				2236	* reginsert_limits - insert an operator in front of already-emitted operand.
				2237	* The operator has the given limit values as operands. Also set next pointer.
				2238	*
				2239	* Means relocating the operand.
				2240	*/
				2241	static void
				2242	reginsert_limits(op, minval, maxval, opnd)
				2243	int op;
				2244	long minval;
				2245	long maxval;
				2246	char_u *opnd;
				2247	{
				2248	char_u *src;
				2249	char_u *dst;
				2250	char_u *place;
				2251
				2252	if (regcode == JUST_CALC_SIZE)
				2253	{
				2254	regsize += 11;
				2255	return;
				2256	}
				2257	src = regcode;
				2258	regcode += 11;
				2259	dst = regcode;
				2260	while (src > opnd)
				2261	--dst = --src;
				2262
				2263	place = opnd; /* Op node, where operand used to be. */
				2264	*place++ = op;
				2265	*place++ = NUL;
				2266	*place++ = NUL;
				2267	place = re_put_long(place, (long_u)minval);
				2268	place = re_put_long(place, (long_u)maxval);
				2269	regtail(opnd, place);
				2270	}
				2271
				2272	/*
				2273	* Write a long as four bytes at "p" and return pointer to the next char.
				2274	*/
				2275	static char_u *
				2276	re_put_long(p, val)
				2277	char_u *p;
				2278	long_u val;
				2279	{
				2280	*p++ = (char_u) ((val >> 24) & 0377);
				2281	*p++ = (char_u) ((val >> 16) & 0377);
				2282	*p++ = (char_u) ((val >> 8) & 0377);
				2283	*p++ = (char_u) (val & 0377);
				2284	return p;
				2285	}
				2286
				2287	/*
				2288	* regtail - set the next-pointer at the end of a node chain
				2289	*/
				2290	static void
				2291	regtail(p, val)
				2292	char_u *p;
				2293	char_u *val;
				2294	{
				2295	char_u *scan;
				2296	char_u *temp;
				2297	int offset;
				2298
				2299	if (p == JUST_CALC_SIZE)
				2300	return;
				2301
				2302	/* Find last node. */
				2303	scan = p;
				2304	for (;;)
				2305	{
				2306	temp = regnext(scan);
				2307	if (temp == NULL)
				2308	break;
				2309	scan = temp;
				2310	}
				2311
				2312	if (OP(scan) == BACK)
				2313	offset = (int)(scan - val);
				2314	else
				2315	offset = (int)(val - scan);
				2316	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2317	*(scan + 2) = (char_u) (offset & 0377);
				2318	}
				2319
				2320	/*
				2321	* regoptail - regtail on item after a BRANCH; nop if none
				2322	*/
				2323	static void
				2324	regoptail(p, val)
				2325	char_u *p;
				2326	char_u *val;
				2327	{
				2328	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2329	if (p == NULL \|\| p == JUST_CALC_SIZE
				2330	\|\| (OP(p) != BRANCH
				2331	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2332	return;
				2333	regtail(OPERAND(p), val);
				2334	}
				2335
				2336	/*
				2337	* getchr() - get the next character from the pattern. We know about
				2338	* magic and such, so therefore we need a lexical analyzer.
				2339	*/
				2340
				2341	/* static int curchr; */
				2342	static int prevprevchr;
				2343	static int prevchr;
				2344	static int nextchr; /* used for ungetchr() */
				2345	/*
				2346	* Note: prevchr is sometimes -1 when we are not at the start,
				2347	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2348	* taken to be magic -- webb
				2349	*/
				2350	static int at_start; /* True when on the first character */
				2351	static int prev_at_start; /* True when on the second character */
				2352
				2353	static void
				2354	initchr(str)
				2355	char_u *str;
				2356	{
				2357	regparse = str;
				2358	prevchr_len = 0;
				2359	curchr = prevprevchr = prevchr = nextchr = -1;
				2360	at_start = TRUE;
				2361	prev_at_start = FALSE;
				2362	}
				2363
				2364	static int
				2365	peekchr()
				2366	{
				2367	if (curchr == -1)
				2368	{
				2369	switch (curchr = regparse[0])
				2370	{
				2371	case '.':
				2372	case '[':
				2373	case '~':
				2374	/* magic when 'magic' is on */
				2375	if (reg_magic >= MAGIC_ON)
				2376	curchr = Magic(curchr);
				2377	break;
				2378	case '(':
				2379	case ')':
				2380	case '{':
				2381	case '%':
				2382	case '+':
				2383	case '=':
				2384	case '?':
				2385	case '@':
				2386	case '!':
				2387	case '&':
				2388	case '\|':
				2389	case '<':
				2390	case '>':
				2391	case '#': /* future ext. */
				2392	case '"': /* future ext. */
				2393	case '\'': /* future ext. */
				2394	case ',': /* future ext. */
				2395	case '-': /* future ext. */
				2396	case ':': /* future ext. */
				2397	case ';': /* future ext. */
				2398	case '`': /* future ext. */
				2399	case '/': /* Can't be used in / command */
				2400	/* magic only after "\v" */
				2401	if (reg_magic == MAGIC_ALL)
				2402	curchr = Magic(curchr);
				2403	break;
				2404	case '*':
				2405	/* * is not magic as the very first character, eg "?*ptr" and when
				2406	* after '^', eg "/^ptr" /
				2407	if (reg_magic >= MAGIC_ON && !at_start
				2408	&& !(prev_at_start && prevchr == Magic('^')))
				2409	curchr = Magic('*');
				2410	break;
				2411	case '^':
				2412	/* '^' is only magic as the very first character and if it's after
				2413	* "\(", "\\|", "\&' or "\n" */
				2414	if (reg_magic >= MAGIC_OFF
				2415	&& (at_start
				2416	\|\| reg_magic == MAGIC_ALL
				2417	\|\| prevchr == Magic('(')
				2418	\|\| prevchr == Magic('\|')
				2419	\|\| prevchr == Magic('&')
				2420	\|\| prevchr == Magic('n')
				2421	\|\| (no_Magic(prevchr) == '('
				2422	&& prevprevchr == Magic('%'))))
				2423	{
				2424	curchr = Magic('^');
				2425	at_start = TRUE;
				2426	prev_at_start = FALSE;
				2427	}
				2428	break;
				2429	case '$':
				2430	/* '$' is only magic as the very last char and if it's in front of
				2431	* either "\\|", "\)", "\&", or "\n" */
				2432	if (reg_magic >= MAGIC_OFF)
				2433	{
				2434	char_u *p = regparse + 1;
				2435
				2436	/* ignore \c \C \m and \M after '$' */
				2437	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2438	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2439	p += 2;
				2440	if (p[0] == NUL
				2441	\|\| (p[0] == '\\'
				2442	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2443	\|\| p[1] == 'n'))
				2444	\|\| reg_magic == MAGIC_ALL)
				2445	curchr = Magic('$');
				2446	}
				2447	break;
				2448	case '\\':
				2449	{
				2450	int c = regparse[1];
				2451
				2452	if (c == NUL)
				2453	curchr = '\\'; /* trailing '\' */
				2454	else if (
				2455	#ifdef EBCDIC
				2456	vim_strchr(META, c)
				2457	#else
				2458	c <= '~' && META_flags[c]
				2459	#endif
				2460	)
				2461	{
				2462	/*
				2463	* META contains everything that may be magic sometimes,
				2464	* except ^ and $ ("\^" and "\$" are only magic after
				2465	* "\v"). We now fetch the next character and toggle its
				2466	* magicness. Therefore, \ is so meta-magic that it is
				2467	* not in META.
				2468	*/
				2469	curchr = -1;
				2470	prev_at_start = at_start;
				2471	at_start = FALSE; /* be able to say "/\ptr" /
				2472	++regparse;
				2473	peekchr();
				2474	--regparse;
				2475	curchr = toggle_Magic(curchr);
				2476	}
				2477	else if (vim_strchr(REGEXP_ABBR, c))
				2478	{
				2479	/*
				2480	* Handle abbreviations, like "\t" for TAB -- webb
				2481	*/
				2482	curchr = backslash_trans(c);
				2483	}
				2484	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2485	curchr = toggle_Magic(c);
				2486	else
				2487	{
				2488	/*
				2489	* Next character can never be (made) magic?
				2490	* Then backslashing it won't do anything.
				2491	*/
				2492	#ifdef FEAT_MBYTE
				2493	if (has_mbyte)
				2494	curchr = (*mb_ptr2char)(regparse + 1);
				2495	else
				2496	#endif
				2497	curchr = c;
				2498	}
				2499	break;
				2500	}
				2501
				2502	#ifdef FEAT_MBYTE
				2503	default:
				2504	if (has_mbyte)
				2505	curchr = (*mb_ptr2char)(regparse);
				2506	#endif
				2507	}
				2508	}
				2509
				2510	return curchr;
				2511	}
				2512
				2513	/*
				2514	* Eat one lexed character. Do this in a way that we can undo it.
				2515	*/
				2516	static void
				2517	skipchr()
				2518	{
				2519	/* peekchr() eats a backslash, do the same here */
				2520	if (*regparse == '\\')
				2521	prevchr_len = 1;
				2522	else
				2523	prevchr_len = 0;
				2524	if (regparse[prevchr_len] != NUL)
				2525	{
				2526	#ifdef FEAT_MBYTE
				2527	if (has_mbyte)
				2528	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2529	else
				2530	#endif
				2531	++prevchr_len;
				2532	}
				2533	regparse += prevchr_len;
				2534	prev_at_start = at_start;
				2535	at_start = FALSE;
				2536	prevprevchr = prevchr;
				2537	prevchr = curchr;
				2538	curchr = nextchr; /* use previously unget char, or -1 */
				2539	nextchr = -1;
				2540	}
				2541
				2542	/*
				2543	* Skip a character while keeping the value of prev_at_start for at_start.
				2544	* prevchr and prevprevchr are also kept.
				2545	*/
				2546	static void
				2547	skipchr_keepstart()
				2548	{
				2549	int as = prev_at_start;
				2550	int pr = prevchr;
				2551	int prpr = prevprevchr;
				2552
				2553	skipchr();
				2554	at_start = as;
				2555	prevchr = pr;
				2556	prevprevchr = prpr;
				2557	}
				2558
				2559	static int
				2560	getchr()
				2561	{
				2562	int chr = peekchr();
				2563
				2564	skipchr();
				2565	return chr;
				2566	}
				2567
				2568	/*
				2569	* put character back. Works only once!
				2570	*/
				2571	static void
				2572	ungetchr()
				2573	{
				2574	nextchr = curchr;
				2575	curchr = prevchr;
				2576	prevchr = prevprevchr;
				2577	at_start = prev_at_start;
				2578	prev_at_start = FALSE;
				2579
				2580	/* Backup regparse, so that it's at the same position as before the
				2581	* getchr(). */
				2582	regparse -= prevchr_len;
				2583	}
				2584
				2585	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2586	* Get and return the value of the hex string at the current position.
				2587	* Return -1 if there is no valid hex number.
				2588	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2589	* blahblah\%x20asdf
				2590	* before-^ ^-after
				2591	* The parameter controls the maximum number of input characters. This will be
				2592	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2593	*/
				2594	static int
				2595	gethexchrs(maxinputlen)
				2596	int maxinputlen;
				2597	{
				2598	int nr = 0;
				2599	int c;
				2600	int i;
				2601
				2602	for (i = 0; i < maxinputlen; ++i)
				2603	{
				2604	c = regparse[0];
				2605	if (!vim_isxdigit(c))
				2606	break;
				2607	nr <<= 4;
				2608	nr \|= hex2nr(c);
				2609	++regparse;
				2610	}
				2611
				2612	if (i == 0)
				2613	return -1;
				2614	return nr;
				2615	}
				2616
				2617	/*
				2618	* get and return the value of the decimal string immediately after the
				2619	* current position. Return -1 for invalid. Consumes all digits.
				2620	*/
				2621	static int
				2622	getdecchrs()
				2623	{
				2624	int nr = 0;
				2625	int c;
				2626	int i;
				2627
				2628	for (i = 0; ; ++i)
				2629	{
				2630	c = regparse[0];
				2631	if (c < '0' \|\| c > '9')
				2632	break;
				2633	nr *= 10;
				2634	nr += c - '0';
				2635	++regparse;
				2636	}
				2637
				2638	if (i == 0)
				2639	return -1;
				2640	return nr;
				2641	}
				2642
				2643	/*
				2644	* get and return the value of the octal string immediately after the current
				2645	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2646	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2647	* treat 8 or 9 as recognised characters. Position is updated:
				2648	* blahblah\%o210asdf
				2649	* before-^ ^-after
				2650	*/
				2651	static int
				2652	getoctchrs()
				2653	{
				2654	int nr = 0;
				2655	int c;
				2656	int i;
				2657
				2658	for (i = 0; i < 3 && nr < 040; ++i)
				2659	{
				2660	c = regparse[0];
				2661	if (c < '0' \|\| c > '7')
				2662	break;
				2663	nr <<= 3;
				2664	nr \|= hex2nr(c);
				2665	++regparse;
				2666	}
				2667
				2668	if (i == 0)
				2669	return -1;
				2670	return nr;
				2671	}
				2672
				2673	/*
				2674	* Get a number after a backslash that is inside [].
				2675	* When nothing is recognized return a backslash.
				2676	*/
				2677	static int
				2678	coll_get_char()
				2679	{
				2680	int nr = -1;
				2681
				2682	switch (*regparse++)
				2683	{
				2684	case 'd': nr = getdecchrs(); break;
				2685	case 'o': nr = getoctchrs(); break;
				2686	case 'x': nr = gethexchrs(2); break;
				2687	case 'u': nr = gethexchrs(4); break;
				2688	case 'U': nr = gethexchrs(8); break;
				2689	}
				2690	if (nr < 0)
				2691	{
				2692	/* If getting the number fails be backwards compatible: the character
				2693	* is a backslash. */
				2694	--regparse;
				2695	nr = '\\';
				2696	}
				2697	return nr;
				2698	}
				2699
				2700	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2701	* read_limits - Read two integers to be taken as a minimum and maximum.
				2702	* If the first character is '-', then the range is reversed.
				2703	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2704	* missing, a very big number is the default.
				2705	*/
				2706	static int
				2707	read_limits(minval, maxval)
				2708	long *minval;
				2709	long *maxval;
				2710	{
				2711	int reverse = FALSE;
				2712	char_u *first_char;
				2713	long tmp;
				2714
				2715	if (*regparse == '-')
				2716	{
				2717	/* Starts with '-', so reverse the range later */
				2718	regparse++;
				2719	reverse = TRUE;
				2720	}
				2721	first_char = regparse;
				2722	*minval = getdigits(&regparse);
				2723	if (regparse == ',') / There is a comma */
				2724	{
				2725	if (vim_isdigit(*++regparse))
				2726	*maxval = getdigits(&regparse);
				2727	else
				2728	*maxval = MAX_LIMIT;
				2729	}
				2730	else if (VIM_ISDIGIT(*first_char))
				2731	maxval = minval; /* It was \{n} or \{-n} */
				2732	else
				2733	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2734	if (*regparse == '\\')
				2735	regparse++; /* Allow either \{...} or \{...\} */
				2736	if (regparse != '}' \|\| (maxval == 0 && *minval == 0))
				2737	{
				2738	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2739	reg_magic == MAGIC_ALL ? "" : "\\");
				2740	EMSG_RET_FAIL(IObuff);
				2741	}
				2742
				2743	/*
				2744	* Reverse the range if there was a '-', or make sure it is in the right
				2745	* order otherwise.
				2746	*/
				2747	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2748	{
				2749	tmp = *minval;
				2750	minval = maxval;
				2751	*maxval = tmp;
				2752	}
				2753	skipchr(); /* let's be friends with the lexer again */
				2754	return OK;
				2755	}
				2756
				2757	/*
				2758	* vim_regexec and friends
				2759	*/
				2760
				2761	/*
				2762	* Global work variables for vim_regexec().
				2763	*/
				2764
				2765	/* The current match-position is remembered with these variables: */
				2766	static linenr_T reglnum; /* line number, relative to first line */
				2767	static char_u regline; / start of current line */
				2768	static char_u reginput; / current input, points into "regline" */
				2769
				2770	static int need_clear_subexpr; /* subexpressions still need to be
				2771	* cleared */
				2772	#ifdef FEAT_SYN_HL
				2773	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2774	* still need to be cleared */
				2775	#endif
				2776
				2777	static int out_of_stack; /* TRUE when ran out of stack space */
				2778
				2779	/*
				2780	* Structure used to save the current input state, when it needs to be
				2781	* restored after trying a match. Used by reg_save() and reg_restore().
				2782	*/
				2783	typedef struct
				2784	{
				2785	union
				2786	{
				2787	char_u ptr; / reginput pointer, for single-line regexp */
				2788	lpos_T pos; /* reginput pos, for multi-line regexp */
				2789	} rs_u;
				2790	} regsave_T;
				2791
				2792	/* struct to save start/end pointer/position in for */
				2793	typedef struct
				2794	{
				2795	union
				2796	{
				2797	char_u *ptr;
				2798	lpos_T pos;
				2799	} se_u;
				2800	} save_se_T;
				2801
				2802	static char_u *reg_getline __ARGS((linenr_T lnum));
				2803	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2804	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2805	static void cleanup_subexpr __ARGS((void));
				2806	#ifdef FEAT_SYN_HL
				2807	static void cleanup_zsubexpr __ARGS((void));
				2808	#endif
				2809	static void reg_nextline __ARGS((void));
				2810	static void reg_save __ARGS((regsave_T *save));
				2811	static void reg_restore __ARGS((regsave_T *save));
				2812	static int reg_save_equal __ARGS((regsave_T *save));
				2813	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2814	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2815
				2816	/* Save the sub-expressions before attempting a match. */
				2817	#define save_se(savep, posp, pp) \
				2818	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				2819
				2820	/* After a failed match restore the sub-expressions. */
				2821	#define restore_se(savep, posp, pp) { \
				2822	if (REG_MULTI) \
				2823	*(posp) = (savep)->se_u.pos; \
				2824	else \
				2825	*(pp) = (savep)->se_u.ptr; }
				2826
				2827	static int re_num_cmp __ARGS((long_u val, char_u *scan));
				2828	static int regmatch __ARGS((char_u *prog));
				2829	static int regrepeat __ARGS((char_u *p, long maxcount));
				2830
				2831	#ifdef DEBUG
				2832	int regnarrate = 0;
				2833	#endif
				2834
				2835	/*
				2836	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				2837	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				2838	* contains '\c' or '\C' the value is overruled.
				2839	*/
				2840	static int ireg_ic;
				2841
				2842	#ifdef FEAT_MBYTE
				2843	/*
				2844	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				2845	* in the regexp. Defaults to false, always.
				2846	*/
				2847	static int ireg_icombine;
				2848	#endif
				2849
				2850	/*
				2851	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				2852	* slow, we keep one allocated piece of memory and only re-allocate it when
				2853	* it's too small. It's freed in vim_regexec_both() when finished.
				2854	*/
				2855	static char_u *reg_tofree;
				2856	static unsigned reg_tofreelen;
				2857
				2858	/*
				2859	* These variables are set when executing a regexp to speed up the execution.
				2860	* Which ones are set depends on whethere a single-line or multi-line match is
				2861	* done:
				2862	* single-line multi-line
				2863	* reg_match &regmatch_T NULL
				2864	* reg_mmatch NULL &regmmatch_T
				2865	* reg_startp reg_match->startp <invalid>
				2866	* reg_endp reg_match->endp <invalid>
				2867	* reg_startpos <invalid> reg_mmatch->startpos
				2868	* reg_endpos <invalid> reg_mmatch->endpos
				2869	* reg_win NULL window in which to search
				2870	* reg_buf <invalid> buffer in which to search
				2871	* reg_firstlnum <invalid> first line in which to search
				2872	* reg_maxline 0 last line nr
				2873	* reg_line_lbr FALSE or TRUE FALSE
				2874	*/
				2875	static regmatch_T *reg_match;
				2876	static regmmatch_T *reg_mmatch;
				2877	static char_u **reg_startp = NULL;
				2878	static char_u **reg_endp = NULL;
				2879	static lpos_T *reg_startpos = NULL;
				2880	static lpos_T *reg_endpos = NULL;
				2881	static win_T *reg_win;
				2882	static buf_T *reg_buf;
				2883	static linenr_T reg_firstlnum;
				2884	static linenr_T reg_maxline;
				2885	static int reg_line_lbr; /* "\n" in string is line break */
				2886
				2887	/*
				2888	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				2889	*/
				2890	static char_u *
				2891	reg_getline(lnum)
				2892	linenr_T lnum;
				2893	{
				2894	/* when looking behind for a match/no-match lnum is negative. But we
				2895	* can't go before line 1 */
				2896	if (reg_firstlnum + lnum < 1)
				2897	return NULL;
				2898	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				2899	}
				2900
				2901	static regsave_T behind_pos;
				2902
				2903	#ifdef FEAT_SYN_HL
				2904	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				2905	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				2906	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				2907	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				2908	#endif
				2909
				2910	/* TRUE if using multi-line regexp. */
				2911	#define REG_MULTI (reg_match == NULL)
				2912
				2913	/*
				2914	* Match a regexp against a string.
				2915	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2916	* Uses curbuf for line count and 'iskeyword'.
				2917	*
				2918	* Return TRUE if there is a match, FALSE if not.
				2919	*/
				2920	int
				2921	vim_regexec(rmp, line, col)
				2922	regmatch_T *rmp;
				2923	char_u line; / string to match against */
				2924	colnr_T col; /* column to start looking for match */
				2925	{
				2926	reg_match = rmp;
				2927	reg_mmatch = NULL;
				2928	reg_maxline = 0;
				2929	reg_line_lbr = FALSE;
				2930	reg_win = NULL;
				2931	ireg_ic = rmp->rm_ic;
				2932	#ifdef FEAT_MBYTE
				2933	ireg_icombine = FALSE;
				2934	#endif
				2935	return (vim_regexec_both(line, col) != 0);
				2936	}
				2937
				2938	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				2939	/*
				2940	* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
				2941	*/
				2942	int
				2943	vim_regexec_nl(rmp, line, col)
				2944	regmatch_T *rmp;
				2945	char_u line; / string to match against */
				2946	colnr_T col; /* column to start looking for match */
				2947	{
				2948	reg_match = rmp;
				2949	reg_mmatch = NULL;
				2950	reg_maxline = 0;
				2951	reg_line_lbr = TRUE;
				2952	reg_win = NULL;
				2953	ireg_ic = rmp->rm_ic;
				2954	#ifdef FEAT_MBYTE
				2955	ireg_icombine = FALSE;
				2956	#endif
				2957	return (vim_regexec_both(line, col) != 0);
				2958	}
				2959	#endif
				2960
				2961	/*
				2962	* Match a regexp against multiple lines.
				2963	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2964	* Uses curbuf for line count and 'iskeyword'.
				2965	*
				2966	* Return zero if there is no match. Return number of lines contained in the
				2967	* match otherwise.
				2968	*/
				2969	long
				2970	vim_regexec_multi(rmp, win, buf, lnum, col)
				2971	regmmatch_T *rmp;
				2972	win_T win; / window in which to search or NULL */
				2973	buf_T buf; / buffer in which to search */
				2974	linenr_T lnum; /* nr of line to start looking for match */
				2975	colnr_T col; /* column to start looking for match */
				2976	{
				2977	long r;
				2978	buf_T *save_curbuf = curbuf;
				2979
				2980	reg_match = NULL;
				2981	reg_mmatch = rmp;
				2982	reg_buf = buf;
				2983	reg_win = win;
				2984	reg_firstlnum = lnum;
				2985	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				2986	reg_line_lbr = FALSE;
				2987	ireg_ic = rmp->rmm_ic;
				2988	#ifdef FEAT_MBYTE
				2989	ireg_icombine = FALSE;
				2990	#endif
				2991
				2992	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				2993	curbuf = buf;
				2994	r = vim_regexec_both(NULL, col);
				2995	curbuf = save_curbuf;
				2996
				2997	return r;
				2998	}
				2999
				3000	/*
				3001	* Match a regexp against a string ("line" points to the string) or multiple
				3002	* lines ("line" is NULL, use reg_getline()).
				3003	*/
				3004	#ifdef HAVE_SETJMP_H
				3005	static long
				3006	vim_regexec_both(line_arg, col_arg)
				3007	char_u *line_arg;
				3008	colnr_T col_arg; /* column to start looking for match */
				3009	#else
				3010	static long
				3011	vim_regexec_both(line, col)
				3012	char_u *line;
				3013	colnr_T col; /* column to start looking for match */
				3014	#endif
				3015	{
				3016	regprog_T *prog;
				3017	char_u *s;
				3018	long retval;
				3019	#ifdef HAVE_SETJMP_H
				3020	char_u *line;
				3021	colnr_T col;
				3022	#endif
				3023
				3024	reg_tofree = NULL;
				3025
				3026	#ifdef HAVE_TRY_EXCEPT
				3027	__try
				3028	{
				3029	#endif
				3030
				3031	#ifdef HAVE_SETJMP_H
				3032	/*
				3033	* Matching with a regexp may cause a very deep recursive call of
				3034	* regmatch(). Vim will crash when running out of stack space. Catch
				3035	* this here if the system supports it.
				3036	*/
				3037	mch_startjmp();
				3038	if (SETJMP(lc_jump_env) != 0)
				3039	{
				3040	mch_didjmp();
				3041	# ifdef SIGHASARG
				3042	if (lc_signal != SIGINT)
				3043	# endif
				3044	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3045	retval = 0L;
				3046	goto theend;
				3047	}
				3048
				3049	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3050	line = line_arg;
				3051	col = col_arg;
				3052	#endif
				3053	retval = 0L;
				3054
				3055	if (REG_MULTI)
				3056	{
				3057	prog = reg_mmatch->regprog;
				3058	line = reg_getline((linenr_T)0);
				3059	reg_startpos = reg_mmatch->startpos;
				3060	reg_endpos = reg_mmatch->endpos;
				3061	}
				3062	else
				3063	{
				3064	prog = reg_match->regprog;
				3065	reg_startp = reg_match->startp;
				3066	reg_endp = reg_match->endp;
				3067	}
				3068
				3069	/* Be paranoid... */
				3070	if (prog == NULL \|\| line == NULL)
				3071	{
				3072	EMSG(_(e_null));
				3073	goto theend;
				3074	}
				3075
				3076	/* Check validity of program. */
				3077	if (prog_magic_wrong())
				3078	goto theend;
				3079
				3080	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3081	if (prog->regflags & RF_ICASE)
				3082	ireg_ic = TRUE;
				3083	else if (prog->regflags & RF_NOICASE)
				3084	ireg_ic = FALSE;
				3085
				3086	#ifdef FEAT_MBYTE
				3087	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3088	if (prog->regflags & RF_ICOMBINE)
				3089	ireg_icombine = TRUE;
				3090	#endif
				3091
				3092	/* If there is a "must appear" string, look for it. */
				3093	if (prog->regmust != NULL)
				3094	{
				3095	int c;
				3096
				3097	#ifdef FEAT_MBYTE
				3098	if (has_mbyte)
				3099	c = (*mb_ptr2char)(prog->regmust);
				3100	else
				3101	#endif
				3102	c = *prog->regmust;
				3103	s = line + col;
				3104	while ((s = cstrchr(s, c)) != NULL)
				3105	{
				3106	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3107	break; /* Found it. */
				3108	#ifdef FEAT_MBYTE
				3109	if (has_mbyte)
				3110	s += (*mb_ptr2len_check)(s);
				3111	else
				3112	#endif
				3113	++s;
				3114	}
				3115	if (s == NULL) /* Not present. */
				3116	goto theend;
				3117	}
				3118
				3119	regline = line;
				3120	reglnum = 0;
				3121	out_of_stack = FALSE;
				3122
				3123	/* Simplest case: Anchored match need be tried only once. */
				3124	if (prog->reganch)
				3125	{
				3126	int c;
				3127
				3128	#ifdef FEAT_MBYTE
				3129	if (has_mbyte)
				3130	c = (*mb_ptr2char)(regline + col);
				3131	else
				3132	#endif
				3133	c = regline[col];
				3134	if (prog->regstart == NUL
				3135	\|\| prog->regstart == c
				3136	\|\| (ireg_ic && ((
				3137	#ifdef FEAT_MBYTE
				3138	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3139	\|\| (c < 255 && prog->regstart < 255 &&
				3140	#endif
				3141	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3142	retval = regtry(prog, col);
				3143	else
				3144	retval = 0;
				3145	}
				3146	else
				3147	{
				3148	/* Messy cases: unanchored match. */
				3149	while (!got_int && !out_of_stack)
				3150	{
				3151	if (prog->regstart != NUL)
				3152	{
				3153	/* Skip until the char we know it must start with. */
				3154	s = cstrchr(regline + col, prog->regstart);
				3155	if (s == NULL)
				3156	{
				3157	retval = 0;
				3158	break;
				3159	}
				3160	col = (int)(s - regline);
				3161	}
				3162
				3163	retval = regtry(prog, col);
				3164	if (retval > 0)
				3165	break;
				3166
				3167	/* if not currently on the first line, get it again */
				3168	if (reglnum != 0)
				3169	{
				3170	regline = reg_getline((linenr_T)0);
				3171	reglnum = 0;
				3172	}
				3173	if (regline[col] == NUL)
				3174	break;
				3175	#ifdef FEAT_MBYTE
				3176	if (has_mbyte)
				3177	col += (*mb_ptr2len_check)(regline + col);
				3178	else
				3179	#endif
				3180	++col;
				3181	}
				3182	}
				3183
				3184	if (out_of_stack)
				3185	EMSG(_("E363: pattern caused out-of-stack error"));
				3186
				3187	#ifdef HAVE_TRY_EXCEPT
				3188	}
				3189	__except(EXCEPTION_EXECUTE_HANDLER)
				3190	{
				3191	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3192	{
				3193	RESETSTKOFLW();
				3194	EMSG(_("E363: pattern caused out-of-stack error"));
				3195	}
				3196	else
				3197	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3198	retval = 0L;
				3199	}
				3200	#endif
				3201
				3202	theend:
				3203	/* Didn't find a match. */
				3204	vim_free(reg_tofree);
				3205	#ifdef HAVE_SETJMP_H
				3206	mch_endjmp();
				3207	#endif
				3208	return retval;
				3209	}
				3210
				3211	#ifdef FEAT_SYN_HL
				3212	static reg_extmatch_T *make_extmatch __ARGS((void));
				3213
				3214	/*
				3215	* Create a new extmatch and mark it as referenced once.
				3216	*/
				3217	static reg_extmatch_T *
				3218	make_extmatch()
				3219	{
				3220	reg_extmatch_T *em;
				3221
				3222	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3223	if (em != NULL)
				3224	em->refcnt = 1;
				3225	return em;
				3226	}
				3227
				3228	/*
				3229	* Add a reference to an extmatch.
				3230	*/
				3231	reg_extmatch_T *
				3232	ref_extmatch(em)
				3233	reg_extmatch_T *em;
				3234	{
				3235	if (em != NULL)
				3236	em->refcnt++;
				3237	return em;
				3238	}
				3239
				3240	/*
				3241	* Remove a reference to an extmatch. If there are no references left, free
				3242	* the info.
				3243	*/
				3244	void
				3245	unref_extmatch(em)
				3246	reg_extmatch_T *em;
				3247	{
				3248	int i;
				3249
				3250	if (em != NULL && --em->refcnt <= 0)
				3251	{
				3252	for (i = 0; i < NSUBEXP; ++i)
				3253	vim_free(em->matches[i]);
				3254	vim_free(em);
				3255	}
				3256	}
				3257	#endif
				3258
				3259	/*
				3260	* regtry - try match of "prog" with at regline["col"].
				3261	* Returns 0 for failure, number of lines contained in the match otherwise.
				3262	*/
				3263	static long
				3264	regtry(prog, col)
				3265	regprog_T *prog;
				3266	colnr_T col;
				3267	{
				3268	reginput = regline + col;
				3269	need_clear_subexpr = TRUE;
				3270	#ifdef FEAT_SYN_HL
				3271	/* Clear the external match subpointers if necessary. */
				3272	if (prog->reghasz == REX_SET)
				3273	need_clear_zsubexpr = TRUE;
				3274	#endif
				3275
				3276	if (regmatch(prog->program + 1))
				3277	{
				3278	cleanup_subexpr();
				3279	if (REG_MULTI)
				3280	{
				3281	if (reg_startpos[0].lnum < 0)
				3282	{
				3283	reg_startpos[0].lnum = 0;
				3284	reg_startpos[0].col = col;
				3285	}
				3286	if (reg_endpos[0].lnum < 0)
				3287	{
				3288	reg_endpos[0].lnum = reglnum;
				3289	reg_endpos[0].col = (int)(reginput - regline);
				3290	}
				3291	else
				3292	/* Use line number of "\ze". */
				3293	reglnum = reg_endpos[0].lnum;
				3294	}
				3295	else
				3296	{
				3297	if (reg_startp[0] == NULL)
				3298	reg_startp[0] = regline + col;
				3299	if (reg_endp[0] == NULL)
				3300	reg_endp[0] = reginput;
				3301	}
				3302	#ifdef FEAT_SYN_HL
				3303	/* Package any found \z(...\) matches for export. Default is none. */
				3304	unref_extmatch(re_extmatch_out);
				3305	re_extmatch_out = NULL;
				3306
				3307	if (prog->reghasz == REX_SET)
				3308	{
				3309	int i;
				3310
				3311	cleanup_zsubexpr();
				3312	re_extmatch_out = make_extmatch();
				3313	for (i = 0; i < NSUBEXP; i++)
				3314	{
				3315	if (REG_MULTI)
				3316	{
				3317	/* Only accept single line matches. */
				3318	if (reg_startzpos[i].lnum >= 0
				3319	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3320	re_extmatch_out->matches[i] =
				3321	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3322	+ reg_startzpos[i].col,
				3323	reg_endzpos[i].col - reg_startzpos[i].col);
				3324	}
				3325	else
				3326	{
				3327	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3328	re_extmatch_out->matches[i] =
				3329	vim_strnsave(reg_startzp[i],
				3330	(int)(reg_endzp[i] - reg_startzp[i]));
				3331	}
				3332	}
				3333	}
				3334	#endif
				3335	return 1 + reglnum;
				3336	}
				3337	return 0;
				3338	}
				3339
				3340	#ifdef FEAT_MBYTE
				3341	/* multi-byte: advance reginput with a function */
				3342	# define ADVANCE_REGINPUT() advance_reginput()
				3343
				3344	static void advance_reginput __ARGS((void));
				3345	static int reg_prev_class __ARGS((void));
				3346
				3347	static void
				3348	advance_reginput()
				3349	{
				3350	if (has_mbyte)
				3351	reginput += (*mb_ptr2len_check)(reginput);
				3352	else
				3353	++reginput;
				3354	}
				3355
				3356	/*
				3357	* Get class of previous character.
				3358	*/
				3359	static int
				3360	reg_prev_class()
				3361	{
				3362	if (reginput > regline)
				3363	return mb_get_class(reginput - 1
				3364	- (*mb_head_off)(regline, reginput - 1));
				3365	return -1;
				3366	}
				3367
				3368	#else
				3369	/* No multi-byte: It's too simple to make a function for. */
				3370	# define ADVANCE_REGINPUT() ++reginput
				3371	#endif
				3372
				3373	/*
				3374	* The arguments from BRACE_LIMITS are stored here. They are actually local
				3375	* to regmatch(), but they are here to reduce the amount of stack space used
				3376	* (it can be called recursively many times).
				3377	*/
				3378	static long bl_minval;
				3379	static long bl_maxval;
				3380
				3381	/*
				3382	* regmatch - main matching routine
				3383	*
				3384	* Conceptually the strategy is simple: Check to see whether the current
				3385	* node matches, call self recursively to see whether the rest matches,
				3386	* and then act accordingly. In practice we make some effort to avoid
				3387	* recursion, in particular by going through "ordinary" nodes (that don't
				3388	* need to know whether the rest of the match failed) by a loop instead of
				3389	* by recursion.
				3390	*
				3391	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3392	* the last matched character.
				3393	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3394	* undefined state!
				3395	*/
				3396	static int
				3397	regmatch(scan)
				3398	char_u scan; / Current node. */
				3399	{
				3400	char_u next; / Next node. */
				3401	int op;
				3402	int c;
				3403
				3404	#ifdef HAVE_GETRLIMIT
				3405	/* Check if we are running out of stack space. Could be caused by
				3406	* recursively calling ourselves. */
				3407	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3408	{
				3409	out_of_stack = TRUE;
				3410	return FALSE;
				3411	}
				3412	#endif
				3413
				3414	/* Some patterns my cause a long time to match, even though they are not
				3415	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3416	fast_breakcheck();
				3417
				3418	#ifdef DEBUG
				3419	if (scan != NULL && regnarrate)
				3420	{
				3421	mch_errmsg(regprop(scan));
				3422	mch_errmsg("(\n");
				3423	}
				3424	#endif
				3425	while (scan != NULL)
				3426	{
				3427	if (got_int \|\| out_of_stack)
				3428	return FALSE;
				3429	#ifdef DEBUG
				3430	if (regnarrate)
				3431	{
				3432	mch_errmsg(regprop(scan));
				3433	mch_errmsg("...\n");
				3434	# ifdef FEAT_SYN_HL
				3435	if (re_extmatch_in != NULL)
				3436	{
				3437	int i;
				3438
				3439	mch_errmsg(_("External submatches:\n"));
				3440	for (i = 0; i < NSUBEXP; i++)
				3441	{
				3442	mch_errmsg(" \"");
				3443	if (re_extmatch_in->matches[i] != NULL)
				3444	mch_errmsg(re_extmatch_in->matches[i]);
				3445	mch_errmsg("\"\n");
				3446	}
				3447	}
				3448	# endif
				3449	}
				3450	#endif
				3451	next = regnext(scan);
				3452
				3453	op = OP(scan);
				3454	/* Check for character class with NL added. */
				3455	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3456	{
				3457	reg_nextline();
				3458	}
				3459	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3460	{
				3461	ADVANCE_REGINPUT();
				3462	}
				3463	else
				3464	{
				3465	if (WITH_NL(op))
				3466	op -= ADD_NL;
				3467	#ifdef FEAT_MBYTE
				3468	if (has_mbyte)
				3469	c = (*mb_ptr2char)(reginput);
				3470	else
				3471	#endif
				3472	c = *reginput;
				3473	switch (op)
				3474	{
				3475	case BOL:
				3476	if (reginput != regline)
				3477	return FALSE;
				3478	break;
				3479
				3480	case EOL:
				3481	if (c != NUL)
				3482	return FALSE;
				3483	break;
				3484
				3485	case RE_BOF:
				3486	/* Passing -1 to the getline() function provided for the search
				3487	* should always return NULL if the current line is the first
				3488	* line of the file. */
				3489	if (reglnum != 0 \|\| reginput != regline
				3490	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3491	return FALSE;
				3492	break;
				3493
				3494	case RE_EOF:
				3495	if (reglnum != reg_maxline \|\| c != NUL)
				3496	return FALSE;
				3497	break;
				3498
				3499	case CURSOR:
				3500	/* Check if the buffer is in a window and compare the
				3501	* reg_win->w_cursor position to the match position. */
				3502	if (reg_win == NULL
				3503	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3504	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3505	return FALSE;
				3506	break;
				3507
				3508	case RE_LNUM:
				3509	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3510	scan))
				3511	return FALSE;
				3512	break;
				3513
				3514	case RE_COL:
				3515	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3516	return FALSE;
				3517	break;
				3518
				3519	case RE_VCOL:
				3520	if (!re_num_cmp((long_u)win_linetabsize(
				3521	reg_win == NULL ? curwin : reg_win,
				3522	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3523	return FALSE;
				3524	break;
				3525
				3526	case BOW: /* \<word; reginput points to w */
				3527	if (c == NUL) /* Can't match at end of line */
				3528	return FALSE;
				3529	#ifdef FEAT_MBYTE
				3530	if (has_mbyte)
				3531	{
				3532	int this_class;
				3533
				3534	/* Get class of current and previous char (if it exists). */
				3535	this_class = mb_get_class(reginput);
				3536	if (this_class <= 1)
				3537	return FALSE; /* not on a word at all */
				3538	if (reg_prev_class() == this_class)
				3539	return FALSE; /* previous char is in same word */
				3540	}
				3541	#endif
				3542	else
				3543	{
				3544	if (!vim_iswordc(c)
				3545	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3546	return FALSE;
				3547	}
				3548	break;
				3549
				3550	case EOW: /* word\>; reginput points after d */
				3551	if (reginput == regline) /* Can't match at start of line */
				3552	return FALSE;
				3553	#ifdef FEAT_MBYTE
				3554	if (has_mbyte)
				3555	{
				3556	int this_class, prev_class;
				3557
				3558	/* Get class of current and previous char (if it exists). */
				3559	this_class = mb_get_class(reginput);
				3560	prev_class = reg_prev_class();
				3561	if (this_class == prev_class)
				3562	return FALSE;
				3563	if (prev_class == 0 \|\| prev_class == 1)
				3564	return FALSE;
				3565	}
				3566	else
				3567	#endif
				3568	{
				3569	if (!vim_iswordc(reginput[-1]))
				3570	return FALSE;
				3571	if (reginput[0] != NUL && vim_iswordc(c))
				3572	return FALSE;
				3573	}
				3574	break; /* Matched with EOW */
				3575
				3576	case ANY:
				3577	if (c == NUL)
				3578	return FALSE;
				3579	ADVANCE_REGINPUT();
				3580	break;
				3581
				3582	case IDENT:
				3583	if (!vim_isIDc(c))
				3584	return FALSE;
				3585	ADVANCE_REGINPUT();
				3586	break;
				3587
				3588	case SIDENT:
				3589	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3590	return FALSE;
				3591	ADVANCE_REGINPUT();
				3592	break;
				3593
				3594	case KWORD:
				3595	if (!vim_iswordp(reginput))
				3596	return FALSE;
				3597	ADVANCE_REGINPUT();
				3598	break;
				3599
				3600	case SKWORD:
				3601	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3602	return FALSE;
				3603	ADVANCE_REGINPUT();
				3604	break;
				3605
				3606	case FNAME:
				3607	if (!vim_isfilec(c))
				3608	return FALSE;
				3609	ADVANCE_REGINPUT();
				3610	break;
				3611
				3612	case SFNAME:
				3613	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3614	return FALSE;
				3615	ADVANCE_REGINPUT();
				3616	break;
				3617
				3618	case PRINT:
				3619	if (ptr2cells(reginput) != 1)
				3620	return FALSE;
				3621	ADVANCE_REGINPUT();
				3622	break;
				3623
				3624	case SPRINT:
				3625	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3626	return FALSE;
				3627	ADVANCE_REGINPUT();
				3628	break;
				3629
				3630	case WHITE:
				3631	if (!vim_iswhite(c))
				3632	return FALSE;
				3633	ADVANCE_REGINPUT();
				3634	break;
				3635
				3636	case NWHITE:
				3637	if (c == NUL \|\| vim_iswhite(c))
				3638	return FALSE;
				3639	ADVANCE_REGINPUT();
				3640	break;
				3641
				3642	case DIGIT:
				3643	if (!ri_digit(c))
				3644	return FALSE;
				3645	ADVANCE_REGINPUT();
				3646	break;
				3647
				3648	case NDIGIT:
				3649	if (c == NUL \|\| ri_digit(c))
				3650	return FALSE;
				3651	ADVANCE_REGINPUT();
				3652	break;
				3653
				3654	case HEX:
				3655	if (!ri_hex(c))
				3656	return FALSE;
				3657	ADVANCE_REGINPUT();
				3658	break;
				3659
				3660	case NHEX:
				3661	if (c == NUL \|\| ri_hex(c))
				3662	return FALSE;
				3663	ADVANCE_REGINPUT();
				3664	break;
				3665
				3666	case OCTAL:
				3667	if (!ri_octal(c))
				3668	return FALSE;
				3669	ADVANCE_REGINPUT();
				3670	break;
				3671
				3672	case NOCTAL:
				3673	if (c == NUL \|\| ri_octal(c))
				3674	return FALSE;
				3675	ADVANCE_REGINPUT();
				3676	break;
				3677
				3678	case WORD:
				3679	if (!ri_word(c))
				3680	return FALSE;
				3681	ADVANCE_REGINPUT();
				3682	break;
				3683
				3684	case NWORD:
				3685	if (c == NUL \|\| ri_word(c))
				3686	return FALSE;
				3687	ADVANCE_REGINPUT();
				3688	break;
				3689
				3690	case HEAD:
				3691	if (!ri_head(c))
				3692	return FALSE;
				3693	ADVANCE_REGINPUT();
				3694	break;
				3695
				3696	case NHEAD:
				3697	if (c == NUL \|\| ri_head(c))
				3698	return FALSE;
				3699	ADVANCE_REGINPUT();
				3700	break;
				3701
				3702	case ALPHA:
				3703	if (!ri_alpha(c))
				3704	return FALSE;
				3705	ADVANCE_REGINPUT();
				3706	break;
				3707
				3708	case NALPHA:
				3709	if (c == NUL \|\| ri_alpha(c))
				3710	return FALSE;
				3711	ADVANCE_REGINPUT();
				3712	break;
				3713
				3714	case LOWER:
				3715	if (!ri_lower(c))
				3716	return FALSE;
				3717	ADVANCE_REGINPUT();
				3718	break;
				3719
				3720	case NLOWER:
				3721	if (c == NUL \|\| ri_lower(c))
				3722	return FALSE;
				3723	ADVANCE_REGINPUT();
				3724	break;
				3725
				3726	case UPPER:
				3727	if (!ri_upper(c))
				3728	return FALSE;
				3729	ADVANCE_REGINPUT();
				3730	break;
				3731
				3732	case NUPPER:
				3733	if (c == NUL \|\| ri_upper(c))
				3734	return FALSE;
				3735	ADVANCE_REGINPUT();
				3736	break;
				3737
				3738	case EXACTLY:
				3739	{
				3740	int len;
				3741	char_u *opnd;
				3742
				3743	opnd = OPERAND(scan);
				3744	/* Inline the first byte, for speed. */
				3745	if (opnd != reginput
				3746	&& (!ireg_ic \|\| (
				3747	#ifdef FEAT_MBYTE
				3748	!enc_utf8 &&
				3749	#endif
				3750	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3751	return FALSE;
				3752	if (*opnd == NUL)
				3753	{
				3754	/* match empty string always works; happens when "~" is
				3755	* empty. */
				3756	}
				3757	else if (opnd[1] == NUL
				3758	#ifdef FEAT_MBYTE
				3759	&& !(enc_utf8 && ireg_ic)
				3760	#endif
				3761	)
				3762	++reginput; /* matched a single char */
				3763	else
				3764	{
				3765	len = (int)STRLEN(opnd);
				3766	/* Need to match first byte again for multi-byte. */
				3767	if (cstrncmp(opnd, reginput, &len) != 0)
				3768	return FALSE;
				3769	#ifdef FEAT_MBYTE
				3770	/* Check for following composing character. */
				3771	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3772	{
				3773	/* raaron: This code makes a composing character get
				3774	* ignored, which is the correct behavior (sometimes)
				3775	* for voweled Hebrew texts. */
				3776	if (!ireg_icombine)
				3777	return FALSE;
				3778	}
				3779	else
				3780	#endif
				3781	reginput += len;
				3782	}
				3783	}
				3784	break;
				3785
				3786	case ANYOF:
				3787	case ANYBUT:
				3788	if (c == NUL)
				3789	return FALSE;
				3790	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3791	return FALSE;
				3792	ADVANCE_REGINPUT();
				3793	break;
				3794
				3795	#ifdef FEAT_MBYTE
				3796	case MULTIBYTECODE:
				3797	if (has_mbyte)
				3798	{
				3799	int i, len;
				3800	char_u *opnd;
				3801
				3802	opnd = OPERAND(scan);
				3803	/* Safety check (just in case 'encoding' was changed since
				3804	* compiling the program). */
				3805	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3806	return FALSE;
				3807	for (i = 0; i < len; ++i)
				3808	if (opnd[i] != reginput[i])
				3809	return FALSE;
				3810	reginput += len;
				3811	}
				3812	else
				3813	return FALSE;
				3814	break;
				3815	#endif
				3816
				3817	case NOTHING:
				3818	break;
				3819
				3820	case BACK:
				3821	break;
				3822
				3823	case MOPEN + 0: /* Match start: \zs */
				3824	case MOPEN + 1: /* \( */
				3825	case MOPEN + 2:
				3826	case MOPEN + 3:
				3827	case MOPEN + 4:
				3828	case MOPEN + 5:
				3829	case MOPEN + 6:
				3830	case MOPEN + 7:
				3831	case MOPEN + 8:
				3832	case MOPEN + 9:
				3833	{
				3834	int no;
				3835	save_se_T save;
				3836
				3837	no = op - MOPEN;
				3838	cleanup_subexpr();
				3839	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				3840
				3841	if (regmatch(next))
				3842	return TRUE;
				3843
				3844	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				3845	return FALSE;
				3846	}
				3847	/* break; Not Reached */
				3848
				3849	case NOPEN: /* \%( */
				3850	case NCLOSE: /* \) after \%( */
				3851	if (regmatch(next))
				3852	return TRUE;
				3853	return FALSE;
				3854	/* break; Not Reached */
				3855
				3856	#ifdef FEAT_SYN_HL
				3857	case ZOPEN + 1:
				3858	case ZOPEN + 2:
				3859	case ZOPEN + 3:
				3860	case ZOPEN + 4:
				3861	case ZOPEN + 5:
				3862	case ZOPEN + 6:
				3863	case ZOPEN + 7:
				3864	case ZOPEN + 8:
				3865	case ZOPEN + 9:
				3866	{
				3867	int no;
				3868	save_se_T save;
				3869
				3870	no = op - ZOPEN;
				3871	cleanup_zsubexpr();
				3872	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3873
				3874	if (regmatch(next))
				3875	return TRUE;
				3876
				3877	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3878	return FALSE;
				3879	}
				3880	/* break; Not Reached */
				3881	#endif
				3882
				3883	case MCLOSE + 0: /* Match end: \ze */
				3884	case MCLOSE + 1: /* \) */
				3885	case MCLOSE + 2:
				3886	case MCLOSE + 3:
				3887	case MCLOSE + 4:
				3888	case MCLOSE + 5:
				3889	case MCLOSE + 6:
				3890	case MCLOSE + 7:
				3891	case MCLOSE + 8:
				3892	case MCLOSE + 9:
				3893	{
				3894	int no;
				3895	save_se_T save;
				3896
				3897	no = op - MCLOSE;
				3898	cleanup_subexpr();
				3899	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				3900
				3901	if (regmatch(next))
				3902	return TRUE;
				3903
				3904	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				3905	return FALSE;
				3906	}
				3907	/* break; Not Reached */
				3908
				3909	#ifdef FEAT_SYN_HL
				3910	case ZCLOSE + 1: /* \) after \z( */
				3911	case ZCLOSE + 2:
				3912	case ZCLOSE + 3:
				3913	case ZCLOSE + 4:
				3914	case ZCLOSE + 5:
				3915	case ZCLOSE + 6:
				3916	case ZCLOSE + 7:
				3917	case ZCLOSE + 8:
				3918	case ZCLOSE + 9:
				3919	{
				3920	int no;
				3921	save_se_T save;
				3922
				3923	no = op - ZCLOSE;
				3924	cleanup_zsubexpr();
				3925	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3926
				3927	if (regmatch(next))
				3928	return TRUE;
				3929
				3930	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3931	return FALSE;
				3932	}
				3933	/* break; Not Reached */
				3934	#endif
				3935
				3936	case BACKREF + 1:
				3937	case BACKREF + 2:
				3938	case BACKREF + 3:
				3939	case BACKREF + 4:
				3940	case BACKREF + 5:
				3941	case BACKREF + 6:
				3942	case BACKREF + 7:
				3943	case BACKREF + 8:
				3944	case BACKREF + 9:
				3945	{
				3946	int no;
				3947	int len;
				3948	linenr_T clnum;
				3949	colnr_T ccol;
				3950	char_u *p;
				3951
				3952	no = op - BACKREF;
				3953	cleanup_subexpr();
				3954	if (!REG_MULTI) /* Single-line regexp */
				3955	{
				3956	if (reg_endp[no] == NULL)
				3957	{
				3958	/* Backref was not set: Match an empty string. */
				3959	len = 0;
				3960	}
				3961	else
				3962	{
				3963	/* Compare current input with back-ref in the same
				3964	* line. */
				3965	len = (int)(reg_endp[no] - reg_startp[no]);
				3966	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				3967	return FALSE;
				3968	}
				3969	}
				3970	else /* Multi-line regexp */
				3971	{
				3972	if (reg_endpos[no].lnum < 0)
				3973	{
				3974	/* Backref was not set: Match an empty string. */
				3975	len = 0;
				3976	}
				3977	else
				3978	{
				3979	if (reg_startpos[no].lnum == reglnum
				3980	&& reg_endpos[no].lnum == reglnum)
				3981	{
				3982	/* Compare back-ref within the current line. */
				3983	len = reg_endpos[no].col - reg_startpos[no].col;
				3984	if (cstrncmp(regline + reg_startpos[no].col,
				3985	reginput, &len) != 0)
				3986	return FALSE;
				3987	}
				3988	else
				3989	{
				3990	/* Messy situation: Need to compare between two
				3991	* lines. */
				3992	ccol = reg_startpos[no].col;
				3993	clnum = reg_startpos[no].lnum;
				3994	for (;;)
				3995	{
				3996	/* Since getting one line may invalidate
				3997	* the other, need to make copy. Slow! */
				3998	if (regline != reg_tofree)
				3999	{
				4000	len = (int)STRLEN(regline);
				4001	if (reg_tofree == NULL
				4002	\|\| len >= (int)reg_tofreelen)
				4003	{
				4004	len += 50; /* get some extra */
				4005	vim_free(reg_tofree);
				4006	reg_tofree = alloc(len);
				4007	if (reg_tofree == NULL)
				4008	return FALSE; /* out of memory! */
				4009	reg_tofreelen = len;
				4010	}
				4011	STRCPY(reg_tofree, regline);
				4012	reginput = reg_tofree
				4013	+ (reginput - regline);
				4014	regline = reg_tofree;
				4015	}
				4016
				4017	/* Get the line to compare with. */
				4018	p = reg_getline(clnum);
				4019	if (clnum == reg_endpos[no].lnum)
				4020	len = reg_endpos[no].col - ccol;
				4021	else
				4022	len = (int)STRLEN(p + ccol);
				4023
				4024	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4025	return FALSE; /* doesn't match */
				4026	if (clnum == reg_endpos[no].lnum)
				4027	break; /* match and at end! */
				4028	if (reglnum == reg_maxline)
				4029	return FALSE; /* text too short */
				4030
				4031	/* Advance to next line. */
				4032	reg_nextline();
				4033	++clnum;
				4034	ccol = 0;
				4035	if (got_int \|\| out_of_stack)
				4036	return FALSE;
				4037	}
				4038
				4039	/* found a match! Note that regline may now point
				4040	* to a copy of the line, that should not matter. */
				4041	}
				4042	}
				4043	}
				4044
				4045	/* Matched the backref, skip over it. */
				4046	reginput += len;
				4047	}
				4048	break;
				4049
				4050	#ifdef FEAT_SYN_HL
				4051	case ZREF + 1:
				4052	case ZREF + 2:
				4053	case ZREF + 3:
				4054	case ZREF + 4:
				4055	case ZREF + 5:
				4056	case ZREF + 6:
				4057	case ZREF + 7:
				4058	case ZREF + 8:
				4059	case ZREF + 9:
				4060	{
				4061	int no;
				4062	int len;
				4063
				4064	cleanup_zsubexpr();
				4065	no = op - ZREF;
				4066	if (re_extmatch_in != NULL
				4067	&& re_extmatch_in->matches[no] != NULL)
				4068	{
				4069	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4070	if (cstrncmp(re_extmatch_in->matches[no],
				4071	reginput, &len) != 0)
				4072	return FALSE;
				4073	reginput += len;
				4074	}
				4075	else
				4076	{
				4077	/* Backref was not set: Match an empty string. */
				4078	}
				4079	}
				4080	break;
				4081	#endif
				4082
				4083	case BRANCH:
				4084	{
				4085	if (OP(next) != BRANCH) /* No choice. */
				4086	next = OPERAND(scan); /* Avoid recursion. */
				4087	else
				4088	{
				4089	regsave_T save;
				4090
				4091	do
				4092	{
				4093	reg_save(&save);
				4094	if (regmatch(OPERAND(scan)))
				4095	return TRUE;
				4096	reg_restore(&save);
				4097	scan = regnext(scan);
				4098	} while (scan != NULL && OP(scan) == BRANCH);
				4099	return FALSE;
				4100	/* NOTREACHED */
				4101	}
				4102	}
				4103	break;
				4104
				4105	case BRACE_LIMITS:
				4106	{
				4107	int no;
				4108
				4109	if (OP(next) == BRACE_SIMPLE)
				4110	{
				4111	bl_minval = OPERAND_MIN(scan);
				4112	bl_maxval = OPERAND_MAX(scan);
				4113	}
				4114	else if (OP(next) >= BRACE_COMPLEX
				4115	&& OP(next) < BRACE_COMPLEX + 10)
				4116	{
				4117	no = OP(next) - BRACE_COMPLEX;
				4118	brace_min[no] = OPERAND_MIN(scan);
				4119	brace_max[no] = OPERAND_MAX(scan);
				4120	brace_count[no] = 0;
				4121	}
				4122	else
				4123	{
				4124	EMSG(_(e_internal)); /* Shouldn't happen */
				4125	return FALSE;
				4126	}
				4127	}
				4128	break;
				4129
				4130	case BRACE_COMPLEX + 0:
				4131	case BRACE_COMPLEX + 1:
				4132	case BRACE_COMPLEX + 2:
				4133	case BRACE_COMPLEX + 3:
				4134	case BRACE_COMPLEX + 4:
				4135	case BRACE_COMPLEX + 5:
				4136	case BRACE_COMPLEX + 6:
				4137	case BRACE_COMPLEX + 7:
				4138	case BRACE_COMPLEX + 8:
				4139	case BRACE_COMPLEX + 9:
				4140	{
				4141	int no;
				4142	regsave_T save;
				4143
				4144	no = op - BRACE_COMPLEX;
				4145	++brace_count[no];
				4146
				4147	/* If not matched enough times yet, try one more */
				4148	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4149	? brace_min[no] : brace_max[no]))
				4150	{
				4151	reg_save(&save);
				4152	if (regmatch(OPERAND(scan)))
				4153	return TRUE;
				4154	reg_restore(&save);
				4155	--brace_count[no]; /* failed, decrement match count */
				4156	return FALSE;
				4157	}
				4158
				4159	/* If matched enough times, may try matching some more */
				4160	if (brace_min[no] <= brace_max[no])
				4161	{
				4162	/* Range is the normal way around, use longest match */
				4163	if (brace_count[no] <= brace_max[no])
				4164	{
				4165	reg_save(&save);
				4166	if (regmatch(OPERAND(scan)))
				4167	return TRUE; /* matched some more times */
				4168	reg_restore(&save);
				4169	--brace_count[no]; /* matched just enough times */
				4170	/* continue with the items after \{} */
				4171	}
				4172	}
				4173	else
				4174	{
				4175	/* Range is backwards, use shortest match first */
				4176	if (brace_count[no] <= brace_min[no])
				4177	{
				4178	reg_save(&save);
				4179	if (regmatch(next))
				4180	return TRUE;
				4181	reg_restore(&save);
				4182	next = OPERAND(scan);
				4183	/* must try to match one more item */
				4184	}
				4185	}
				4186	}
				4187	break;
				4188
				4189	case BRACE_SIMPLE:
				4190	case STAR:
				4191	case PLUS:
				4192	{
				4193	int nextb; /* next byte */
				4194	int nextb_ic; /* next byte reverse case */
				4195	long count;
				4196	regsave_T save;
				4197	long minval;
				4198	long maxval;
				4199
				4200	/*
				4201	* Lookahead to avoid useless match attempts when we know
				4202	* what character comes next.
				4203	*/
				4204	if (OP(next) == EXACTLY)
				4205	{
				4206	nextb = *OPERAND(next);
				4207	if (ireg_ic)
				4208	{
				4209	if (isupper(nextb))
				4210	nextb_ic = TOLOWER_LOC(nextb);
				4211	else
				4212	nextb_ic = TOUPPER_LOC(nextb);
				4213	}
				4214	else
				4215	nextb_ic = nextb;
				4216	}
				4217	else
				4218	{
				4219	nextb = NUL;
				4220	nextb_ic = NUL;
				4221	}
				4222	if (op != BRACE_SIMPLE)
				4223	{
				4224	minval = (op == STAR) ? 0 : 1;
				4225	maxval = MAX_LIMIT;
				4226	}
				4227	else
				4228	{
				4229	minval = bl_minval;
				4230	maxval = bl_maxval;
				4231	}
				4232
				4233	/*
				4234	* When maxval > minval, try matching as much as possible, up
				4235	* to maxval. When maxval < minval, try matching at least the
				4236	* minimal number (since the range is backwards, that's also
				4237	* maxval!).
				4238	*/
				4239	count = regrepeat(OPERAND(scan), maxval);
				4240	if (got_int)
				4241	return FALSE;
				4242	if (minval <= maxval)
				4243	{
				4244	/* Range is the normal way around, use longest match */
				4245	while (count >= minval)
				4246	{
				4247	/* If it could match, try it. */
				4248	if (nextb == NUL \|\| *reginput == nextb
				4249	\|\| *reginput == nextb_ic)
				4250	{
				4251	reg_save(&save);
				4252	if (regmatch(next))
				4253	return TRUE;
				4254	reg_restore(&save);
				4255	}
				4256	/* Couldn't or didn't match -- back up one char. */
				4257	if (--count < minval)
				4258	break;
				4259	if (reginput == regline)
				4260	{
				4261	/* backup to last char of previous line */
				4262	--reglnum;
				4263	regline = reg_getline(reglnum);
				4264	/* Just in case regrepeat() didn't count right. */
				4265	if (regline == NULL)
				4266	return FALSE;
				4267	reginput = regline + STRLEN(regline);
				4268	fast_breakcheck();
				4269	if (got_int \|\| out_of_stack)
				4270	return FALSE;
				4271	}
				4272	else
				4273	{
				4274	--reginput;
				4275	#ifdef FEAT_MBYTE
				4276	if (has_mbyte)
				4277	reginput -= (*mb_head_off)(regline, reginput);
				4278	#endif
				4279	}
				4280	}
				4281	}
				4282	else
				4283	{
				4284	/* Range is backwards, use shortest match first.
				4285	* Careful: maxval and minval are exchanged! */
				4286	if (count < maxval)
				4287	return FALSE;
				4288	for (;;)
				4289	{
				4290	/* If it could work, try it. */
				4291	if (nextb == NUL \|\| *reginput == nextb
				4292	\|\| *reginput == nextb_ic)
				4293	{
				4294	reg_save(&save);
				4295	if (regmatch(next))
				4296	return TRUE;
				4297	reg_restore(&save);
				4298	}
				4299	/* Couldn't or didn't match: try advancing one char. */
				4300	if (count == minval
				4301	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4302	break;
				4303	++count;
				4304	if (got_int \|\| out_of_stack)
				4305	return FALSE;
				4306	}
				4307	}
				4308	return FALSE;
				4309	}
				4310	/* break; Not Reached */
				4311
				4312	case NOMATCH:
				4313	{
				4314	regsave_T save;
				4315
				4316	/* If the operand matches, we fail. Otherwise backup and
				4317	* continue with the next item. */
				4318	reg_save(&save);
				4319	if (regmatch(OPERAND(scan)))
				4320	return FALSE;
				4321	reg_restore(&save);
				4322	}
				4323	break;
				4324
				4325	case MATCH:
				4326	case SUBPAT:
				4327	{
				4328	regsave_T save;
				4329
				4330	/* If the operand doesn't match, we fail. Otherwise backup
				4331	* and continue with the next item. */
				4332	reg_save(&save);
				4333	if (!regmatch(OPERAND(scan)))
				4334	return FALSE;
				4335	if (op == MATCH) /* zero-width */
				4336	reg_restore(&save);
				4337	}
				4338	break;
				4339
				4340	case BEHIND:
				4341	case NOBEHIND:
				4342	{
				4343	regsave_T save_after, save_start;
				4344	regsave_T save_behind_pos;
				4345	int needmatch = (op == BEHIND);
				4346
				4347	/*
				4348	* Look back in the input of the operand matches or not. This
				4349	* must be done at every position in the input and checking if
				4350	* the match ends at the current position.
				4351	* First check if the next item matches, that's probably
				4352	* faster.
				4353	*/
				4354	reg_save(&save_start);
				4355	if (regmatch(next))
				4356	{
				4357	/* save the position after the found match for next */
				4358	reg_save(&save_after);
				4359
				4360	/* start looking for a match with operand at the current
				4361	* postion. Go back one character until we find the
				4362	* result, hitting the start of the line or the previous
				4363	* line (for multi-line matching).
				4364	* Set behind_pos to where the match should end, BHPOS
				4365	* will match it. */
				4366	save_behind_pos = behind_pos;
				4367	behind_pos = save_start;
				4368	for (;;)
				4369	{
				4370	reg_restore(&save_start);
				4371	if (regmatch(OPERAND(scan))
				4372	&& reg_save_equal(&behind_pos))
				4373	{
				4374	behind_pos = save_behind_pos;
				4375	/* found a match that ends where "next" started */
				4376	if (needmatch)
				4377	{
				4378	reg_restore(&save_after);
				4379	return TRUE;
				4380	}
				4381	return FALSE;
				4382	}
				4383	/*
				4384	* No match: Go back one character. May go to
				4385	* previous line once.
				4386	*/
				4387	if (REG_MULTI)
				4388	{
				4389	if (save_start.rs_u.pos.col == 0)
				4390	{
				4391	if (save_start.rs_u.pos.lnum
				4392	< behind_pos.rs_u.pos.lnum
				4393	\|\| reg_getline(
				4394	--save_start.rs_u.pos.lnum) == NULL)
				4395	break;
				4396	reg_restore(&save_start);
				4397	save_start.rs_u.pos.col =
				4398	(colnr_T)STRLEN(regline);
				4399	}
				4400	else
				4401	--save_start.rs_u.pos.col;
				4402	}
				4403	else
				4404	{
				4405	if (save_start.rs_u.ptr == regline)
				4406	break;
				4407	--save_start.rs_u.ptr;
				4408	}
				4409	}
				4410
				4411	/* NOBEHIND succeeds when no match was found */
				4412	behind_pos = save_behind_pos;
				4413	if (!needmatch)
				4414	{
				4415	reg_restore(&save_after);
				4416	return TRUE;
				4417	}
				4418	}
				4419	return FALSE;
				4420	}
				4421
				4422	case BHPOS:
				4423	if (REG_MULTI)
				4424	{
				4425	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4426	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4427	return FALSE;
				4428	}
				4429	else if (behind_pos.rs_u.ptr != reginput)
				4430	return FALSE;
				4431	break;
				4432
				4433	case NEWL:
				4434	if ((c != NUL \|\| reglnum == reg_maxline)
				4435	&& (c != '\n' \|\| !reg_line_lbr))
				4436	return FALSE;
				4437	if (reg_line_lbr)
				4438	ADVANCE_REGINPUT();
				4439	else
				4440	reg_nextline();
				4441	break;
				4442
				4443	case END:
				4444	return TRUE; /* Success! */
				4445
				4446	default:
				4447	EMSG(_(e_re_corr));
				4448	#ifdef DEBUG
				4449	printf("Illegal op code %d\n", op);
				4450	#endif
				4451	return FALSE;
				4452	}
				4453	}
				4454
				4455	scan = next;
				4456	}
				4457
				4458	/*
				4459	* We get here only if there's trouble -- normally "case END" is the
				4460	* terminating point.
				4461	*/
				4462	EMSG(_(e_re_corr));
				4463	#ifdef DEBUG
				4464	printf("Premature EOL\n");
				4465	#endif
				4466	return FALSE;
				4467	}
				4468
				4469	#ifdef FEAT_MBYTE
				4470	# define ADVANCE_P(x) if (has_mbyte) x += (*mb_ptr2len_check)(x); else ++x
				4471	#else
				4472	# define ADVANCE_P(x) ++x
				4473	#endif
				4474
				4475	/*
				4476	* regrepeat - repeatedly match something simple, return how many.
				4477	* Advances reginput (and reglnum) to just after the matched chars.
				4478	*/
				4479	static int
				4480	regrepeat(p, maxcount)
				4481	char_u *p;
				4482	long maxcount; /* maximum number of matches allowed */
				4483	{
				4484	long count = 0;
				4485	char_u *scan;
				4486	char_u *opnd;
				4487	int mask;
				4488	int testval = 0;
				4489
				4490	scan = reginput; /* Make local copy of reginput for speed. */
				4491	opnd = OPERAND(p);
				4492	switch (OP(p))
				4493	{
				4494	case ANY:
				4495	case ANY + ADD_NL:
				4496	while (count < maxcount)
				4497	{
				4498	/* Matching anything means we continue until end-of-line (or
				4499	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4500	while (*scan != NUL && count < maxcount)
				4501	{
				4502	++count;
				4503	ADVANCE_P(scan);
				4504	}
				4505	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4506	break;
				4507	++count; /* count the line-break */
				4508	reg_nextline();
				4509	scan = reginput;
				4510	if (got_int)
				4511	break;
				4512	}
				4513	break;
				4514
				4515	case IDENT:
				4516	case IDENT + ADD_NL:
				4517	testval = TRUE;
				4518	/FALLTHROUGH/
				4519	case SIDENT:
				4520	case SIDENT + ADD_NL:
				4521	while (count < maxcount)
				4522	{
				4523	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4524	{
				4525	ADVANCE_P(scan);
				4526	}
				4527	else if (*scan == NUL)
				4528	{
				4529	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4530	break;
				4531	reg_nextline();
				4532	scan = reginput;
				4533	if (got_int)
				4534	break;
				4535	}
				4536	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4537	++scan;
				4538	else
				4539	break;
				4540	++count;
				4541	}
				4542	break;
				4543
				4544	case KWORD:
				4545	case KWORD + ADD_NL:
				4546	testval = TRUE;
				4547	/FALLTHROUGH/
				4548	case SKWORD:
				4549	case SKWORD + ADD_NL:
				4550	while (count < maxcount)
				4551	{
				4552	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4553	{
				4554	ADVANCE_P(scan);
				4555	}
				4556	else if (*scan == NUL)
				4557	{
				4558	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4559	break;
				4560	reg_nextline();
				4561	scan = reginput;
				4562	if (got_int)
				4563	break;
				4564	}
				4565	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4566	++scan;
				4567	else
				4568	break;
				4569	++count;
				4570	}
				4571	break;
				4572
				4573	case FNAME:
				4574	case FNAME + ADD_NL:
				4575	testval = TRUE;
				4576	/FALLTHROUGH/
				4577	case SFNAME:
				4578	case SFNAME + ADD_NL:
				4579	while (count < maxcount)
				4580	{
				4581	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4582	{
				4583	ADVANCE_P(scan);
				4584	}
				4585	else if (*scan == NUL)
				4586	{
				4587	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4588	break;
				4589	reg_nextline();
				4590	scan = reginput;
				4591	if (got_int)
				4592	break;
				4593	}
				4594	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4595	++scan;
				4596	else
				4597	break;
				4598	++count;
				4599	}
				4600	break;
				4601
				4602	case PRINT:
				4603	case PRINT + ADD_NL:
				4604	testval = TRUE;
				4605	/FALLTHROUGH/
				4606	case SPRINT:
				4607	case SPRINT + ADD_NL:
				4608	while (count < maxcount)
				4609	{
				4610	if (*scan == NUL)
				4611	{
				4612	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4613	break;
				4614	reg_nextline();
				4615	scan = reginput;
				4616	if (got_int)
				4617	break;
				4618	}
				4619	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4620	{
				4621	ADVANCE_P(scan);
				4622	}
				4623	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4624	++scan;
				4625	else
				4626	break;
				4627	++count;
				4628	}
				4629	break;
				4630
				4631	case WHITE:
				4632	case WHITE + ADD_NL:
				4633	testval = mask = RI_WHITE;
				4634	do_class:
				4635	while (count < maxcount)
				4636	{
				4637	#ifdef FEAT_MBYTE
				4638	int l;
				4639	#endif
				4640	if (*scan == NUL)
				4641	{
				4642	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4643	break;
				4644	reg_nextline();
				4645	scan = reginput;
				4646	if (got_int)
				4647	break;
				4648	}
				4649	#ifdef FEAT_MBYTE
				4650	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4651	{
				4652	if (testval != 0)
				4653	break;
				4654	scan += l;
				4655	}
				4656	#endif
				4657	else if ((class_tab[*scan] & mask) == testval)
				4658	++scan;
				4659	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4660	++scan;
				4661	else
				4662	break;
				4663	++count;
				4664	}
				4665	break;
				4666
				4667	case NWHITE:
				4668	case NWHITE + ADD_NL:
				4669	mask = RI_WHITE;
				4670	goto do_class;
				4671	case DIGIT:
				4672	case DIGIT + ADD_NL:
				4673	testval = mask = RI_DIGIT;
				4674	goto do_class;
				4675	case NDIGIT:
				4676	case NDIGIT + ADD_NL:
				4677	mask = RI_DIGIT;
				4678	goto do_class;
				4679	case HEX:
				4680	case HEX + ADD_NL:
				4681	testval = mask = RI_HEX;
				4682	goto do_class;
				4683	case NHEX:
				4684	case NHEX + ADD_NL:
				4685	mask = RI_HEX;
				4686	goto do_class;
				4687	case OCTAL:
				4688	case OCTAL + ADD_NL:
				4689	testval = mask = RI_OCTAL;
				4690	goto do_class;
				4691	case NOCTAL:
				4692	case NOCTAL + ADD_NL:
				4693	mask = RI_OCTAL;
				4694	goto do_class;
				4695	case WORD:
				4696	case WORD + ADD_NL:
				4697	testval = mask = RI_WORD;
				4698	goto do_class;
				4699	case NWORD:
				4700	case NWORD + ADD_NL:
				4701	mask = RI_WORD;
				4702	goto do_class;
				4703	case HEAD:
				4704	case HEAD + ADD_NL:
				4705	testval = mask = RI_HEAD;
				4706	goto do_class;
				4707	case NHEAD:
				4708	case NHEAD + ADD_NL:
				4709	mask = RI_HEAD;
				4710	goto do_class;
				4711	case ALPHA:
				4712	case ALPHA + ADD_NL:
				4713	testval = mask = RI_ALPHA;
				4714	goto do_class;
				4715	case NALPHA:
				4716	case NALPHA + ADD_NL:
				4717	mask = RI_ALPHA;
				4718	goto do_class;
				4719	case LOWER:
				4720	case LOWER + ADD_NL:
				4721	testval = mask = RI_LOWER;
				4722	goto do_class;
				4723	case NLOWER:
				4724	case NLOWER + ADD_NL:
				4725	mask = RI_LOWER;
				4726	goto do_class;
				4727	case UPPER:
				4728	case UPPER + ADD_NL:
				4729	testval = mask = RI_UPPER;
				4730	goto do_class;
				4731	case NUPPER:
				4732	case NUPPER + ADD_NL:
				4733	mask = RI_UPPER;
				4734	goto do_class;
				4735
				4736	case EXACTLY:
				4737	{
				4738	int cu, cl;
				4739
				4740	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4741	* would have been used for it. */
				4742	if (ireg_ic)
				4743	{
				4744	cu = TOUPPER_LOC(*opnd);
				4745	cl = TOLOWER_LOC(*opnd);
				4746	while (count < maxcount && (scan == cu \|\| scan == cl))
				4747	{
				4748	count++;
				4749	scan++;
				4750	}
				4751	}
				4752	else
				4753	{
				4754	cu = *opnd;
				4755	while (count < maxcount && *scan == cu)
				4756	{
				4757	count++;
				4758	scan++;
				4759	}
				4760	}
				4761	break;
				4762	}
				4763
				4764	#ifdef FEAT_MBYTE
				4765	case MULTIBYTECODE:
				4766	{
				4767	int i, len, cf = 0;
				4768
				4769	/* Safety check (just in case 'encoding' was changed since
				4770	* compiling the program). */
				4771	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4772	{
				4773	if (ireg_ic && enc_utf8)
				4774	cf = utf_fold(utf_ptr2char(opnd));
				4775	while (count < maxcount)
				4776	{
				4777	for (i = 0; i < len; ++i)
				4778	if (opnd[i] != scan[i])
				4779	break;
				4780	if (i < len && (!ireg_ic \|\| !enc_utf8
				4781	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4782	break;
				4783	scan += len;
				4784	++count;
				4785	}
				4786	}
				4787	}
				4788	break;
				4789	#endif
				4790
				4791	case ANYOF:
				4792	case ANYOF + ADD_NL:
				4793	testval = TRUE;
				4794	/FALLTHROUGH/
				4795
				4796	case ANYBUT:
				4797	case ANYBUT + ADD_NL:
				4798	while (count < maxcount)
				4799	{
				4800	#ifdef FEAT_MBYTE
				4801	int len;
				4802	#endif
				4803	if (*scan == NUL)
				4804	{
				4805	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4806	break;
				4807	reg_nextline();
				4808	scan = reginput;
				4809	if (got_int)
				4810	break;
				4811	}
				4812	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4813	++scan;
				4814	#ifdef FEAT_MBYTE
				4815	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4816	{
				4817	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4818	break;
				4819	scan += len;
				4820	}
				4821	#endif
				4822	else
				4823	{
				4824	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4825	break;
				4826	++scan;
				4827	}
				4828	++count;
				4829	}
				4830	break;
				4831
				4832	case NEWL:
				4833	while (count < maxcount
				4834	&& ((*scan == NUL && reglnum < reg_maxline)
				4835	\|\| (*scan == '\n' && reg_line_lbr)))
				4836	{
				4837	count++;
				4838	if (reg_line_lbr)
				4839	ADVANCE_REGINPUT();
				4840	else
				4841	reg_nextline();
				4842	scan = reginput;
				4843	if (got_int)
				4844	break;
				4845	}
				4846	break;
				4847
				4848	default: /* Oh dear. Called inappropriately. */
				4849	EMSG(_(e_re_corr));
				4850	#ifdef DEBUG
				4851	printf("Called regrepeat with op code %d\n", OP(p));
				4852	#endif
				4853	break;
				4854	}
				4855
				4856	reginput = scan;
				4857
				4858	return (int)count;
				4859	}
				4860
				4861	/*
				4862	* regnext - dig the "next" pointer out of a node
				4863	*/
				4864	static char_u *
				4865	regnext(p)
				4866	char_u *p;
				4867	{
				4868	int offset;
				4869
				4870	if (p == JUST_CALC_SIZE)
				4871	return NULL;
				4872
				4873	offset = NEXT(p);
				4874	if (offset == 0)
				4875	return NULL;
				4876
				4877	if (OP(p) == BACK)
				4878	return p - offset;
				4879	else
				4880	return p + offset;
				4881	}
				4882
				4883	/*
				4884	* Check the regexp program for its magic number.
				4885	* Return TRUE if it's wrong.
				4886	*/
				4887	static int
				4888	prog_magic_wrong()
				4889	{
				4890	if (UCHARAT(REG_MULTI
				4891	? reg_mmatch->regprog->program
				4892	: reg_match->regprog->program) != REGMAGIC)
				4893	{
				4894	EMSG(_(e_re_corr));
				4895	return TRUE;
				4896	}
				4897	return FALSE;
				4898	}
				4899
				4900	/*
				4901	* Cleanup the subexpressions, if this wasn't done yet.
				4902	* This construction is used to clear the subexpressions only when they are
				4903	* used (to increase speed).
				4904	*/
				4905	static void
				4906	cleanup_subexpr()
				4907	{
				4908	if (need_clear_subexpr)
				4909	{
				4910	if (REG_MULTI)
				4911	{
				4912	/* Use 0xff to set lnum to -1 */
				4913	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4914	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4915	}
				4916	else
				4917	{
				4918	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				4919	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				4920	}
				4921	need_clear_subexpr = FALSE;
				4922	}
				4923	}
				4924
				4925	#ifdef FEAT_SYN_HL
				4926	static void
				4927	cleanup_zsubexpr()
				4928	{
				4929	if (need_clear_zsubexpr)
				4930	{
				4931	if (REG_MULTI)
				4932	{
				4933	/* Use 0xff to set lnum to -1 */
				4934	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4935	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4936	}
				4937	else
				4938	{
				4939	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				4940	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				4941	}
				4942	need_clear_zsubexpr = FALSE;
				4943	}
				4944	}
				4945	#endif
				4946
				4947	/*
				4948	* Advance reglnum, regline and reginput to the next line.
				4949	*/
				4950	static void
				4951	reg_nextline()
				4952	{
				4953	regline = reg_getline(++reglnum);
				4954	reginput = regline;
				4955	fast_breakcheck();
				4956	}
				4957
				4958	/*
				4959	* Save the input line and position in a regsave_T.
				4960	*/
				4961	static void
				4962	reg_save(save)
				4963	regsave_T *save;
				4964	{
				4965	if (REG_MULTI)
				4966	{
				4967	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				4968	save->rs_u.pos.lnum = reglnum;
				4969	}
				4970	else
				4971	save->rs_u.ptr = reginput;
				4972	}
				4973
				4974	/*
				4975	* Restore the input line and position from a regsave_T.
				4976	*/
				4977	static void
				4978	reg_restore(save)
				4979	regsave_T *save;
				4980	{
				4981	if (REG_MULTI)
				4982	{
				4983	if (reglnum != save->rs_u.pos.lnum)
				4984	{
				4985	/* only call reg_getline() when the line number changed to save
				4986	* a bit of time */
				4987	reglnum = save->rs_u.pos.lnum;
				4988	regline = reg_getline(reglnum);
				4989	}
				4990	reginput = regline + save->rs_u.pos.col;
				4991	}
				4992	else
				4993	reginput = save->rs_u.ptr;
				4994	}
				4995
				4996	/*
				4997	* Return TRUE if current position is equal to saved position.
				4998	*/
				4999	static int
				5000	reg_save_equal(save)
				5001	regsave_T *save;
				5002	{
				5003	if (REG_MULTI)
				5004	return reglnum == save->rs_u.pos.lnum
				5005	&& reginput == regline + save->rs_u.pos.col;
				5006	return reginput == save->rs_u.ptr;
				5007	}
				5008
				5009	/*
				5010	* Tentatively set the sub-expression start to the current position (after
				5011	* calling regmatch() they will have changed). Need to save the existing
				5012	* values for when there is no match.
				5013	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				5014	* depending on REG_MULTI.
				5015	*/
				5016	static void
				5017	save_se_multi(savep, posp)
				5018	save_se_T *savep;
				5019	lpos_T *posp;
				5020	{
				5021	savep->se_u.pos = *posp;
				5022	posp->lnum = reglnum;
				5023	posp->col = (colnr_T)(reginput - regline);
				5024	}
				5025
				5026	static void
				5027	save_se_one(savep, pp)
				5028	save_se_T *savep;
				5029	char_u **pp;
				5030	{
				5031	savep->se_u.ptr = *pp;
				5032	*pp = reginput;
				5033	}
				5034
				5035	/*
				5036	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5037	*/
				5038	static int
				5039	re_num_cmp(val, scan)
				5040	long_u val;
				5041	char_u *scan;
				5042	{
				5043	long_u n = OPERAND_MIN(scan);
				5044
				5045	if (OPERAND_CMP(scan) == '>')
				5046	return val > n;
				5047	if (OPERAND_CMP(scan) == '<')
				5048	return val < n;
				5049	return val == n;
				5050	}
				5051
				5052
				5053	#ifdef DEBUG
				5054
				5055	/*
				5056	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5057	*/
				5058	static void
				5059	regdump(pattern, r)
				5060	char_u *pattern;
				5061	regprog_T *r;
				5062	{
				5063	char_u *s;
				5064	int op = EXACTLY; /* Arbitrary non-END op. */
				5065	char_u *next;
				5066	char_u *end = NULL;
				5067
				5068	printf("\r\nregcomp(%s):\r\n", pattern);
				5069
				5070	s = r->program + 1;
				5071	/*
				5072	* Loop until we find the END that isn't before a referred next (an END
				5073	* can also appear in a NOMATCH operand).
				5074	*/
				5075	while (op != END \|\| s <= end)
				5076	{
				5077	op = OP(s);
				5078	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5079	next = regnext(s);
				5080	if (next == NULL) /* Next ptr. */
				5081	printf("(0)");
				5082	else
				5083	printf("(%d)", (int)((s - r->program) + (next - s)));
				5084	if (end < next)
				5085	end = next;
				5086	if (op == BRACE_LIMITS)
				5087	{
				5088	/* Two short ints */
				5089	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5090	s += 8;
				5091	}
				5092	s += 3;
				5093	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5094	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5095	\|\| op == EXACTLY)
				5096	{
				5097	/* Literal string, where present. */
				5098	while (*s != NUL)
				5099	printf("%c", *s++);
				5100	s++;
				5101	}
				5102	printf("\r\n");
				5103	}
				5104
				5105	/* Header fields of interest. */
				5106	if (r->regstart != NUL)
				5107	printf("start `%s' 0x%x; ", r->regstart < 256
				5108	? (char *)transchar(r->regstart)
				5109	: "multibyte", r->regstart);
				5110	if (r->reganch)
				5111	printf("anchored; ");
				5112	if (r->regmust != NULL)
				5113	printf("must have \"%s\"", r->regmust);
				5114	printf("\r\n");
				5115	}
				5116
				5117	/*
				5118	* regprop - printable representation of opcode
				5119	*/
				5120	static char_u *
				5121	regprop(op)
				5122	char_u *op;
				5123	{
				5124	char_u *p;
				5125	static char_u buf[50];
				5126
				5127	(void) strcpy(buf, ":");
				5128
				5129	switch (OP(op))
				5130	{
				5131	case BOL:
				5132	p = "BOL";
				5133	break;
				5134	case EOL:
				5135	p = "EOL";
				5136	break;
				5137	case RE_BOF:
				5138	p = "BOF";
				5139	break;
				5140	case RE_EOF:
				5141	p = "EOF";
				5142	break;
				5143	case CURSOR:
				5144	p = "CURSOR";
				5145	break;
				5146	case RE_LNUM:
				5147	p = "RE_LNUM";
				5148	break;
				5149	case RE_COL:
				5150	p = "RE_COL";
				5151	break;
				5152	case RE_VCOL:
				5153	p = "RE_VCOL";
				5154	break;
				5155	case BOW:
				5156	p = "BOW";
				5157	break;
				5158	case EOW:
				5159	p = "EOW";
				5160	break;
				5161	case ANY:
				5162	p = "ANY";
				5163	break;
				5164	case ANY + ADD_NL:
				5165	p = "ANY+NL";
				5166	break;
				5167	case ANYOF:
				5168	p = "ANYOF";
				5169	break;
				5170	case ANYOF + ADD_NL:
				5171	p = "ANYOF+NL";
				5172	break;
				5173	case ANYBUT:
				5174	p = "ANYBUT";
				5175	break;
				5176	case ANYBUT + ADD_NL:
				5177	p = "ANYBUT+NL";
				5178	break;
				5179	case IDENT:
				5180	p = "IDENT";
				5181	break;
				5182	case IDENT + ADD_NL:
				5183	p = "IDENT+NL";
				5184	break;
				5185	case SIDENT:
				5186	p = "SIDENT";
				5187	break;
				5188	case SIDENT + ADD_NL:
				5189	p = "SIDENT+NL";
				5190	break;
				5191	case KWORD:
				5192	p = "KWORD";
				5193	break;
				5194	case KWORD + ADD_NL:
				5195	p = "KWORD+NL";
				5196	break;
				5197	case SKWORD:
				5198	p = "SKWORD";
				5199	break;
				5200	case SKWORD + ADD_NL:
				5201	p = "SKWORD+NL";
				5202	break;
				5203	case FNAME:
				5204	p = "FNAME";
				5205	break;
				5206	case FNAME + ADD_NL:
				5207	p = "FNAME+NL";
				5208	break;
				5209	case SFNAME:
				5210	p = "SFNAME";
				5211	break;
				5212	case SFNAME + ADD_NL:
				5213	p = "SFNAME+NL";
				5214	break;
				5215	case PRINT:
				5216	p = "PRINT";
				5217	break;
				5218	case PRINT + ADD_NL:
				5219	p = "PRINT+NL";
				5220	break;
				5221	case SPRINT:
				5222	p = "SPRINT";
				5223	break;
				5224	case SPRINT + ADD_NL:
				5225	p = "SPRINT+NL";
				5226	break;
				5227	case WHITE:
				5228	p = "WHITE";
				5229	break;
				5230	case WHITE + ADD_NL:
				5231	p = "WHITE+NL";
				5232	break;
				5233	case NWHITE:
				5234	p = "NWHITE";
				5235	break;
				5236	case NWHITE + ADD_NL:
				5237	p = "NWHITE+NL";
				5238	break;
				5239	case DIGIT:
				5240	p = "DIGIT";
				5241	break;
				5242	case DIGIT + ADD_NL:
				5243	p = "DIGIT+NL";
				5244	break;
				5245	case NDIGIT:
				5246	p = "NDIGIT";
				5247	break;
				5248	case NDIGIT + ADD_NL:
				5249	p = "NDIGIT+NL";
				5250	break;
				5251	case HEX:
				5252	p = "HEX";
				5253	break;
				5254	case HEX + ADD_NL:
				5255	p = "HEX+NL";
				5256	break;
				5257	case NHEX:
				5258	p = "NHEX";
				5259	break;
				5260	case NHEX + ADD_NL:
				5261	p = "NHEX+NL";
				5262	break;
				5263	case OCTAL:
				5264	p = "OCTAL";
				5265	break;
				5266	case OCTAL + ADD_NL:
				5267	p = "OCTAL+NL";
				5268	break;
				5269	case NOCTAL:
				5270	p = "NOCTAL";
				5271	break;
				5272	case NOCTAL + ADD_NL:
				5273	p = "NOCTAL+NL";
				5274	break;
				5275	case WORD:
				5276	p = "WORD";
				5277	break;
				5278	case WORD + ADD_NL:
				5279	p = "WORD+NL";
				5280	break;
				5281	case NWORD:
				5282	p = "NWORD";
				5283	break;
				5284	case NWORD + ADD_NL:
				5285	p = "NWORD+NL";
				5286	break;
				5287	case HEAD:
				5288	p = "HEAD";
				5289	break;
				5290	case HEAD + ADD_NL:
				5291	p = "HEAD+NL";
				5292	break;
				5293	case NHEAD:
				5294	p = "NHEAD";
				5295	break;
				5296	case NHEAD + ADD_NL:
				5297	p = "NHEAD+NL";
				5298	break;
				5299	case ALPHA:
				5300	p = "ALPHA";
				5301	break;
				5302	case ALPHA + ADD_NL:
				5303	p = "ALPHA+NL";
				5304	break;
				5305	case NALPHA:
				5306	p = "NALPHA";
				5307	break;
				5308	case NALPHA + ADD_NL:
				5309	p = "NALPHA+NL";
				5310	break;
				5311	case LOWER:
				5312	p = "LOWER";
				5313	break;
				5314	case LOWER + ADD_NL:
				5315	p = "LOWER+NL";
				5316	break;
				5317	case NLOWER:
				5318	p = "NLOWER";
				5319	break;
				5320	case NLOWER + ADD_NL:
				5321	p = "NLOWER+NL";
				5322	break;
				5323	case UPPER:
				5324	p = "UPPER";
				5325	break;
				5326	case UPPER + ADD_NL:
				5327	p = "UPPER+NL";
				5328	break;
				5329	case NUPPER:
				5330	p = "NUPPER";
				5331	break;
				5332	case NUPPER + ADD_NL:
				5333	p = "NUPPER+NL";
				5334	break;
				5335	case BRANCH:
				5336	p = "BRANCH";
				5337	break;
				5338	case EXACTLY:
				5339	p = "EXACTLY";
				5340	break;
				5341	case NOTHING:
				5342	p = "NOTHING";
				5343	break;
				5344	case BACK:
				5345	p = "BACK";
				5346	break;
				5347	case END:
				5348	p = "END";
				5349	break;
				5350	case MOPEN + 0:
				5351	p = "MATCH START";
				5352	break;
				5353	case MOPEN + 1:
				5354	case MOPEN + 2:
				5355	case MOPEN + 3:
				5356	case MOPEN + 4:
				5357	case MOPEN + 5:
				5358	case MOPEN + 6:
				5359	case MOPEN + 7:
				5360	case MOPEN + 8:
				5361	case MOPEN + 9:
				5362	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5363	p = NULL;
				5364	break;
				5365	case MCLOSE + 0:
				5366	p = "MATCH END";
				5367	break;
				5368	case MCLOSE + 1:
				5369	case MCLOSE + 2:
				5370	case MCLOSE + 3:
				5371	case MCLOSE + 4:
				5372	case MCLOSE + 5:
				5373	case MCLOSE + 6:
				5374	case MCLOSE + 7:
				5375	case MCLOSE + 8:
				5376	case MCLOSE + 9:
				5377	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5378	p = NULL;
				5379	break;
				5380	case BACKREF + 1:
				5381	case BACKREF + 2:
				5382	case BACKREF + 3:
				5383	case BACKREF + 4:
				5384	case BACKREF + 5:
				5385	case BACKREF + 6:
				5386	case BACKREF + 7:
				5387	case BACKREF + 8:
				5388	case BACKREF + 9:
				5389	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5390	p = NULL;
				5391	break;
				5392	case NOPEN:
				5393	p = "NOPEN";
				5394	break;
				5395	case NCLOSE:
				5396	p = "NCLOSE";
				5397	break;
				5398	#ifdef FEAT_SYN_HL
				5399	case ZOPEN + 1:
				5400	case ZOPEN + 2:
				5401	case ZOPEN + 3:
				5402	case ZOPEN + 4:
				5403	case ZOPEN + 5:
				5404	case ZOPEN + 6:
				5405	case ZOPEN + 7:
				5406	case ZOPEN + 8:
				5407	case ZOPEN + 9:
				5408	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5409	p = NULL;
				5410	break;
				5411	case ZCLOSE + 1:
				5412	case ZCLOSE + 2:
				5413	case ZCLOSE + 3:
				5414	case ZCLOSE + 4:
				5415	case ZCLOSE + 5:
				5416	case ZCLOSE + 6:
				5417	case ZCLOSE + 7:
				5418	case ZCLOSE + 8:
				5419	case ZCLOSE + 9:
				5420	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5421	p = NULL;
				5422	break;
				5423	case ZREF + 1:
				5424	case ZREF + 2:
				5425	case ZREF + 3:
				5426	case ZREF + 4:
				5427	case ZREF + 5:
				5428	case ZREF + 6:
				5429	case ZREF + 7:
				5430	case ZREF + 8:
				5431	case ZREF + 9:
				5432	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5433	p = NULL;
				5434	break;
				5435	#endif
				5436	case STAR:
				5437	p = "STAR";
				5438	break;
				5439	case PLUS:
				5440	p = "PLUS";
				5441	break;
				5442	case NOMATCH:
				5443	p = "NOMATCH";
				5444	break;
				5445	case MATCH:
				5446	p = "MATCH";
				5447	break;
				5448	case BEHIND:
				5449	p = "BEHIND";
				5450	break;
				5451	case NOBEHIND:
				5452	p = "NOBEHIND";
				5453	break;
				5454	case SUBPAT:
				5455	p = "SUBPAT";
				5456	break;
				5457	case BRACE_LIMITS:
				5458	p = "BRACE_LIMITS";
				5459	break;
				5460	case BRACE_SIMPLE:
				5461	p = "BRACE_SIMPLE";
				5462	break;
				5463	case BRACE_COMPLEX + 0:
				5464	case BRACE_COMPLEX + 1:
				5465	case BRACE_COMPLEX + 2:
				5466	case BRACE_COMPLEX + 3:
				5467	case BRACE_COMPLEX + 4:
				5468	case BRACE_COMPLEX + 5:
				5469	case BRACE_COMPLEX + 6:
				5470	case BRACE_COMPLEX + 7:
				5471	case BRACE_COMPLEX + 8:
				5472	case BRACE_COMPLEX + 9:
				5473	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5474	p = NULL;
				5475	break;
				5476	#ifdef FEAT_MBYTE
				5477	case MULTIBYTECODE:
				5478	p = "MULTIBYTECODE";
				5479	break;
				5480	#endif
				5481	case NEWL:
				5482	p = "NEWL";
				5483	break;
				5484	default:
				5485	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5486	p = NULL;
				5487	break;
				5488	}
				5489	if (p != NULL)
				5490	(void) strcat(buf, p);
				5491	return buf;
				5492	}
				5493	#endif
				5494
				5495	#ifdef FEAT_MBYTE
				5496	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5497
				5498	typedef struct
				5499	{
				5500	int a, b, c;
				5501	} decomp_T;
				5502
				5503
				5504	/* 0xfb20 - 0xfb4f */
				5505	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5506	{
				5507	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5508	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5509	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5510	{0x5d4,0,0}, /* 0xfb23 alt he */
				5511	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5512	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5513	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5514	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5515	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5516	{'+', 0, 0}, /* 0xfb29 alt plus */
				5517	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5518	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5519	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5520	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5521	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5522	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5523	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5524	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5525	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5526	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5527	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5528	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5529	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5530	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5531	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5532	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5533	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5534	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5535	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5536	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5537	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5538	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5539	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5540	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5541	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5542	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5543	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5544	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5545	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5546	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5547	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5548	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5549	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5550	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5551	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5552	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5553	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5554	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5555	};
				5556
				5557	static void
				5558	mb_decompose(c, c1, c2, c3)
				5559	int c, c1, c2, *c3;
				5560	{
				5561	decomp_T d;
				5562
				5563	if (c >= 0x4b20 && c <= 0xfb4f)
				5564	{
				5565	d = decomp_table[c - 0xfb20];
				5566	*c1 = d.a;
				5567	*c2 = d.b;
				5568	*c3 = d.c;
				5569	}
				5570	else
				5571	{
				5572	*c1 = c;
				5573	c2 = c3 = 0;
				5574	}
				5575	}
				5576	#endif
				5577
				5578	/*
				5579	* Compare two strings, ignore case if ireg_ic set.
				5580	* Return 0 if strings match, non-zero otherwise.
				5581	* Correct the length "*n" when composing characters are ignored.
				5582	*/
				5583	static int
				5584	cstrncmp(s1, s2, n)
				5585	char_u s1, s2;
				5586	int *n;
				5587	{
				5588	int result;
				5589
				5590	if (!ireg_ic)
				5591	result = STRNCMP(s1, s2, *n);
				5592	else
				5593	result = MB_STRNICMP(s1, s2, *n);
				5594
				5595	#ifdef FEAT_MBYTE
				5596	/* if it failed and it's utf8 and we want to combineignore: */
				5597	if (result != 0 && enc_utf8 && ireg_icombine)
				5598	{
				5599	char_u str1, str2;
				5600	int c1, c2, c11, c12;
				5601	int ix;
				5602	int junk;
				5603
				5604	/* we have to handle the strcmp ourselves, since it is necessary to
				5605	* deal with the composing characters by ignoring them: */
				5606	str1 = s1;
				5607	str2 = s2;
				5608	c1 = c2 = 0;
				5609	for (ix = 0; ix < *n; )
				5610	{
				5611	c1 = mb_ptr2char_adv(&str1);
				5612	c2 = mb_ptr2char_adv(&str2);
				5613	ix += utf_char2len(c1);
				5614
				5615	/* decompose the character if necessary, into 'base' characters
				5616	* because I don't care about Arabic, I will hard-code the Hebrew
				5617	* which I do care about! So sue me... */
				5618	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5619	{
				5620	/* decomposition necessary? */
				5621	mb_decompose(c1, &c11, &junk, &junk);
				5622	mb_decompose(c2, &c12, &junk, &junk);
				5623	c1 = c11;
				5624	c2 = c12;
				5625	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5626	break;
				5627	}
				5628	}
				5629	result = c2 - c1;
				5630	if (result == 0)
				5631	*n = (int)(str2 - s2);
				5632	}
				5633	#endif
				5634
				5635	return result;
				5636	}
				5637
				5638	/*
				5639	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5640	*/
				5641	static char_u *
				5642	cstrchr(s, c)
				5643	char_u *s;
				5644	int c;
				5645	{
				5646	char_u *p;
				5647	int cc;
				5648
				5649	if (!ireg_ic
				5650	#ifdef FEAT_MBYTE
				5651	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5652	#endif
				5653	)
				5654	return vim_strchr(s, c);
				5655
				5656	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5657	* faster (esp. when using MS Visual C++!).
				5658	* For UTF-8 need to use folded case. */
				5659	#ifdef FEAT_MBYTE
				5660	if (enc_utf8 && c > 0x80)
				5661	cc = utf_fold(c);
				5662	else
				5663	#endif
				5664	if (isupper(c))
				5665	cc = TOLOWER_LOC(c);
				5666	else if (islower(c))
				5667	cc = TOUPPER_LOC(c);
				5668	else
				5669	return vim_strchr(s, c);
				5670
				5671	#ifdef FEAT_MBYTE
				5672	if (has_mbyte)
				5673	{
				5674	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5675	{
				5676	if (enc_utf8 && c > 0x80)
				5677	{
				5678	if (utf_fold(utf_ptr2char(p)) == cc)
				5679	return p;
				5680	}
				5681	else if (p == c \|\| p == cc)
				5682	return p;
				5683	}
				5684	}
				5685	else
				5686	#endif
				5687	/* Faster version for when there are no multi-byte characters. */
				5688	for (p = s; *p != NUL; ++p)
				5689	if (p == c \|\| p == cc)
				5690	return p;
				5691
				5692	return NULL;
				5693	}
				5694
				5695	/***************************************************************
				5696	* regsub stuff *
				5697	***************************************************************/
				5698
				5699	/* This stuff below really confuses cc on an SGI -- webb */
				5700	#ifdef __sgi
				5701	# undef __ARGS
				5702	# define __ARGS(x) ()
				5703	#endif
				5704
				5705	/*
				5706	* We should define ftpr as a pointer to a function returning a pointer to
				5707	* a function returning a pointer to a function ...
				5708	* This is impossible, so we declare a pointer to a function returning a
				5709	* pointer to a function returning void. This should work for all compilers.
				5710	*/
				5711	typedef void ((fptr) __ARGS((char_u *, int)))();
				5712
				5713	static fptr do_upper __ARGS((char_u *, int));
				5714	static fptr do_Upper __ARGS((char_u *, int));
				5715	static fptr do_lower __ARGS((char_u *, int));
				5716	static fptr do_Lower __ARGS((char_u *, int));
				5717
				5718	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5719
				5720	static fptr
				5721	do_upper(d, c)
				5722	char_u *d;
				5723	int c;
				5724	{
				5725	*d = TOUPPER_LOC(c);
				5726
				5727	return (fptr)NULL;
				5728	}
				5729
				5730	static fptr
				5731	do_Upper(d, c)
				5732	char_u *d;
				5733	int c;
				5734	{
				5735	*d = TOUPPER_LOC(c);
				5736
				5737	return (fptr)do_Upper;
				5738	}
				5739
				5740	static fptr
				5741	do_lower(d, c)
				5742	char_u *d;
				5743	int c;
				5744	{
				5745	*d = TOLOWER_LOC(c);
				5746
				5747	return (fptr)NULL;
				5748	}
				5749
				5750	static fptr
				5751	do_Lower(d, c)
				5752	char_u *d;
				5753	int c;
				5754	{
				5755	*d = TOLOWER_LOC(c);
				5756
				5757	return (fptr)do_Lower;
				5758	}
				5759
				5760	/*
				5761	* regtilde(): Replace tildes in the pattern by the old pattern.
				5762	*
				5763	* Short explanation of the tilde: It stands for the previous replacement
				5764	* pattern. If that previous pattern also contains a ~ we should go back a
				5765	* step further... But we insert the previous pattern into the current one
				5766	* and remember that.
				5767	* This still does not handle the case where "magic" changes. TODO?
				5768	*
				5769	* The tildes are parsed once before the first call to vim_regsub().
				5770	*/
				5771	char_u *
				5772	regtilde(source, magic)
				5773	char_u *source;
				5774	int magic;
				5775	{
				5776	char_u *newsub = source;
				5777	char_u *tmpsub;
				5778	char_u *p;
				5779	int len;
				5780	int prevlen;
				5781
				5782	for (p = newsub; *p; ++p)
				5783	{
				5784	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5785	{
				5786	if (reg_prev_sub != NULL)
				5787	{
				5788	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5789	prevlen = (int)STRLEN(reg_prev_sub);
				5790	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5791	if (tmpsub != NULL)
				5792	{
				5793	/* copy prefix */
				5794	len = (int)(p - newsub); /* not including ~ */
				5795	mch_memmove(tmpsub, newsub, (size_t)len);
				5796	/* interpretate tilde */
				5797	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5798	/* copy postfix */
				5799	if (!magic)
				5800	++p; /* back off \ */
				5801	STRCPY(tmpsub + len + prevlen, p + 1);
				5802
				5803	if (newsub != source) /* already allocated newsub */
				5804	vim_free(newsub);
				5805	newsub = tmpsub;
				5806	p = newsub + len + prevlen;
				5807	}
				5808	}
				5809	else if (magic)
				5810	STRCPY(p, p + 1); /* remove '~' */
				5811	else
				5812	STRCPY(p, p + 2); /* remove '\~' */
				5813	--p;
				5814	}
				5815	else
				5816	{
				5817	if (p == '\\' && p[1]) / skip escaped characters */
				5818	++p;
				5819	#ifdef FEAT_MBYTE
				5820	if (has_mbyte)
				5821	p += (*mb_ptr2len_check)(p) - 1;
				5822	#endif
				5823	}
				5824	}
				5825
				5826	vim_free(reg_prev_sub);
				5827	if (newsub != source) /* newsub was allocated, just keep it */
				5828	reg_prev_sub = newsub;
				5829	else /* no ~ found, need to save newsub */
				5830	reg_prev_sub = vim_strsave(newsub);
				5831	return newsub;
				5832	}
				5833
				5834	#ifdef FEAT_EVAL
				5835	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				5836
				5837	/* These pointers are used instead of reg_match and reg_mmatch for
				5838	* reg_submatch(). Needed for when the substitution string is an expression
				5839	* that contains a call to substitute() and submatch(). */
				5840	static regmatch_T *submatch_match;
				5841	static regmmatch_T *submatch_mmatch;
				5842	#endif
				5843
				5844	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				5845	/*
				5846	* vim_regsub() - perform substitutions after a vim_regexec() or
				5847	* vim_regexec_multi() match.
				5848	*
				5849	* If "copy" is TRUE really copy into "dest".
				5850	* If "copy" is FALSE nothing is copied, this is just to find out the length
				5851	* of the result.
				5852	*
				5853	* If "backslash" is TRUE, a backslash will be removed later, need to double
				5854	* them to keep them, and insert a backslash before a CR to avoid it being
				5855	* replaced with a line break later.
				5856	*
				5857	* Note: The matched text must not change between the call of
				5858	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				5859	* references invalid!
				5860	*
				5861	* Returns the size of the replacement, including terminating NUL.
				5862	*/
				5863	int
				5864	vim_regsub(rmp, source, dest, copy, magic, backslash)
				5865	regmatch_T *rmp;
				5866	char_u *source;
				5867	char_u *dest;
				5868	int copy;
				5869	int magic;
				5870	int backslash;
				5871	{
				5872	reg_match = rmp;
				5873	reg_mmatch = NULL;
				5874	reg_maxline = 0;
				5875	return vim_regsub_both(source, dest, copy, magic, backslash);
				5876	}
				5877	#endif
				5878
				5879	int
				5880	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				5881	regmmatch_T *rmp;
				5882	linenr_T lnum;
				5883	char_u *source;
				5884	char_u *dest;
				5885	int copy;
				5886	int magic;
				5887	int backslash;
				5888	{
				5889	reg_match = NULL;
				5890	reg_mmatch = rmp;
				5891	reg_buf = curbuf; /* always works on the current buffer! */
				5892	reg_firstlnum = lnum;
				5893	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				5894	return vim_regsub_both(source, dest, copy, magic, backslash);
				5895	}
				5896
				5897	static int
				5898	vim_regsub_both(source, dest, copy, magic, backslash)
				5899	char_u *source;
				5900	char_u *dest;
				5901	int copy;
				5902	int magic;
				5903	int backslash;
				5904	{
				5905	char_u *src;
				5906	char_u *dst;
				5907	char_u *s;
				5908	int c;
				5909	int no = -1;
				5910	fptr func = (fptr)NULL;
				5911	linenr_T clnum = 0; /* init for GCC */
				5912	int len = 0; /* init for GCC */
				5913	#ifdef FEAT_EVAL
				5914	static char_u *eval_result = NULL;
				5915	#endif
				5916	#ifdef FEAT_MBYTE
				5917	int l;
				5918	#endif
				5919
				5920
				5921	/* Be paranoid... */
				5922	if (source == NULL \|\| dest == NULL)
				5923	{
				5924	EMSG(_(e_null));
				5925	return 0;
				5926	}
				5927	if (prog_magic_wrong())
				5928	return 0;
				5929	src = source;
				5930	dst = dest;
				5931
				5932	/*
				5933	* When the substitute part starts with "\=" evaluate it as an expression.
				5934	*/
				5935	if (source[0] == '\\' && source[1] == '='
				5936	#ifdef FEAT_EVAL
				5937	&& !can_f_submatch /* can't do this recursively */
				5938	#endif
				5939	)
				5940	{
				5941	#ifdef FEAT_EVAL
				5942	/* To make sure that the length doesn't change between checking the
				5943	* length and copying the string, and to speed up things, the
				5944	* resulting string is saved from the call with "copy" == FALSE to the
				5945	* call with "copy" == TRUE. */
				5946	if (copy)
				5947	{
				5948	if (eval_result != NULL)
				5949	{
				5950	STRCPY(dest, eval_result);
				5951	dst += STRLEN(eval_result);
				5952	vim_free(eval_result);
				5953	eval_result = NULL;
				5954	}
				5955	}
				5956	else
				5957	{
				5958	linenr_T save_reg_maxline;
				5959	win_T *save_reg_win;
				5960	int save_ireg_ic;
				5961
				5962	vim_free(eval_result);
				5963
				5964	/* The expression may contain substitute(), which calls us
				5965	* recursively. Make sure submatch() gets the text from the first
				5966	* level. Don't need to save "reg_buf", because
				5967	* vim_regexec_multi() can't be called recursively. */
				5968	submatch_match = reg_match;
				5969	submatch_mmatch = reg_mmatch;
				5970	save_reg_maxline = reg_maxline;
				5971	save_reg_win = reg_win;
				5972	save_ireg_ic = ireg_ic;
				5973	can_f_submatch = TRUE;
				5974
				5975	eval_result = eval_to_string(source + 2, NULL);
				5976	if (eval_result != NULL)
				5977	{
				5978	for (s = eval_result; *s != NUL; ++s)
				5979	{
				5980	/* Change NL to CR, so that it becomes a line break.
				5981	* Skip over a backslashed character. */
				5982	if (*s == NL)
				5983	*s = CAR;
				5984	else if (*s == '\\' && s[1] != NUL)
				5985	++s;
				5986	#ifdef FEAT_MBYTE
				5987	if (has_mbyte)
				5988	s += (*mb_ptr2len_check)(s) - 1;
				5989	#endif
				5990	}
				5991
				5992	dst += STRLEN(eval_result);
				5993	}
				5994
				5995	reg_match = submatch_match;
				5996	reg_mmatch = submatch_mmatch;
				5997	reg_maxline = save_reg_maxline;
				5998	reg_win = save_reg_win;
				5999	ireg_ic = save_ireg_ic;
				6000	can_f_submatch = FALSE;
				6001	}
				6002	#endif
				6003	}
				6004	else
				6005	while ((c = *src++) != NUL)
				6006	{
				6007	if (c == '&' && magic)
				6008	no = 0;
				6009	else if (c == '\\' && *src != NUL)
				6010	{
				6011	if (*src == '&' && !magic)
				6012	{
				6013	++src;
				6014	no = 0;
				6015	}
				6016	else if ('0' <= src && src <= '9')
				6017	{
				6018	no = *src++ - '0';
				6019	}
				6020	else if (vim_strchr((char_u )"uUlLeE", src))
				6021	{
				6022	switch (*src++)
				6023	{
				6024	case 'u': func = (fptr)do_upper;
				6025	continue;
				6026	case 'U': func = (fptr)do_Upper;
				6027	continue;
				6028	case 'l': func = (fptr)do_lower;
				6029	continue;
				6030	case 'L': func = (fptr)do_Lower;
				6031	continue;
				6032	case 'e':
				6033	case 'E': func = (fptr)NULL;
				6034	continue;
				6035	}
				6036	}
				6037	}
				6038	if (no < 0) /* Ordinary character. */
				6039	{
				6040	if (c == '\\' && *src != NUL)
				6041	{
				6042	/* Check for abbreviations -- webb */
				6043	switch (*src)
				6044	{
				6045	case 'r': c = CAR; ++src; break;
				6046	case 'n': c = NL; ++src; break;
				6047	case 't': c = TAB; ++src; break;
				6048	/* Oh no! \e already has meaning in subst pat :-( */
				6049	/* case 'e': c = ESC; ++src; break; */
				6050	case 'b': c = Ctrl_H; ++src; break;
				6051
				6052	/* If "backslash" is TRUE the backslash will be removed
				6053	* later. Used to insert a literal CR. */
				6054	default: if (backslash)
				6055	{
				6056	if (copy)
				6057	*dst = '\\';
				6058	++dst;
				6059	}
				6060	c = *src++;
				6061	}
				6062	}
				6063
				6064	/* Write to buffer, if copy is set. */
				6065	#ifdef FEAT_MBYTE
				6066	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6067	{
				6068	/* TODO: should use "func" here. */
				6069	if (copy)
				6070	mch_memmove(dst, src - 1, l);
				6071	dst += l - 1;
				6072	src += l - 1;
				6073	}
				6074	else
				6075	{
				6076	#endif
				6077	if (copy)
				6078	{
				6079	if (func == (fptr)NULL) /* just copy */
				6080	*dst = c;
				6081	else /* change case */
				6082	func = (fptr)(func(dst, c));
				6083	/* Turbo C complains without the typecast */
				6084	}
				6085	#ifdef FEAT_MBYTE
				6086	}
				6087	#endif
				6088	dst++;
				6089	}
				6090	else
				6091	{
				6092	if (REG_MULTI)
				6093	{
				6094	clnum = reg_mmatch->startpos[no].lnum;
				6095	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6096	s = NULL;
				6097	else
				6098	{
				6099	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6100	if (reg_mmatch->endpos[no].lnum == clnum)
				6101	len = reg_mmatch->endpos[no].col
				6102	- reg_mmatch->startpos[no].col;
				6103	else
				6104	len = (int)STRLEN(s);
				6105	}
				6106	}
				6107	else
				6108	{
				6109	s = reg_match->startp[no];
				6110	if (reg_match->endp[no] == NULL)
				6111	s = NULL;
				6112	else
				6113	len = (int)(reg_match->endp[no] - s);
				6114	}
				6115	if (s != NULL)
				6116	{
				6117	for (;;)
				6118	{
				6119	if (len == 0)
				6120	{
				6121	if (REG_MULTI)
				6122	{
				6123	if (reg_mmatch->endpos[no].lnum == clnum)
				6124	break;
				6125	if (copy)
				6126	*dst = CAR;
				6127	++dst;
				6128	s = reg_getline(++clnum);
				6129	if (reg_mmatch->endpos[no].lnum == clnum)
				6130	len = reg_mmatch->endpos[no].col;
				6131	else
				6132	len = (int)STRLEN(s);
				6133	}
				6134	else
				6135	break;
				6136	}
				6137	else if (s == NUL) / we hit NUL. */
				6138	{
				6139	if (copy)
				6140	EMSG(_(e_re_damg));
				6141	goto exit;
				6142	}
				6143	else
				6144	{
				6145	if (backslash && (s == CAR \|\| s == '\\'))
				6146	{
				6147	/*
				6148	* Insert a backslash in front of a CR, otherwise
				6149	* it will be replaced by a line break.
				6150	* Number of backslashes will be halved later,
				6151	* double them here.
				6152	*/
				6153	if (copy)
				6154	{
				6155	dst[0] = '\\';
				6156	dst[1] = *s;
				6157	}
				6158	dst += 2;
				6159	}
				6160	#ifdef FEAT_MBYTE
				6161	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6162	{
				6163	/* TODO: should use "func" here. */
				6164	if (copy)
				6165	mch_memmove(dst, s, l);
				6166	dst += l;
				6167	s += l - 1;
				6168	len -= l - 1;
				6169	}
				6170	#endif
				6171	else
				6172	{
				6173	if (copy)
				6174	{
				6175	if (func == (fptr)NULL) /* just copy */
				6176	dst = s;
				6177	else /* change case */
				6178	func = (fptr)(func(dst, *s));
				6179	/* Turbo C complains without the typecast */
				6180	}
				6181	++dst;
				6182	}
				6183	++s;
				6184	--len;
				6185	}
				6186	}
				6187	}
				6188	no = -1;
				6189	}
				6190	}
				6191	if (copy)
				6192	*dst = NUL;
				6193
				6194	exit:
				6195	return (int)((dst - dest) + 1);
				6196	}
				6197
				6198	#ifdef FEAT_EVAL
				6199	/*
				6200	* Used for the submatch() function: get the string from tne n'th submatch in
				6201	* allocated memory.
				6202	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6203	*/
				6204	char_u *
				6205	reg_submatch(no)
				6206	int no;
				6207	{
				6208	char_u *retval = NULL;
				6209	char_u *s;
				6210	int len;
				6211	int round;
				6212	linenr_T lnum;
				6213
				6214	if (!can_f_submatch)
				6215	return NULL;
				6216
				6217	if (submatch_match == NULL)
				6218	{
				6219	/*
				6220	* First round: compute the length and allocate memory.
				6221	* Second round: copy the text.
				6222	*/
				6223	for (round = 1; round <= 2; ++round)
				6224	{
				6225	lnum = submatch_mmatch->startpos[no].lnum;
				6226	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6227	return NULL;
				6228
				6229	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6230	if (s == NULL) /* anti-crash check, cannot happen? */
				6231	break;
				6232	if (submatch_mmatch->endpos[no].lnum == lnum)
				6233	{
				6234	/* Within one line: take form start to end col. */
				6235	len = submatch_mmatch->endpos[no].col
				6236	- submatch_mmatch->startpos[no].col;
				6237	if (round == 2)
				6238	{
				6239	STRNCPY(retval, s, len);
				6240	retval[len] = NUL;
				6241	}
				6242	++len;
				6243	}
				6244	else
				6245	{
				6246	/* Multiple lines: take start line from start col, middle
				6247	* lines completely and end line up to end col. */
				6248	len = (int)STRLEN(s);
				6249	if (round == 2)
				6250	{
				6251	STRCPY(retval, s);
				6252	retval[len] = '\n';
				6253	}
				6254	++len;
				6255	++lnum;
				6256	while (lnum < submatch_mmatch->endpos[no].lnum)
				6257	{
				6258	s = reg_getline(lnum++);
				6259	if (round == 2)
				6260	STRCPY(retval + len, s);
				6261	len += (int)STRLEN(s);
				6262	if (round == 2)
				6263	retval[len] = '\n';
				6264	++len;
				6265	}
				6266	if (round == 2)
				6267	STRNCPY(retval + len, reg_getline(lnum),
				6268	submatch_mmatch->endpos[no].col);
				6269	len += submatch_mmatch->endpos[no].col;
				6270	if (round == 2)
				6271	retval[len] = NUL;
				6272	++len;
				6273	}
				6274
				6275	if (round == 1)
				6276	{
				6277	retval = lalloc((long_u)len, TRUE);
				6278	if (s == NULL)
				6279	return NULL;
				6280	}
				6281	}
				6282	}
				6283	else
				6284	{
				6285	if (submatch_match->endp[no] == NULL)
				6286	retval = NULL;
				6287	else
				6288	{
				6289	s = submatch_match->startp[no];
				6290	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6291	}
				6292	}
				6293
				6294	return retval;
				6295	}
				6296	#endif