Blame - src/regexp.c - android_external_vim

blob: dac34a2b3bcf6f31d2024889d04d33a220162db3 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACE_COMPLEX
				77	* node points to the node after the stuff to be repeated. The operand of some
				78	* types of node is a literal string; for others, it is a node leading into a
				79	* sub-FSM. In particular, the operand of a BRANCH node is the first node of
				80	* the branch. (NB this is not a tree structure: the tail of the branch
				81	* connects to the thing following the set of BRANCHes.)
				82	*
				83	* pattern is coded like:
				84	*
				85	* +-----------------+
				86	* \| V
				87	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				88	* \| ^ \| ^
				89	* +------+ +----------+
				90	*
				91	*
				92	* +------------------+
				93	* V \|
				94	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				95	* \| \| ^ ^
				96	* \| +---------------+ \|
				97	* +---------------------------------------------+
				98	*
				99	*
				100	* +-------------------------+
				101	* V \|
				102	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				103	* \| \| ^
				104	* \| +----------------+
				105	* +-----------------------------------------------+
				106	*
				107	*
				108	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				109	* \| \| ^ ^
				110	* \| +----------------+ \|
				111	* +--------------------------------+
				112	*
				113	* +---------+
				114	* \| V
				115	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				116	* \| \| \| \| ^ ^
				117	* \| \| \| +-----+ \|
				118	* \| \| +----------------+ \|
				119	* \| +---------------------------+ \|
				120	* +------------------------------------------------------+
				121	*
				122	* They all start with a BRANCH for "\|" alternatives, even when there is only
				123	* one alternative.
				124	*/
				125
				126	/*
				127	* The opcodes are:
				128	*/
				129
				130	/* definition number opnd? meaning */
				131	#define END 0 /* End of program or NOMATCH operand. */
				132	#define BOL 1 /* Match "" at beginning of line. */
				133	#define EOL 2 /* Match "" at end of line. */
				134	#define BRANCH 3 /* node Match this alternative, or the
				135	* next... */
				136	#define BACK 4 /* Match "", "next" ptr points backward. */
				137	#define EXACTLY 5 /* str Match this string. */
				138	#define NOTHING 6 /* Match empty string. */
				139	#define STAR 7 /* node Match this (simple) thing 0 or more
				140	* times. */
				141	#define PLUS 8 /* node Match this (simple) thing 1 or more
				142	* times. */
				143	#define MATCH 9 /* node match the operand zero-width */
				144	#define NOMATCH 10 /* node check for no match with operand */
				145	#define BEHIND 11 /* node look behind for a match with operand */
				146	#define NOBEHIND 12 /* node look behind for no match with operand */
				147	#define SUBPAT 13 /* node match the operand here */
				148	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				149	* n times (\{m,n\}). */
				150	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				151	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				152	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				153	* and BRACE_COMPLEX. */
				154	#define NEWL 18 /* Match line-break */
				155	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				156
				157
				158	/* character classes: 20-48 normal, 50-78 include a line-break */
				159	#define ADD_NL 30
				160	#define FIRST_NL ANY + ADD_NL
				161	#define ANY 20 /* Match any one character. */
				162	#define ANYOF 21 /* str Match any character in this string. */
				163	#define ANYBUT 22 /* str Match any character not in this
				164	* string. */
				165	#define IDENT 23 /* Match identifier char */
				166	#define SIDENT 24 /* Match identifier char but no digit */
				167	#define KWORD 25 /* Match keyword char */
				168	#define SKWORD 26 /* Match word char but no digit */
				169	#define FNAME 27 /* Match file name char */
				170	#define SFNAME 28 /* Match file name char but no digit */
				171	#define PRINT 29 /* Match printable char */
				172	#define SPRINT 30 /* Match printable char but no digit */
				173	#define WHITE 31 /* Match whitespace char */
				174	#define NWHITE 32 /* Match non-whitespace char */
				175	#define DIGIT 33 /* Match digit char */
				176	#define NDIGIT 34 /* Match non-digit char */
				177	#define HEX 35 /* Match hex char */
				178	#define NHEX 36 /* Match non-hex char */
				179	#define OCTAL 37 /* Match octal char */
				180	#define NOCTAL 38 /* Match non-octal char */
				181	#define WORD 39 /* Match word char */
				182	#define NWORD 40 /* Match non-word char */
				183	#define HEAD 41 /* Match head char */
				184	#define NHEAD 42 /* Match non-head char */
				185	#define ALPHA 43 /* Match alpha char */
				186	#define NALPHA 44 /* Match non-alpha char */
				187	#define LOWER 45 /* Match lowercase char */
				188	#define NLOWER 46 /* Match non-lowercase char */
				189	#define UPPER 47 /* Match uppercase char */
				190	#define NUPPER 48 /* Match non-uppercase char */
				191	#define LAST_NL NUPPER + ADD_NL
				192	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				193
				194	#define MOPEN 80 /* -89 Mark this point in input as start of
				195	* \( subexpr. MOPEN + 0 marks start of
				196	* match. */
				197	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				198	* end of match. */
				199	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				200
				201	#ifdef FEAT_SYN_HL
				202	# define ZOPEN 110 /* -119 Mark this point in input as start of
				203	* \z( subexpr. */
				204	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				205	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				206	#endif
				207
				208	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				209
				210	#define NOPEN 150 /* Mark this point in input as start of
				211	\%( subexpr. */
				212	#define NCLOSE 151 /* Analogous to NOPEN. */
				213
				214	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				215	#define RE_BOF 201 /* Match "" at beginning of file. */
				216	#define RE_EOF 202 /* Match "" at end of file. */
				217	#define CURSOR 203 /* Match location of cursor. */
				218
				219	#define RE_LNUM 204 /* nr cmp Match line number */
				220	#define RE_COL 205 /* nr cmp Match column number */
				221	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				222
				223	/*
				224	* Magic characters have a special meaning, they don't match literally.
				225	* Magic characters are negative. This separates them from literal characters
				226	* (possibly multi-byte). Only ASCII characters can be Magic.
				227	*/
				228	#define Magic(x) ((int)(x) - 256)
				229	#define un_Magic(x) ((x) + 256)
				230	#define is_Magic(x) ((x) < 0)
				231
				232	static int no_Magic __ARGS((int x));
				233	static int toggle_Magic __ARGS((int x));
				234
				235	static int
				236	no_Magic(x)
				237	int x;
				238	{
				239	if (is_Magic(x))
				240	return un_Magic(x);
				241	return x;
				242	}
				243
				244	static int
				245	toggle_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return Magic(x);
				251	}
				252
				253	/*
				254	* The first byte of the regexp internal "program" is actually this magic
				255	* number; the start node begins in the second byte. It's used to catch the
				256	* most severe mutilation of the program by the caller.
				257	*/
				258
				259	#define REGMAGIC 0234
				260
				261	/*
				262	* Opcode notes:
				263	*
				264	* BRANCH The set of branches constituting a single choice are hooked
				265	* together with their "next" pointers, since precedence prevents
				266	* anything being concatenated to any individual branch. The
				267	* "next" pointer of the last BRANCH in a choice points to the
				268	* thing following the whole choice. This is also where the
				269	* final "next" pointer of each individual branch points; each
				270	* branch starts with the operand node of a BRANCH node.
				271	*
				272	* BACK Normal "next" pointers all implicitly point forward; BACK
				273	* exists to make loop structures possible.
				274	*
				275	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				276	* BRANCH structures using BACK. Simple cases (one character
				277	* per match) are implemented with STAR and PLUS for speed
				278	* and to minimize recursive plunges.
				279	*
				280	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				281	* node, and defines the min and max limits to be used for that
				282	* node.
				283	*
				284	* MOPEN,MCLOSE ...are numbered at compile time.
				285	* ZOPEN,ZCLOSE ...ditto
				286	*/
				287
				288	/*
				289	* A node is one char of opcode followed by two chars of "next" pointer.
				290	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				291	* value is a positive offset from the opcode of the node containing it.
				292	* An operand, if any, simply follows the node. (Note that much of the
				293	* code generation knows about this implicit relationship.)
				294	*
				295	* Using two bytes for the "next" pointer is vast overkill for most things,
				296	* but allows patterns to get big without disasters.
				297	*/
				298	#define OP(p) ((int)*(p))
				299	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				300	#define OPERAND(p) ((p) + 3)
				301	/* Obtain an operand that was stored as four bytes, MSB first. */
				302	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				303	+ ((long)(p)[5] << 8) + (long)(p)[6])
				304	/* Obtain a second operand stored as four bytes. */
				305	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				306	/* Obtain a second single-byte operand stored after a four bytes operand. */
				307	#define OPERAND_CMP(p) (p)[7]
				308
				309	/*
				310	* Utility definitions.
				311	*/
				312	#define UCHARAT(p) ((int)(char_u )(p))
				313
				314	/* Used for an error (down from) vim_regcomp(): give the error message, set
				315	* rc_did_emsg and return NULL */
				316	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				317	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				318	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				319	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				320
				321	#define MAX_LIMIT (32767L << 16L)
				322
				323	static int re_multi_type __ARGS((int));
				324	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				325	static char_u cstrchr __ARGS((char_u , int));
				326
				327	#ifdef DEBUG
				328	static void regdump __ARGS((char_u , regprog_T ));
				329	static char_u regprop __ARGS((char_u ));
				330	#endif
				331
				332	#define NOT_MULTI 0
				333	#define MULTI_ONE 1
				334	#define MULTI_MULT 2
				335	/*
				336	* Return NOT_MULTI if c is not a "multi" operator.
				337	* Return MULTI_ONE if c is a single "multi" operator.
				338	* Return MULTI_MULT if c is a multi "multi" operator.
				339	*/
				340	static int
				341	re_multi_type(c)
				342	int c;
				343	{
				344	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				345	return MULTI_ONE;
				346	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				347	return MULTI_MULT;
				348	return NOT_MULTI;
				349	}
				350
				351	/*
				352	* Flags to be passed up and down.
				353	*/
				354	#define HASWIDTH 0x1 /* Known never to match null string. */
				355	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				356	#define SPSTART 0x4 /* Starts with * or +. */
				357	#define HASNL 0x8 /* Contains some \n. */
				358	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				359	#define WORST 0 /* Worst case. */
				360
				361	/*
				362	* When regcode is set to this value, code is not emitted and size is computed
				363	* instead.
				364	*/
				365	#define JUST_CALC_SIZE ((char_u *) -1)
				366
				367	static char_u *reg_prev_sub;
				368
				369	/*
				370	* REGEXP_INRANGE contains all characters which are always special in a []
				371	* range after '\'.
				372	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				373	* These are:
				374	* \n - New line (NL).
				375	* \r - Carriage Return (CR).
				376	* \t - Tab (TAB).
				377	* \e - Escape (ESC).
				378	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	379	* \d - Character code in decimal, eg \d123
				380	* \o - Character code in octal, eg \o80
				381	* \x - Character code in hex, eg \x4a
				382	* \u - Multibyte character code, eg \u20ac
				383	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	384	*/
				385	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	386	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	387
				388	static int backslash_trans __ARGS((int c));
				389	static int skip_class_name __ARGS((char_u **pp));
				390	static char_u skip_anyof __ARGS((char_u p));
				391	static void init_class_tab __ARGS((void));
				392
				393	/*
				394	* Translate '\x' to its control character, except "\n", which is Magic.
				395	*/
				396	static int
				397	backslash_trans(c)
				398	int c;
				399	{
				400	switch (c)
				401	{
				402	case 'r': return CAR;
				403	case 't': return TAB;
				404	case 'e': return ESC;
				405	case 'b': return BS;
				406	}
				407	return c;
				408	}
				409
				410	/*
				411	* Check for a character class name. "pp" points to the '['.
				412	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				413	* recognized. Otherwise "pp" is advanced to after the item.
				414	*/
				415	static int
				416	skip_class_name(pp)
				417	char_u **pp;
				418	{
				419	static const char *(class_names[]) =
				420	{
				421	"alnum:]",
				422	#define CLASS_ALNUM 0
				423	"alpha:]",
				424	#define CLASS_ALPHA 1
				425	"blank:]",
				426	#define CLASS_BLANK 2
				427	"cntrl:]",
				428	#define CLASS_CNTRL 3
				429	"digit:]",
				430	#define CLASS_DIGIT 4
				431	"graph:]",
				432	#define CLASS_GRAPH 5
				433	"lower:]",
				434	#define CLASS_LOWER 6
				435	"print:]",
				436	#define CLASS_PRINT 7
				437	"punct:]",
				438	#define CLASS_PUNCT 8
				439	"space:]",
				440	#define CLASS_SPACE 9
				441	"upper:]",
				442	#define CLASS_UPPER 10
				443	"xdigit:]",
				444	#define CLASS_XDIGIT 11
				445	"tab:]",
				446	#define CLASS_TAB 12
				447	"return:]",
				448	#define CLASS_RETURN 13
				449	"backspace:]",
				450	#define CLASS_BACKSPACE 14
				451	"escape:]",
				452	#define CLASS_ESCAPE 15
				453	};
				454	#define CLASS_NONE 99
				455	int i;
				456
				457	if ((*pp)[1] == ':')
				458	{
				459	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				460	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				461	{
				462	*pp += STRLEN(class_names[i]) + 2;
				463	return i;
				464	}
				465	}
				466	return CLASS_NONE;
				467	}
				468
				469	/*
				470	* Skip over a "[]" range.
				471	* "p" must point to the character after the '['.
				472	* The returned pointer is on the matching ']', or the terminating NUL.
				473	*/
				474	static char_u *
				475	skip_anyof(p)
				476	char_u *p;
				477	{
				478	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				479	#ifdef FEAT_MBYTE
				480	int l;
				481	#endif
				482
				483	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				484
				485	if (p == '^') / Complement of range. */
				486	++p;
				487	if (p == ']' \|\| p == '-')
				488	++p;
				489	while (p != NUL && p != ']')
				490	{
				491	#ifdef FEAT_MBYTE
				492	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				493	p += l;
				494	else
				495	#endif
				496	if (*p == '-')
				497	{
				498	++p;
				499	if (p != ']' && p != NUL)
				500	{
				501	#ifdef FEAT_MBYTE
				502	if (has_mbyte)
				503	p += (*mb_ptr2len_check)(p);
				504	else
				505	#endif
				506	++p;
				507	}
				508	}
				509	else if (*p == '\\'
				510	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				511	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				512	p += 2;
				513	else if (*p == '[')
				514	{
				515	if (skip_class_name(&p) == CLASS_NONE)
				516	++p; /* It was not a class name */
				517	}
				518	else
				519	++p;
				520	}
				521
				522	return p;
				523	}
				524
				525	/*
				526	* Specific version of character class functions.
				527	* Using a table to keep this fast.
				528	*/
				529	static short class_tab[256];
				530
				531	#define RI_DIGIT 0x01
				532	#define RI_HEX 0x02
				533	#define RI_OCTAL 0x04
				534	#define RI_WORD 0x08
				535	#define RI_HEAD 0x10
				536	#define RI_ALPHA 0x20
				537	#define RI_LOWER 0x40
				538	#define RI_UPPER 0x80
				539	#define RI_WHITE 0x100
				540
				541	static void
				542	init_class_tab()
				543	{
				544	int i;
				545	static int done = FALSE;
				546
				547	if (done)
				548	return;
				549
				550	for (i = 0; i < 256; ++i)
				551	{
				552	if (i >= '0' && i <= '7')
				553	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				554	else if (i >= '8' && i <= '9')
				555	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				556	else if (i >= 'a' && i <= 'f')
				557	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				558	#ifdef EBCDIC
				559	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				560	\|\| (i >= 's' && i <= 'z'))
				561	#else
				562	else if (i >= 'g' && i <= 'z')
				563	#endif
				564	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				565	else if (i >= 'A' && i <= 'F')
				566	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				567	#ifdef EBCDIC
				568	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				569	\|\| (i >= 'S' && i <= 'Z'))
				570	#else
				571	else if (i >= 'G' && i <= 'Z')
				572	#endif
				573	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				574	else if (i == '_')
				575	class_tab[i] = RI_WORD + RI_HEAD;
				576	else
				577	class_tab[i] = 0;
				578	}
				579	class_tab[' '] \|= RI_WHITE;
				580	class_tab['\t'] \|= RI_WHITE;
				581	done = TRUE;
				582	}
				583
				584	#ifdef FEAT_MBYTE
				585	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				586	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				587	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				588	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				589	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				590	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				591	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				592	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				593	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				594	#else
				595	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				596	# define ri_hex(c) (class_tab[c] & RI_HEX)
				597	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				598	# define ri_word(c) (class_tab[c] & RI_WORD)
				599	# define ri_head(c) (class_tab[c] & RI_HEAD)
				600	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				601	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				602	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				603	# define ri_white(c) (class_tab[c] & RI_WHITE)
				604	#endif
				605
				606	/* flags for regflags */
				607	#define RF_ICASE 1 /* ignore case */
				608	#define RF_NOICASE 2 /* don't ignore case */
				609	#define RF_HASNL 4 /* can match a NL */
				610	#define RF_ICOMBINE 8 /* ignore combining characters */
				611	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				612
				613	/*
				614	* Global work variables for vim_regcomp().
				615	*/
				616
				617	static char_u regparse; / Input-scan pointer. */
				618	static int prevchr_len; /* byte length of previous char */
				619	static int num_complex_braces; /* Complex \{...} count */
				620	static int regnpar; /* () count. */
				621	#ifdef FEAT_SYN_HL
				622	static int regnzpar; /* \z() count. */
				623	static int re_has_z; /* \z item detected */
				624	#endif
				625	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				626	static long regsize; /* Code size. */
				627	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				628	static unsigned regflags; /* RF_ flags for prog */
				629	static long brace_min[10]; /* Minimums for complex brace repeats */
				630	static long brace_max[10]; /* Maximums for complex brace repeats */
				631	static int brace_count[10]; /* Current counts for complex brace repeats */
				632	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				633	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				634	#endif
				635	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				636
				637	static int reg_magic; /* magicness of the pattern: */
				638	#define MAGIC_NONE 1 /* "\V" very unmagic */
				639	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				640	#define MAGIC_ON 3 /* "\m" or 'magic' */
				641	#define MAGIC_ALL 4 /* "\v" very magic */
				642
				643	static int reg_string; /* matching with a string instead of a buffer
				644	line */
				645
				646	/*
				647	* META contains all characters that may be magic, except '^' and '$'.
				648	*/
				649
				650	#ifdef EBCDIC
				651	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				652	#else
				653	/* META[] is used often enough to justify turning it into a table. */
				654	static char_u META_flags[] = {
				655	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				656	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				657	/* % & ( ) * + . */
				658	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				659	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				660	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				661	/* @ A C D F H I K L M O */
				662	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				663	/* P S U V W X Z [ _ */
				664	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				665	/* a c d f h i k l m n o */
				666	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				667	/* p s u v w x z { \| ~ */
				668	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				669	};
				670	#endif
				671
				672	static int curchr;
				673
				674	/* arguments for reg() */
				675	#define REG_NOPAREN 0 /* toplevel reg() */
				676	#define REG_PAREN 1 /* */
				677	#define REG_ZPAREN 2 /* \z(\) */
				678	#define REG_NPAREN 3 /* \%(\) */
				679
				680	/*
				681	* Forward declarations for vim_regcomp()'s friends.
				682	*/
				683	static void initchr __ARGS((char_u *));
				684	static int getchr __ARGS((void));
				685	static void skipchr_keepstart __ARGS((void));
				686	static int peekchr __ARGS((void));
				687	static void skipchr __ARGS((void));
				688	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	689	static int gethexchrs __ARGS((int maxinputlen));
				690	static int getoctchrs __ARGS((void));
				691	static int getdecchrs __ARGS((void));
				692	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	693	static void regcomp_start __ARGS((char_u *expr, int flags));
				694	static char_u reg __ARGS((int, int ));
				695	static char_u regbranch __ARGS((int flagp));
				696	static char_u regconcat __ARGS((int flagp));
				697	static char_u regpiece __ARGS((int ));
				698	static char_u regatom __ARGS((int ));
				699	static char_u *regnode __ARGS((int));
				700	static int prog_magic_wrong __ARGS((void));
				701	static char_u regnext __ARGS((char_u ));
				702	static void regc __ARGS((int b));
				703	#ifdef FEAT_MBYTE
				704	static void regmbc __ARGS((int c));
				705	#endif
				706	static void reginsert __ARGS((int, char_u *));
				707	static void reginsert_limits __ARGS((int, long, long, char_u *));
				708	static char_u re_put_long __ARGS((char_u pr, long_u val));
				709	static int read_limits __ARGS((long , long ));
				710	static void regtail __ARGS((char_u , char_u ));
				711	static void regoptail __ARGS((char_u , char_u ));
				712
				713	/*
				714	* Return TRUE if compiled regular expression "prog" can match a line break.
				715	*/
				716	int
				717	re_multiline(prog)
				718	regprog_T *prog;
				719	{
				720	return (prog->regflags & RF_HASNL);
				721	}
				722
				723	/*
				724	* Return TRUE if compiled regular expression "prog" looks before the start
				725	* position (pattern contains "\@<=" or "\@<!").
				726	*/
				727	int
				728	re_lookbehind(prog)
				729	regprog_T *prog;
				730	{
				731	return (prog->regflags & RF_LOOKBH);
				732	}
				733
				734	/*
				735	* Skip past regular expression.
				736	* Stop at end of 'p' of where 'dirc' is found ('/', '?', etc).
				737	* Take care of characters with a backslash in front of it.
				738	* Skip strings inside [ and ].
				739	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				740	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				741	* is changed in-place.
				742	*/
				743	char_u *
				744	skip_regexp(startp, dirc, magic, newp)
				745	char_u *startp;
				746	int dirc;
				747	int magic;
				748	char_u **newp;
				749	{
				750	int mymagic;
				751	char_u *p = startp;
				752
				753	if (magic)
				754	mymagic = MAGIC_ON;
				755	else
				756	mymagic = MAGIC_OFF;
				757
				758	for (; p[0] != NUL; ++p)
				759	{
				760	if (p[0] == dirc) /* found end of regexp */
				761	break;
				762	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				763	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				764	{
				765	p = skip_anyof(p + 1);
				766	if (p[0] == NUL)
				767	break;
				768	}
				769	else if (p[0] == '\\' && p[1] != NUL)
				770	{
				771	if (dirc == '?' && newp != NULL && p[1] == '?')
				772	{
				773	/* change "\?" to "?", make a copy first. */
				774	if (*newp == NULL)
				775	{
				776	*newp = vim_strsave(startp);
				777	if (*newp != NULL)
				778	p = *newp + (p - startp);
				779	}
				780	if (*newp != NULL)
				781	mch_memmove(p, p + 1, STRLEN(p));
				782	else
				783	++p;
				784	}
				785	else
				786	++p; /* skip next character */
				787	if (*p == 'v')
				788	mymagic = MAGIC_ALL;
				789	else if (*p == 'V')
				790	mymagic = MAGIC_NONE;
				791	}
				792	#ifdef FEAT_MBYTE
				793	else if (has_mbyte)
				794	p += (*mb_ptr2len_check)(p) - 1;
				795	#endif
				796	}
				797	return p;
				798	}
				799
				800	/*
				801	* vim_regcomp - compile a regular expression into internal code
				802	*
				803	* We can't allocate space until we know how big the compiled form will be,
				804	* but we can't compile it (and thus know how big it is) until we've got a
				805	* place to put the code. So we cheat: we compile it twice, once with code
				806	* generation turned off and size counting turned on, and once "for real".
				807	* This also means that we don't allocate space until we are sure that the
				808	* thing really will compile successfully, and we never have to move the
				809	* code and thus invalidate pointers into it. (Note that it has to be in
				810	* one piece because vim_free() must be able to free it all.)
				811	*
				812	* Whether upper/lower case is to be ignored is decided when executing the
				813	* program, it does not matter here.
				814	*
				815	* Beware that the optimization-preparation code in here knows about some
				816	* of the structure of the compiled regexp.
				817	* "re_flags": RE_MAGIC and/or RE_STRING.
				818	*/
				819	regprog_T *
				820	vim_regcomp(expr, re_flags)
				821	char_u *expr;
				822	int re_flags;
				823	{
				824	regprog_T *r;
				825	char_u *scan;
				826	char_u *longest;
				827	int len;
				828	int flags;
				829
				830	if (expr == NULL)
				831	EMSG_RET_NULL(_(e_null));
				832
				833	init_class_tab();
				834
				835	/*
				836	* First pass: determine size, legality.
				837	*/
				838	regcomp_start(expr, re_flags);
				839	regcode = JUST_CALC_SIZE;
				840	regc(REGMAGIC);
				841	if (reg(REG_NOPAREN, &flags) == NULL)
				842	return NULL;
				843
				844	/* Small enough for pointer-storage convention? */
				845	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				846	if (regsize >= 65536L - 256L)
				847	EMSG_RET_NULL(_("E339: Pattern too long"));
				848	#endif
				849
				850	/* Allocate space. */
				851	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				852	if (r == NULL)
				853	return NULL;
				854
				855	/*
				856	* Second pass: emit code.
				857	*/
				858	regcomp_start(expr, re_flags);
				859	regcode = r->program;
				860	regc(REGMAGIC);
				861	if (reg(REG_NOPAREN, &flags) == NULL)
				862	{
				863	vim_free(r);
				864	return NULL;
				865	}
				866
				867	/* Dig out information for optimizations. */
				868	r->regstart = NUL; /* Worst-case defaults. */
				869	r->reganch = 0;
				870	r->regmust = NULL;
				871	r->regmlen = 0;
				872	r->regflags = regflags;
				873	if (flags & HASNL)
				874	r->regflags \|= RF_HASNL;
				875	if (flags & HASLOOKBH)
				876	r->regflags \|= RF_LOOKBH;
				877	#ifdef FEAT_SYN_HL
				878	/* Remember whether this pattern has any \z specials in it. */
				879	r->reghasz = re_has_z;
				880	#endif
				881	scan = r->program + 1; /* First BRANCH. */
				882	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				883	{
				884	scan = OPERAND(scan);
				885
				886	/* Starting-point info. */
				887	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				888	{
				889	r->reganch++;
				890	scan = regnext(scan);
				891	}
				892
				893	if (OP(scan) == EXACTLY)
				894	{
				895	#ifdef FEAT_MBYTE
				896	if (has_mbyte)
				897	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				898	else
				899	#endif
				900	r->regstart = *OPERAND(scan);
				901	}
				902	else if ((OP(scan) == BOW
				903	\|\| OP(scan) == EOW
				904	\|\| OP(scan) == NOTHING
				905	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				906	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				907	&& OP(regnext(scan)) == EXACTLY)
				908	{
				909	#ifdef FEAT_MBYTE
				910	if (has_mbyte)
				911	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				912	else
				913	#endif
				914	r->regstart = *OPERAND(regnext(scan));
				915	}
				916
				917	/*
				918	* If there's something expensive in the r.e., find the longest
				919	* literal string that must appear and make it the regmust. Resolve
				920	* ties in favor of later strings, since the regstart check works
				921	* with the beginning of the r.e. and avoiding duplication
				922	* strengthens checking. Not a strong reason, but sufficient in the
				923	* absence of others.
				924	*/
				925	/*
				926	* When the r.e. starts with BOW, it is faster to look for a regmust
				927	* first. Used a lot for "#" and "*" commands. (Added by mool).
				928	*/
				929	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				930	&& !(flags & HASNL))
				931	{
				932	longest = NULL;
				933	len = 0;
				934	for (; scan != NULL; scan = regnext(scan))
				935	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				936	{
				937	longest = OPERAND(scan);
				938	len = (int)STRLEN(OPERAND(scan));
				939	}
				940	r->regmust = longest;
				941	r->regmlen = len;
				942	}
				943	}
				944	#ifdef DEBUG
				945	regdump(expr, r);
				946	#endif
				947	return r;
				948	}
				949
				950	/*
				951	* Setup to parse the regexp. Used once to get the length and once to do it.
				952	*/
				953	static void
				954	regcomp_start(expr, re_flags)
				955	char_u *expr;
				956	int re_flags; /* see vim_regcomp() */
				957	{
				958	initchr(expr);
				959	if (re_flags & RE_MAGIC)
				960	reg_magic = MAGIC_ON;
				961	else
				962	reg_magic = MAGIC_OFF;
				963	reg_string = (re_flags & RE_STRING);
				964
				965	num_complex_braces = 0;
				966	regnpar = 1;
				967	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				968	#ifdef FEAT_SYN_HL
				969	regnzpar = 1;
				970	re_has_z = 0;
				971	#endif
				972	regsize = 0L;
				973	regflags = 0;
				974	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				975	had_eol = FALSE;
				976	#endif
				977	}
				978
				979	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				980	/*
				981	* Check if during the previous call to vim_regcomp the EOL item "$" has been
				982	* found. This is messy, but it works fine.
				983	*/
				984	int
				985	vim_regcomp_had_eol()
				986	{
				987	return had_eol;
				988	}
				989	#endif
				990
				991	/*
				992	* reg - regular expression, i.e. main body or parenthesized thing
				993	*
				994	* Caller must absorb opening parenthesis.
				995	*
				996	* Combining parenthesis handling with the base level of regular expression
				997	* is a trifle forced, but the need to tie the tails of the branches to what
				998	* follows makes it hard to avoid.
				999	*/
				1000	static char_u *
				1001	reg(paren, flagp)
				1002	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1003	int *flagp;
				1004	{
				1005	char_u *ret;
				1006	char_u *br;
				1007	char_u *ender;
				1008	int parno = 0;
				1009	int flags;
				1010
				1011	flagp = HASWIDTH; / Tentatively. */
				1012
				1013	#ifdef FEAT_SYN_HL
				1014	if (paren == REG_ZPAREN)
				1015	{
				1016	/* Make a ZOPEN node. */
				1017	if (regnzpar >= NSUBEXP)
				1018	EMSG_RET_NULL(_("E50: Too many \\z("));
				1019	parno = regnzpar;
				1020	regnzpar++;
				1021	ret = regnode(ZOPEN + parno);
				1022	}
				1023	else
				1024	#endif
				1025	if (paren == REG_PAREN)
				1026	{
				1027	/* Make a MOPEN node. */
				1028	if (regnpar >= NSUBEXP)
				1029	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1030	parno = regnpar;
				1031	++regnpar;
				1032	ret = regnode(MOPEN + parno);
				1033	}
				1034	else if (paren == REG_NPAREN)
				1035	{
				1036	/* Make a NOPEN node. */
				1037	ret = regnode(NOPEN);
				1038	}
				1039	else
				1040	ret = NULL;
				1041
				1042	/* Pick up the branches, linking them together. */
				1043	br = regbranch(&flags);
				1044	if (br == NULL)
				1045	return NULL;
				1046	if (ret != NULL)
				1047	regtail(ret, br); /* [MZ]OPEN -> first. */
				1048	else
				1049	ret = br;
				1050	/* If one of the branches can be zero-width, the whole thing can.
				1051	* If one of the branches has * at start or matches a line-break, the
				1052	* whole thing can. */
				1053	if (!(flags & HASWIDTH))
				1054	*flagp &= ~HASWIDTH;
				1055	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1056	while (peekchr() == Magic('\|'))
				1057	{
				1058	skipchr();
				1059	br = regbranch(&flags);
				1060	if (br == NULL)
				1061	return NULL;
				1062	regtail(ret, br); /* BRANCH -> BRANCH. */
				1063	if (!(flags & HASWIDTH))
				1064	*flagp &= ~HASWIDTH;
				1065	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1066	}
				1067
				1068	/* Make a closing node, and hook it on the end. */
				1069	ender = regnode(
				1070	#ifdef FEAT_SYN_HL
				1071	paren == REG_ZPAREN ? ZCLOSE + parno :
				1072	#endif
				1073	paren == REG_PAREN ? MCLOSE + parno :
				1074	paren == REG_NPAREN ? NCLOSE : END);
				1075	regtail(ret, ender);
				1076
				1077	/* Hook the tails of the branches to the closing node. */
				1078	for (br = ret; br != NULL; br = regnext(br))
				1079	regoptail(br, ender);
				1080
				1081	/* Check for proper termination. */
				1082	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1083	{
				1084	#ifdef FEAT_SYN_HL
				1085	if (paren == REG_ZPAREN)
				1086	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1087	else
				1088	#endif
				1089	if (paren == REG_NPAREN)
				1090	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1091	else
				1092	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1093	}
				1094	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1095	{
				1096	if (curchr == Magic(')'))
				1097	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1098	else
				1099	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1100	/* NOTREACHED */
				1101	}
				1102	/*
				1103	* Here we set the flag allowing back references to this set of
				1104	* parentheses.
				1105	*/
				1106	if (paren == REG_PAREN)
				1107	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1108	return ret;
				1109	}
				1110
				1111	/*
				1112	* regbranch - one alternative of an \| operator
				1113	*
				1114	* Implements the & operator.
				1115	*/
				1116	static char_u *
				1117	regbranch(flagp)
				1118	int *flagp;
				1119	{
				1120	char_u *ret;
				1121	char_u *chain = NULL;
				1122	char_u *latest;
				1123	int flags;
				1124
				1125	flagp = WORST \| HASNL; / Tentatively. */
				1126
				1127	ret = regnode(BRANCH);
				1128	for (;;)
				1129	{
				1130	latest = regconcat(&flags);
				1131	if (latest == NULL)
				1132	return NULL;
				1133	/* If one of the branches has width, the whole thing has. If one of
				1134	* the branches anchors at start-of-line, the whole thing does.
				1135	* If one of the branches uses look-behind, the whole thing does. */
				1136	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1137	/* If one of the branches doesn't match a line-break, the whole thing
				1138	* doesn't. */
				1139	*flagp &= ~HASNL \| (flags & HASNL);
				1140	if (chain != NULL)
				1141	regtail(chain, latest);
				1142	if (peekchr() != Magic('&'))
				1143	break;
				1144	skipchr();
				1145	regtail(latest, regnode(END)); /* operand ends */
				1146	reginsert(MATCH, latest);
				1147	chain = latest;
				1148	}
				1149
				1150	return ret;
				1151	}
				1152
				1153	/*
				1154	* regconcat - one alternative of an | or & operator
				1155	*
				1156	* Implements the concatenation operator.
				1157	*/
				1158	static char_u *
				1159	regconcat(flagp)
				1160	int *flagp;
				1161	{
				1162	char_u *first = NULL;
				1163	char_u *chain = NULL;
				1164	char_u *latest;
				1165	int flags;
				1166	int cont = TRUE;
				1167
				1168	flagp = WORST; / Tentatively. */
				1169
				1170	while (cont)
				1171	{
				1172	switch (peekchr())
				1173	{
				1174	case NUL:
				1175	case Magic('\|'):
				1176	case Magic('&'):
				1177	case Magic(')'):
				1178	cont = FALSE;
				1179	break;
				1180	case Magic('Z'):
				1181	#ifdef FEAT_MBYTE
				1182	regflags \|= RF_ICOMBINE;
				1183	#endif
				1184	skipchr_keepstart();
				1185	break;
				1186	case Magic('c'):
				1187	regflags \|= RF_ICASE;
				1188	skipchr_keepstart();
				1189	break;
				1190	case Magic('C'):
				1191	regflags \|= RF_NOICASE;
				1192	skipchr_keepstart();
				1193	break;
				1194	case Magic('v'):
				1195	reg_magic = MAGIC_ALL;
				1196	skipchr_keepstart();
				1197	curchr = -1;
				1198	break;
				1199	case Magic('m'):
				1200	reg_magic = MAGIC_ON;
				1201	skipchr_keepstart();
				1202	curchr = -1;
				1203	break;
				1204	case Magic('M'):
				1205	reg_magic = MAGIC_OFF;
				1206	skipchr_keepstart();
				1207	curchr = -1;
				1208	break;
				1209	case Magic('V'):
				1210	reg_magic = MAGIC_NONE;
				1211	skipchr_keepstart();
				1212	curchr = -1;
				1213	break;
				1214	default:
				1215	latest = regpiece(&flags);
				1216	if (latest == NULL)
				1217	return NULL;
				1218	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1219	if (chain == NULL) /* First piece. */
				1220	*flagp \|= flags & SPSTART;
				1221	else
				1222	regtail(chain, latest);
				1223	chain = latest;
				1224	if (first == NULL)
				1225	first = latest;
				1226	break;
				1227	}
				1228	}
				1229	if (first == NULL) /* Loop ran zero times. */
				1230	first = regnode(NOTHING);
				1231	return first;
				1232	}
				1233
				1234	/*
				1235	* regpiece - something followed by possible [*+=]
				1236	*
				1237	* Note that the branching code sequences used for = and the general cases
				1238	* of * and + are somewhat optimized: they use the same NOTHING node as
				1239	* both the endmarker for their branch list and the body of the last branch.
				1240	* It might seem that this node could be dispensed with entirely, but the
				1241	* endmarker role is not redundant.
				1242	*/
				1243	static char_u *
				1244	regpiece(flagp)
				1245	int *flagp;
				1246	{
				1247	char_u *ret;
				1248	int op;
				1249	char_u *next;
				1250	int flags;
				1251	long minval;
				1252	long maxval;
				1253
				1254	ret = regatom(&flags);
				1255	if (ret == NULL)
				1256	return NULL;
				1257
				1258	op = peekchr();
				1259	if (re_multi_type(op) == NOT_MULTI)
				1260	{
				1261	*flagp = flags;
				1262	return ret;
				1263	}
				1264	if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
				1265	{
				1266	if (op == Magic('*'))
				1267	EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
				1268	reg_magic >= MAGIC_ON);
				1269	if (op == Magic('+'))
				1270	EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
				1271	reg_magic == MAGIC_ALL);
				1272	/* "\{}" is checked below, it's allowed when there is an upper limit */
				1273	}
				1274	/* default flags */
				1275	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1276
				1277	skipchr();
				1278	switch (op)
				1279	{
				1280	case Magic('*'):
				1281	if (flags & SIMPLE)
				1282	reginsert(STAR, ret);
				1283	else
				1284	{
				1285	/* Emit x* as (x&\|), where & means "self". */
				1286	reginsert(BRANCH, ret); /* Either x */
				1287	regoptail(ret, regnode(BACK)); /* and loop */
				1288	regoptail(ret, ret); /* back */
				1289	regtail(ret, regnode(BRANCH)); /* or */
				1290	regtail(ret, regnode(NOTHING)); /* null. */
				1291	}
				1292	break;
				1293
				1294	case Magic('+'):
				1295	if (flags & SIMPLE)
				1296	reginsert(PLUS, ret);
				1297	else
				1298	{
				1299	/* Emit x+ as x(&\|), where & means "self". */
				1300	next = regnode(BRANCH); /* Either */
				1301	regtail(ret, next);
				1302	regtail(regnode(BACK), ret); /* loop back */
				1303	regtail(next, regnode(BRANCH)); /* or */
				1304	regtail(ret, regnode(NOTHING)); /* null. */
				1305	}
				1306	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1307	break;
				1308
				1309	case Magic('@'):
				1310	{
				1311	int lop = END;
				1312
				1313	switch (no_Magic(getchr()))
				1314	{
				1315	case '=': lop = MATCH; break; /* \@= */
				1316	case '!': lop = NOMATCH; break; /* \@! */
				1317	case '>': lop = SUBPAT; break; /* \@> */
				1318	case '<': switch (no_Magic(getchr()))
				1319	{
				1320	case '=': lop = BEHIND; break; /* \@<= */
				1321	case '!': lop = NOBEHIND; break; /* \@<! */
				1322	}
				1323	}
				1324	if (lop == END)
				1325	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1326	reg_magic == MAGIC_ALL);
				1327	/* Look behind must match with behind_pos. */
				1328	if (lop == BEHIND \|\| lop == NOBEHIND)
				1329	{
				1330	regtail(ret, regnode(BHPOS));
				1331	*flagp \|= HASLOOKBH;
				1332	}
				1333	regtail(ret, regnode(END)); /* operand ends */
				1334	reginsert(lop, ret);
				1335	break;
				1336	}
				1337
				1338	case Magic('?'):
				1339	case Magic('='):
				1340	/* Emit x= as (x\|) */
				1341	reginsert(BRANCH, ret); /* Either x */
				1342	regtail(ret, regnode(BRANCH)); /* or */
				1343	next = regnode(NOTHING); /* null. */
				1344	regtail(ret, next);
				1345	regoptail(ret, next);
				1346	break;
				1347
				1348	case Magic('{'):
				1349	if (!read_limits(&minval, &maxval))
				1350	return NULL;
				1351	if (!(flags & HASWIDTH) && (maxval > minval
				1352	? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
				1353	EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
				1354	reg_magic == MAGIC_ALL);
				1355	if (flags & SIMPLE)
				1356	{
				1357	reginsert(BRACE_SIMPLE, ret);
				1358	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1359	}
				1360	else
				1361	{
				1362	if (num_complex_braces >= 10)
				1363	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1364	reg_magic == MAGIC_ALL);
				1365	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1366	regoptail(ret, regnode(BACK));
				1367	regoptail(ret, ret);
				1368	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1369	++num_complex_braces;
				1370	}
				1371	if (minval > 0 && maxval > 0)
				1372	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1373	break;
				1374	}
				1375	if (re_multi_type(peekchr()) != NOT_MULTI)
				1376	{
				1377	/* Can't have a multi follow a multi. */
				1378	if (peekchr() == Magic('*'))
				1379	sprintf((char )IObuff, _("E61: Nested %s"),
				1380	reg_magic >= MAGIC_ON ? "" : "\\");
				1381	else
				1382	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1383	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1384	EMSG_RET_NULL(IObuff);
				1385	}
				1386
				1387	return ret;
				1388	}
				1389
				1390	/*
				1391	* regatom - the lowest level
				1392	*
				1393	* Optimization: gobbles an entire sequence of ordinary characters so that
				1394	* it can turn them into a single node, which is smaller to store and
				1395	* faster to run. Don't do this when one_exactly is set.
				1396	*/
				1397	static char_u *
				1398	regatom(flagp)
				1399	int *flagp;
				1400	{
				1401	char_u *ret;
				1402	int flags;
				1403	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				1404	int c;
				1405	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1406	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1407	FNAME, SFNAME, PRINT, SPRINT,
				1408	WHITE, NWHITE, DIGIT, NDIGIT,
				1409	HEX, NHEX, OCTAL, NOCTAL,
				1410	WORD, NWORD, HEAD, NHEAD,
				1411	ALPHA, NALPHA, LOWER, NLOWER,
				1412	UPPER, NUPPER
				1413	};
				1414	char_u *p;
				1415	int extra = 0;
				1416
				1417	flagp = WORST; / Tentatively. */
				1418	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				1419
				1420	c = getchr();
				1421	switch (c)
				1422	{
				1423	case Magic('^'):
				1424	ret = regnode(BOL);
				1425	break;
				1426
				1427	case Magic('$'):
				1428	ret = regnode(EOL);
				1429	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1430	had_eol = TRUE;
				1431	#endif
				1432	break;
				1433
				1434	case Magic('<'):
				1435	ret = regnode(BOW);
				1436	break;
				1437
				1438	case Magic('>'):
				1439	ret = regnode(EOW);
				1440	break;
				1441
				1442	case Magic('_'):
				1443	c = no_Magic(getchr());
				1444	if (c == '^') /* "\_^" is start-of-line */
				1445	{
				1446	ret = regnode(BOL);
				1447	break;
				1448	}
				1449	if (c == '$') /* "\_$" is end-of-line */
				1450	{
				1451	ret = regnode(EOL);
				1452	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1453	had_eol = TRUE;
				1454	#endif
				1455	break;
				1456	}
				1457
				1458	extra = ADD_NL;
				1459	*flagp \|= HASNL;
				1460
				1461	/* "\_[" is character range plus newline */
				1462	if (c == '[')
				1463	goto collection;
				1464
				1465	/* "\_x" is character class plus newline */
				1466	/FALLTHROUGH/
				1467
				1468	/*
				1469	* Character classes.
				1470	*/
				1471	case Magic('.'):
				1472	case Magic('i'):
				1473	case Magic('I'):
				1474	case Magic('k'):
				1475	case Magic('K'):
				1476	case Magic('f'):
				1477	case Magic('F'):
				1478	case Magic('p'):
				1479	case Magic('P'):
				1480	case Magic('s'):
				1481	case Magic('S'):
				1482	case Magic('d'):
				1483	case Magic('D'):
				1484	case Magic('x'):
				1485	case Magic('X'):
				1486	case Magic('o'):
				1487	case Magic('O'):
				1488	case Magic('w'):
				1489	case Magic('W'):
				1490	case Magic('h'):
				1491	case Magic('H'):
				1492	case Magic('a'):
				1493	case Magic('A'):
				1494	case Magic('l'):
				1495	case Magic('L'):
				1496	case Magic('u'):
				1497	case Magic('U'):
				1498	p = vim_strchr(classchars, no_Magic(c));
				1499	if (p == NULL)
				1500	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1501	ret = regnode(classcodes[p - classchars] + extra);
				1502	*flagp \|= HASWIDTH \| SIMPLE;
				1503	break;
				1504
				1505	case Magic('n'):
				1506	if (reg_string)
				1507	{
				1508	/* In a string "\n" matches a newline character. */
				1509	ret = regnode(EXACTLY);
				1510	regc(NL);
				1511	regc(NUL);
				1512	*flagp \|= HASWIDTH \| SIMPLE;
				1513	}
				1514	else
				1515	{
				1516	/* In buffer text "\n" matches the end of a line. */
				1517	ret = regnode(NEWL);
				1518	*flagp \|= HASWIDTH \| HASNL;
				1519	}
				1520	break;
				1521
				1522	case Magic('('):
				1523	if (one_exactly)
				1524	EMSG_ONE_RET_NULL;
				1525	ret = reg(REG_PAREN, &flags);
				1526	if (ret == NULL)
				1527	return NULL;
				1528	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1529	break;
				1530
				1531	case NUL:
				1532	case Magic('\|'):
				1533	case Magic('&'):
				1534	case Magic(')'):
				1535	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1536	/* NOTREACHED */
				1537
				1538	case Magic('='):
				1539	case Magic('?'):
				1540	case Magic('+'):
				1541	case Magic('@'):
				1542	case Magic('{'):
				1543	case Magic('*'):
				1544	c = no_Magic(c);
				1545	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1546	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1547	? "" : "\\", c);
				1548	EMSG_RET_NULL(IObuff);
				1549	/* NOTREACHED */
				1550
				1551	case Magic('~'): /* previous substitute pattern */
				1552	if (reg_prev_sub)
				1553	{
				1554	char_u *lp;
				1555
				1556	ret = regnode(EXACTLY);
				1557	lp = reg_prev_sub;
				1558	while (*lp != NUL)
				1559	regc(*lp++);
				1560	regc(NUL);
				1561	if (*reg_prev_sub != NUL)
				1562	{
				1563	*flagp \|= HASWIDTH;
				1564	if ((lp - reg_prev_sub) == 1)
				1565	*flagp \|= SIMPLE;
				1566	}
				1567	}
				1568	else
				1569	EMSG_RET_NULL(_(e_nopresub));
				1570	break;
				1571
				1572	case Magic('1'):
				1573	case Magic('2'):
				1574	case Magic('3'):
				1575	case Magic('4'):
				1576	case Magic('5'):
				1577	case Magic('6'):
				1578	case Magic('7'):
				1579	case Magic('8'):
				1580	case Magic('9'):
				1581	{
				1582	int refnum;
				1583
				1584	refnum = c - Magic('0');
				1585	/*
				1586	* Check if the back reference is legal. We must have seen the
				1587	* close brace.
				1588	* TODO: Should also check that we don't refer to something
				1589	* that is repeated (+*=): what instance of the repetition
				1590	* should we match?
				1591	*/
				1592	if (!had_endbrace[refnum])
				1593	{
				1594	/* Trick: check if "@<=" or "@<!" follows, in which case
				1595	* the \1 can appear before the referenced match. */
				1596	for (p = regparse; *p != NUL; ++p)
				1597	if (p[0] == '@' && p[1] == '<'
				1598	&& (p[2] == '!' \|\| p[2] == '='))
				1599	break;
				1600	if (*p == NUL)
				1601	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1602	}
				1603	ret = regnode(BACKREF + refnum);
				1604	}
				1605	break;
				1606
				1607	#ifdef FEAT_SYN_HL
				1608	case Magic('z'):
				1609	{
				1610	c = no_Magic(getchr());
				1611	switch (c)
				1612	{
				1613	case '(': if (reg_do_extmatch != REX_SET)
				1614	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1615	if (one_exactly)
				1616	EMSG_ONE_RET_NULL;
				1617	ret = reg(REG_ZPAREN, &flags);
				1618	if (ret == NULL)
				1619	return NULL;
				1620	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1621	re_has_z = REX_SET;
				1622	break;
				1623
				1624	case '1':
				1625	case '2':
				1626	case '3':
				1627	case '4':
				1628	case '5':
				1629	case '6':
				1630	case '7':
				1631	case '8':
				1632	case '9': if (reg_do_extmatch != REX_USE)
				1633	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1634	ret = regnode(ZREF + c - '0');
				1635	re_has_z = REX_USE;
				1636	break;
				1637
				1638	case 's': ret = regnode(MOPEN + 0);
				1639	break;
				1640
				1641	case 'e': ret = regnode(MCLOSE + 0);
				1642	break;
				1643
				1644	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1645	}
				1646	}
				1647	break;
				1648	#endif
				1649
				1650	case Magic('%'):
				1651	{
				1652	c = no_Magic(getchr());
				1653	switch (c)
				1654	{
				1655	/* () without a back reference */
				1656	case '(':
				1657	if (one_exactly)
				1658	EMSG_ONE_RET_NULL;
				1659	ret = reg(REG_NPAREN, &flags);
				1660	if (ret == NULL)
				1661	return NULL;
				1662	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1663	break;
				1664
				1665	/* Catch \%^ and \%$ regardless of where they appear in the
				1666	* pattern -- regardless of whether or not it makes sense. */
				1667	case '^':
				1668	ret = regnode(RE_BOF);
				1669	break;
				1670
				1671	case '$':
				1672	ret = regnode(RE_EOF);
				1673	break;
				1674
				1675	case '#':
				1676	ret = regnode(CURSOR);
				1677	break;
				1678
				1679	/* \%[abc]: Emit as a list of branches, all ending at the last
				1680	* branch which matches nothing. */
				1681	case '[':
				1682	if (one_exactly) /* doesn't nest */
				1683	EMSG_ONE_RET_NULL;
				1684	{
				1685	char_u *lastbranch;
				1686	char_u *lastnode = NULL;
				1687	char_u *br;
				1688
				1689	ret = NULL;
				1690	while ((c = getchr()) != ']')
				1691	{
				1692	if (c == NUL)
				1693	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1694	reg_magic == MAGIC_ALL);
				1695	br = regnode(BRANCH);
				1696	if (ret == NULL)
				1697	ret = br;
				1698	else
				1699	regtail(lastnode, br);
				1700
				1701	ungetchr();
				1702	one_exactly = TRUE;
				1703	lastnode = regatom(flagp);
				1704	one_exactly = FALSE;
				1705	if (lastnode == NULL)
				1706	return NULL;
				1707	}
				1708	if (ret == NULL)
				1709	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1710	reg_magic == MAGIC_ALL);
				1711	lastbranch = regnode(BRANCH);
				1712	br = regnode(NOTHING);
				1713	if (ret != JUST_CALC_SIZE)
				1714	{
				1715	regtail(lastnode, br);
				1716	regtail(lastbranch, br);
				1717	/* connect all branches to the NOTHING
				1718	* branch at the end */
				1719	for (br = ret; br != lastnode; )
				1720	{
				1721	if (OP(br) == BRANCH)
				1722	{
				1723	regtail(br, lastbranch);
				1724	br = OPERAND(br);
				1725	}
				1726	else
				1727	br = regnext(br);
				1728	}
				1729	}
				1730	*flagp &= ~HASWIDTH;
				1731	break;
				1732	}
				1733
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1734	case 'd': /* %d123 decimal */
				1735	case 'o': /* %o123 octal */
				1736	case 'x': /* %xab hex 2 */
				1737	case 'u': /* %uabcd hex 4 */
				1738	case 'U': /* %U1234abcd hex 8 */
				1739	{
				1740	int i;
				1741
				1742	switch (c)
				1743	{
				1744	case 'd': i = getdecchrs(); break;
				1745	case 'o': i = getoctchrs(); break;
				1746	case 'x': i = gethexchrs(2); break;
				1747	case 'u': i = gethexchrs(4); break;
				1748	case 'U': i = gethexchrs(8); break;
				1749	default: i = -1; break;
				1750	}
				1751
				1752	if (i < 0)
				1753	EMSG_M_RET_NULL(
				1754	_("E678: Invalid character after %s%%[dxouU]"),
				1755	reg_magic == MAGIC_ALL);
				1756	ret = regnode(EXACTLY);
				1757	if (i == 0)
				1758	regc(0x0a);
				1759	else
				1760	#ifdef FEAT_MBYTE
				1761	regmbc(i);
				1762	#else
				1763	regc(i);
				1764	#endif
				1765	regc(NUL);
				1766	*flagp \|= HASWIDTH;
				1767	break;
				1768	}
				1769
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1770	default:
				1771	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1772	{
				1773	long_u n = 0;
				1774	int cmp;
				1775
				1776	cmp = c;
				1777	if (cmp == '<' \|\| cmp == '>')
				1778	c = getchr();
				1779	while (VIM_ISDIGIT(c))
				1780	{
				1781	n = n * 10 + (c - '0');
				1782	c = getchr();
				1783	}
				1784	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1785	{
				1786	if (c == 'l')
				1787	ret = regnode(RE_LNUM);
				1788	else if (c == 'c')
				1789	ret = regnode(RE_COL);
				1790	else
				1791	ret = regnode(RE_VCOL);
				1792	if (ret == JUST_CALC_SIZE)
				1793	regsize += 5;
				1794	else
				1795	{
				1796	/* put the number and the optional
				1797	* comparator after the opcode */
				1798	regcode = re_put_long(regcode, n);
				1799	*regcode++ = cmp;
				1800	}
				1801	break;
				1802	}
				1803	}
				1804
				1805	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1806	reg_magic == MAGIC_ALL);
				1807	}
				1808	}
				1809	break;
				1810
				1811	case Magic('['):
				1812	collection:
				1813	{
				1814	char_u *lp;
				1815
				1816	/*
				1817	* If there is no matching ']', we assume the '[' is a normal
				1818	* character. This makes 'incsearch' and ":help [" work.
				1819	*/
				1820	lp = skip_anyof(regparse);
				1821	if (lp == ']') / there is a matching ']' */
				1822	{
				1823	int startc = -1; /* > 0 when next '-' is a range */
				1824	int endc;
				1825
				1826	/*
				1827	* In a character class, different parsing rules apply.
				1828	* Not even \ is special anymore, nothing is.
				1829	*/
				1830	if (regparse == '^') / Complement of range. */
				1831	{
				1832	ret = regnode(ANYBUT + extra);
				1833	regparse++;
				1834	}
				1835	else
				1836	ret = regnode(ANYOF + extra);
				1837
				1838	/* At the start ']' and '-' mean the literal character. */
				1839	if (regparse == ']' \|\| regparse == '-')
				1840	regc(*regparse++);
				1841
				1842	while (regparse != NUL && regparse != ']')
				1843	{
				1844	if (*regparse == '-')
				1845	{
				1846	++regparse;
				1847	/* The '-' is not used for a range at the end and
				1848	* after or before a '\n'. */
				1849	if (regparse == ']' \|\| regparse == NUL
				1850	\|\| startc == -1
				1851	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				1852	{
				1853	regc('-');
				1854	startc = '-'; /* [--x] is a range */
				1855	}
				1856	else
				1857	{
				1858	#ifdef FEAT_MBYTE
				1859	if (has_mbyte)
				1860	endc = mb_ptr2char_adv(&regparse);
				1861	else
				1862	#endif
				1863	endc = *regparse++;
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1864
				1865	/* Handle \o40, \x20 and \u20AC style sequences */
				1866	if (endc == '\\' && !cpo_lit)
				1867	endc = coll_get_char();
				1868
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1869	if (startc > endc)
				1870	EMSG_RET_NULL(_(e_invrange));
				1871	#ifdef FEAT_MBYTE
				1872	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1873	\|\| (*mb_char2len)(endc) > 1))
				1874	{
				1875	/* Limit to a range of 256 chars */
				1876	if (endc > startc + 256)
				1877	EMSG_RET_NULL(_(e_invrange));
				1878	while (++startc <= endc)
				1879	regmbc(startc);
				1880	}
				1881	else
				1882	#endif
				1883	{
				1884	#ifdef EBCDIC
				1885	int alpha_only = FALSE;
				1886
				1887	/* for alphabetical range skip the gaps
				1888	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1889	if (isalpha(startc) && isalpha(endc))
				1890	alpha_only = TRUE;
				1891	#endif
				1892	while (++startc <= endc)
				1893	#ifdef EBCDIC
				1894	if (!alpha_only \|\| isalpha(startc))
				1895	#endif
				1896	regc(startc);
				1897	}
				1898	startc = -1;
				1899	}
				1900	}
				1901	/*
				1902	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1903	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1904	* 'cpoptions' is not included.
				1905	*/
				1906	else if (*regparse == '\\'
				1907	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1908	\|\| (!cpo_lit
				1909	&& vim_strchr(REGEXP_ABBR,
				1910	regparse[1]) != NULL)))
				1911	{
				1912	regparse++;
				1913	if (*regparse == 'n')
				1914	{
				1915	/* '\n' in range: also match NL */
				1916	if (ret != JUST_CALC_SIZE)
				1917	{
				1918	if (*ret == ANYBUT)
				1919	*ret = ANYBUT + ADD_NL;
				1920	else if (*ret == ANYOF)
				1921	*ret = ANYOF + ADD_NL;
				1922	/* else: must have had a \n already */
				1923	}
				1924	*flagp \|= HASNL;
				1925	regparse++;
				1926	startc = -1;
				1927	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1928	else if (*regparse == 'd'
				1929	\|\| *regparse == 'o'
				1930	\|\| *regparse == 'x'
				1931	\|\| *regparse == 'u'
				1932	\|\| *regparse == 'U')
				1933	{
				1934	startc = coll_get_char();
				1935	if (startc == 0)
				1936	regc(0x0a);
				1937	else
				1938	#ifdef FEAT_MBYTE
				1939	regmbc(startc);
				1940	#else
				1941	regc(startc);
				1942	#endif
				1943	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1944	else
				1945	{
				1946	startc = backslash_trans(*regparse++);
				1947	regc(startc);
				1948	}
				1949	}
				1950	else if (*regparse == '[')
				1951	{
				1952	int c_class;
				1953	int cu;
				1954
				1955	c_class = skip_class_name(&regparse);
				1956	startc = -1;
				1957	/* Characters assumed to be 8 bits! */
				1958	switch (c_class)
				1959	{
				1960	case CLASS_NONE:
				1961	/* literal '[', allow [[-x] as a range */
				1962	startc = *regparse++;
				1963	regc(startc);
				1964	break;
				1965	case CLASS_ALNUM:
				1966	for (cu = 1; cu <= 255; cu++)
				1967	if (isalnum(cu))
				1968	regc(cu);
				1969	break;
				1970	case CLASS_ALPHA:
				1971	for (cu = 1; cu <= 255; cu++)
				1972	if (isalpha(cu))
				1973	regc(cu);
				1974	break;
				1975	case CLASS_BLANK:
				1976	regc(' ');
				1977	regc('\t');
				1978	break;
				1979	case CLASS_CNTRL:
				1980	for (cu = 1; cu <= 255; cu++)
				1981	if (iscntrl(cu))
				1982	regc(cu);
				1983	break;
				1984	case CLASS_DIGIT:
				1985	for (cu = 1; cu <= 255; cu++)
				1986	if (VIM_ISDIGIT(cu))
				1987	regc(cu);
				1988	break;
				1989	case CLASS_GRAPH:
				1990	for (cu = 1; cu <= 255; cu++)
				1991	if (isgraph(cu))
				1992	regc(cu);
				1993	break;
				1994	case CLASS_LOWER:
				1995	for (cu = 1; cu <= 255; cu++)
				1996	if (islower(cu))
				1997	regc(cu);
				1998	break;
				1999	case CLASS_PRINT:
				2000	for (cu = 1; cu <= 255; cu++)
				2001	if (vim_isprintc(cu))
				2002	regc(cu);
				2003	break;
				2004	case CLASS_PUNCT:
				2005	for (cu = 1; cu <= 255; cu++)
				2006	if (ispunct(cu))
				2007	regc(cu);
				2008	break;
				2009	case CLASS_SPACE:
				2010	for (cu = 9; cu <= 13; cu++)
				2011	regc(cu);
				2012	regc(' ');
				2013	break;
				2014	case CLASS_UPPER:
				2015	for (cu = 1; cu <= 255; cu++)
				2016	if (isupper(cu))
				2017	regc(cu);
				2018	break;
				2019	case CLASS_XDIGIT:
				2020	for (cu = 1; cu <= 255; cu++)
				2021	if (vim_isxdigit(cu))
				2022	regc(cu);
				2023	break;
				2024	case CLASS_TAB:
				2025	regc('\t');
				2026	break;
				2027	case CLASS_RETURN:
				2028	regc('\r');
				2029	break;
				2030	case CLASS_BACKSPACE:
				2031	regc('\b');
				2032	break;
				2033	case CLASS_ESCAPE:
				2034	regc('\033');
				2035	break;
				2036	}
				2037	}
				2038	else
				2039	{
				2040	#ifdef FEAT_MBYTE
				2041	if (has_mbyte)
				2042	{
				2043	int len;
				2044
				2045	/* produce a multibyte character, including any
				2046	* following composing characters */
				2047	startc = mb_ptr2char(regparse);
				2048	len = (*mb_ptr2len_check)(regparse);
				2049	if (enc_utf8 && utf_char2len(startc) != len)
				2050	startc = -1; /* composing chars */
				2051	while (--len >= 0)
				2052	regc(*regparse++);
				2053	}
				2054	else
				2055	#endif
				2056	{
				2057	startc = *regparse++;
				2058	regc(startc);
				2059	}
				2060	}
				2061	}
				2062	regc(NUL);
				2063	prevchr_len = 1; /* last char was the ']' */
				2064	if (*regparse != ']')
				2065	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2066	skipchr(); /* let's be friends with the lexer again */
				2067	*flagp \|= HASWIDTH \| SIMPLE;
				2068	break;
				2069	}
				2070	}
				2071	/* FALLTHROUGH */
				2072
				2073	default:
				2074	{
				2075	int len;
				2076
				2077	#ifdef FEAT_MBYTE
				2078	/* A multi-byte character is handled as a separate atom if it's
				2079	* before a multi. */
				2080	if (has_mbyte && (*mb_char2len)(c) > 1
				2081	&& re_multi_type(peekchr()) != NOT_MULTI)
				2082	{
				2083	ret = regnode(MULTIBYTECODE);
				2084	regmbc(c);
				2085	*flagp \|= HASWIDTH \| SIMPLE;
				2086	break;
				2087	}
				2088	#endif
				2089
				2090	ret = regnode(EXACTLY);
				2091
				2092	/*
				2093	* Append characters as long as:
				2094	* - there is no following multi, we then need the character in
				2095	* front of it as a single character operand
				2096	* - not running into a Magic character
				2097	* - "one_exactly" is not set
				2098	* But always emit at least one character. Might be a Multi,
				2099	* e.g., a "[" without matching "]".
				2100	*/
				2101	for (len = 0; c != NUL && (len == 0
				2102	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2103	&& !one_exactly
				2104	&& !is_Magic(c))); ++len)
				2105	{
				2106	c = no_Magic(c);
				2107	#ifdef FEAT_MBYTE
				2108	if (has_mbyte)
				2109	{
				2110	regmbc(c);
				2111	if (enc_utf8)
				2112	{
				2113	int off;
				2114	int l;
				2115
				2116	/* Need to get composing character too, directly
				2117	* access regparse for that, because skipchr() skips
				2118	* over composing chars. */
				2119	ungetchr();
				2120	if (*regparse == '\\' && regparse[1] != NUL)
				2121	off = 1;
				2122	else
				2123	off = 0;
				2124	for (;;)
				2125	{
				2126	l = utf_ptr2len_check(regparse + off);
				2127	if (!UTF_COMPOSINGLIKE(regparse + off,
				2128	regparse + off + l))
				2129	break;
				2130	off += l;
				2131	regmbc(utf_ptr2char(regparse + off));
				2132	}
				2133	skipchr();
				2134	}
				2135	}
				2136	else
				2137	#endif
				2138	regc(c);
				2139	c = getchr();
				2140	}
				2141	ungetchr();
				2142
				2143	regc(NUL);
				2144	*flagp \|= HASWIDTH;
				2145	if (len == 1)
				2146	*flagp \|= SIMPLE;
				2147	}
				2148	break;
				2149	}
				2150
				2151	return ret;
				2152	}
				2153
				2154	/*
				2155	* emit a node
				2156	* Return pointer to generated code.
				2157	*/
				2158	static char_u *
				2159	regnode(op)
				2160	int op;
				2161	{
				2162	char_u *ret;
				2163
				2164	ret = regcode;
				2165	if (ret == JUST_CALC_SIZE)
				2166	regsize += 3;
				2167	else
				2168	{
				2169	*regcode++ = op;
				2170	regcode++ = NUL; / Null "next" pointer. */
				2171	*regcode++ = NUL;
				2172	}
				2173	return ret;
				2174	}
				2175
				2176	/*
				2177	* Emit (if appropriate) a byte of code
				2178	*/
				2179	static void
				2180	regc(b)
				2181	int b;
				2182	{
				2183	if (regcode == JUST_CALC_SIZE)
				2184	regsize++;
				2185	else
				2186	*regcode++ = b;
				2187	}
				2188
				2189	#ifdef FEAT_MBYTE
				2190	/*
				2191	* Emit (if appropriate) a multi-byte character of code
				2192	*/
				2193	static void
				2194	regmbc(c)
				2195	int c;
				2196	{
				2197	if (regcode == JUST_CALC_SIZE)
				2198	regsize += (*mb_char2len)(c);
				2199	else
				2200	regcode += (*mb_char2bytes)(c, regcode);
				2201	}
				2202	#endif
				2203
				2204	/*
				2205	* reginsert - insert an operator in front of already-emitted operand
				2206	*
				2207	* Means relocating the operand.
				2208	*/
				2209	static void
				2210	reginsert(op, opnd)
				2211	int op;
				2212	char_u *opnd;
				2213	{
				2214	char_u *src;
				2215	char_u *dst;
				2216	char_u *place;
				2217
				2218	if (regcode == JUST_CALC_SIZE)
				2219	{
				2220	regsize += 3;
				2221	return;
				2222	}
				2223	src = regcode;
				2224	regcode += 3;
				2225	dst = regcode;
				2226	while (src > opnd)
				2227	--dst = --src;
				2228
				2229	place = opnd; /* Op node, where operand used to be. */
				2230	*place++ = op;
				2231	*place++ = NUL;
				2232	*place = NUL;
				2233	}
				2234
				2235	/*
				2236	* reginsert_limits - insert an operator in front of already-emitted operand.
				2237	* The operator has the given limit values as operands. Also set next pointer.
				2238	*
				2239	* Means relocating the operand.
				2240	*/
				2241	static void
				2242	reginsert_limits(op, minval, maxval, opnd)
				2243	int op;
				2244	long minval;
				2245	long maxval;
				2246	char_u *opnd;
				2247	{
				2248	char_u *src;
				2249	char_u *dst;
				2250	char_u *place;
				2251
				2252	if (regcode == JUST_CALC_SIZE)
				2253	{
				2254	regsize += 11;
				2255	return;
				2256	}
				2257	src = regcode;
				2258	regcode += 11;
				2259	dst = regcode;
				2260	while (src > opnd)
				2261	--dst = --src;
				2262
				2263	place = opnd; /* Op node, where operand used to be. */
				2264	*place++ = op;
				2265	*place++ = NUL;
				2266	*place++ = NUL;
				2267	place = re_put_long(place, (long_u)minval);
				2268	place = re_put_long(place, (long_u)maxval);
				2269	regtail(opnd, place);
				2270	}
				2271
				2272	/*
				2273	* Write a long as four bytes at "p" and return pointer to the next char.
				2274	*/
				2275	static char_u *
				2276	re_put_long(p, val)
				2277	char_u *p;
				2278	long_u val;
				2279	{
				2280	*p++ = (char_u) ((val >> 24) & 0377);
				2281	*p++ = (char_u) ((val >> 16) & 0377);
				2282	*p++ = (char_u) ((val >> 8) & 0377);
				2283	*p++ = (char_u) (val & 0377);
				2284	return p;
				2285	}
				2286
				2287	/*
				2288	* regtail - set the next-pointer at the end of a node chain
				2289	*/
				2290	static void
				2291	regtail(p, val)
				2292	char_u *p;
				2293	char_u *val;
				2294	{
				2295	char_u *scan;
				2296	char_u *temp;
				2297	int offset;
				2298
				2299	if (p == JUST_CALC_SIZE)
				2300	return;
				2301
				2302	/* Find last node. */
				2303	scan = p;
				2304	for (;;)
				2305	{
				2306	temp = regnext(scan);
				2307	if (temp == NULL)
				2308	break;
				2309	scan = temp;
				2310	}
				2311
				2312	if (OP(scan) == BACK)
				2313	offset = (int)(scan - val);
				2314	else
				2315	offset = (int)(val - scan);
				2316	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2317	*(scan + 2) = (char_u) (offset & 0377);
				2318	}
				2319
				2320	/*
				2321	* regoptail - regtail on item after a BRANCH; nop if none
				2322	*/
				2323	static void
				2324	regoptail(p, val)
				2325	char_u *p;
				2326	char_u *val;
				2327	{
				2328	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2329	if (p == NULL \|\| p == JUST_CALC_SIZE
				2330	\|\| (OP(p) != BRANCH
				2331	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2332	return;
				2333	regtail(OPERAND(p), val);
				2334	}
				2335
				2336	/*
				2337	* getchr() - get the next character from the pattern. We know about
				2338	* magic and such, so therefore we need a lexical analyzer.
				2339	*/
				2340
				2341	/* static int curchr; */
				2342	static int prevprevchr;
				2343	static int prevchr;
				2344	static int nextchr; /* used for ungetchr() */
				2345	/*
				2346	* Note: prevchr is sometimes -1 when we are not at the start,
				2347	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2348	* taken to be magic -- webb
				2349	*/
				2350	static int at_start; /* True when on the first character */
				2351	static int prev_at_start; /* True when on the second character */
				2352
				2353	static void
				2354	initchr(str)
				2355	char_u *str;
				2356	{
				2357	regparse = str;
				2358	prevchr_len = 0;
				2359	curchr = prevprevchr = prevchr = nextchr = -1;
				2360	at_start = TRUE;
				2361	prev_at_start = FALSE;
				2362	}
				2363
				2364	static int
				2365	peekchr()
				2366	{
				2367	if (curchr == -1)
				2368	{
				2369	switch (curchr = regparse[0])
				2370	{
				2371	case '.':
				2372	case '[':
				2373	case '~':
				2374	/* magic when 'magic' is on */
				2375	if (reg_magic >= MAGIC_ON)
				2376	curchr = Magic(curchr);
				2377	break;
				2378	case '(':
				2379	case ')':
				2380	case '{':
				2381	case '%':
				2382	case '+':
				2383	case '=':
				2384	case '?':
				2385	case '@':
				2386	case '!':
				2387	case '&':
				2388	case '\|':
				2389	case '<':
				2390	case '>':
				2391	case '#': /* future ext. */
				2392	case '"': /* future ext. */
				2393	case '\'': /* future ext. */
				2394	case ',': /* future ext. */
				2395	case '-': /* future ext. */
				2396	case ':': /* future ext. */
				2397	case ';': /* future ext. */
				2398	case '`': /* future ext. */
				2399	case '/': /* Can't be used in / command */
				2400	/* magic only after "\v" */
				2401	if (reg_magic == MAGIC_ALL)
				2402	curchr = Magic(curchr);
				2403	break;
				2404	case '*':
				2405	/* * is not magic as the very first character, eg "?*ptr" and when
				2406	* after '^', eg "/^ptr" /
				2407	if (reg_magic >= MAGIC_ON && !at_start
				2408	&& !(prev_at_start && prevchr == Magic('^')))
				2409	curchr = Magic('*');
				2410	break;
				2411	case '^':
				2412	/* '^' is only magic as the very first character and if it's after
				2413	* "\(", "\\|", "\&' or "\n" */
				2414	if (reg_magic >= MAGIC_OFF
				2415	&& (at_start
				2416	\|\| reg_magic == MAGIC_ALL
				2417	\|\| prevchr == Magic('(')
				2418	\|\| prevchr == Magic('\|')
				2419	\|\| prevchr == Magic('&')
				2420	\|\| prevchr == Magic('n')
				2421	\|\| (no_Magic(prevchr) == '('
				2422	&& prevprevchr == Magic('%'))))
				2423	{
				2424	curchr = Magic('^');
				2425	at_start = TRUE;
				2426	prev_at_start = FALSE;
				2427	}
				2428	break;
				2429	case '$':
				2430	/* '$' is only magic as the very last char and if it's in front of
				2431	* either "\\|", "\)", "\&", or "\n" */
				2432	if (reg_magic >= MAGIC_OFF)
				2433	{
				2434	char_u *p = regparse + 1;
				2435
				2436	/* ignore \c \C \m and \M after '$' */
				2437	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2438	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2439	p += 2;
				2440	if (p[0] == NUL
				2441	\|\| (p[0] == '\\'
				2442	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2443	\|\| p[1] == 'n'))
				2444	\|\| reg_magic == MAGIC_ALL)
				2445	curchr = Magic('$');
				2446	}
				2447	break;
				2448	case '\\':
				2449	{
				2450	int c = regparse[1];
				2451
				2452	if (c == NUL)
				2453	curchr = '\\'; /* trailing '\' */
				2454	else if (
				2455	#ifdef EBCDIC
				2456	vim_strchr(META, c)
				2457	#else
				2458	c <= '~' && META_flags[c]
				2459	#endif
				2460	)
				2461	{
				2462	/*
				2463	* META contains everything that may be magic sometimes,
				2464	* except ^ and $ ("\^" and "\$" are only magic after
				2465	* "\v"). We now fetch the next character and toggle its
				2466	* magicness. Therefore, \ is so meta-magic that it is
				2467	* not in META.
				2468	*/
				2469	curchr = -1;
				2470	prev_at_start = at_start;
				2471	at_start = FALSE; /* be able to say "/\ptr" /
				2472	++regparse;
				2473	peekchr();
				2474	--regparse;
				2475	curchr = toggle_Magic(curchr);
				2476	}
				2477	else if (vim_strchr(REGEXP_ABBR, c))
				2478	{
				2479	/*
				2480	* Handle abbreviations, like "\t" for TAB -- webb
				2481	*/
				2482	curchr = backslash_trans(c);
				2483	}
				2484	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2485	curchr = toggle_Magic(c);
				2486	else
				2487	{
				2488	/*
				2489	* Next character can never be (made) magic?
				2490	* Then backslashing it won't do anything.
				2491	*/
				2492	#ifdef FEAT_MBYTE
				2493	if (has_mbyte)
				2494	curchr = (*mb_ptr2char)(regparse + 1);
				2495	else
				2496	#endif
				2497	curchr = c;
				2498	}
				2499	break;
				2500	}
				2501
				2502	#ifdef FEAT_MBYTE
				2503	default:
				2504	if (has_mbyte)
				2505	curchr = (*mb_ptr2char)(regparse);
				2506	#endif
				2507	}
				2508	}
				2509
				2510	return curchr;
				2511	}
				2512
				2513	/*
				2514	* Eat one lexed character. Do this in a way that we can undo it.
				2515	*/
				2516	static void
				2517	skipchr()
				2518	{
				2519	/* peekchr() eats a backslash, do the same here */
				2520	if (*regparse == '\\')
				2521	prevchr_len = 1;
				2522	else
				2523	prevchr_len = 0;
				2524	if (regparse[prevchr_len] != NUL)
				2525	{
				2526	#ifdef FEAT_MBYTE
				2527	if (has_mbyte)
				2528	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2529	else
				2530	#endif
				2531	++prevchr_len;
				2532	}
				2533	regparse += prevchr_len;
				2534	prev_at_start = at_start;
				2535	at_start = FALSE;
				2536	prevprevchr = prevchr;
				2537	prevchr = curchr;
				2538	curchr = nextchr; /* use previously unget char, or -1 */
				2539	nextchr = -1;
				2540	}
				2541
				2542	/*
				2543	* Skip a character while keeping the value of prev_at_start for at_start.
				2544	* prevchr and prevprevchr are also kept.
				2545	*/
				2546	static void
				2547	skipchr_keepstart()
				2548	{
				2549	int as = prev_at_start;
				2550	int pr = prevchr;
				2551	int prpr = prevprevchr;
				2552
				2553	skipchr();
				2554	at_start = as;
				2555	prevchr = pr;
				2556	prevprevchr = prpr;
				2557	}
				2558
				/*
				 * getchr - return the next lexed character and consume it.
				 */
				static int
				getchr()
				{
				    int		lexed;

				    lexed = peekchr();
				    skipchr();
				    return lexed;
				}
				2567
				2568	/*
				2569	* put character back. Works only once!
				2570	*/
				2571	static void
				2572	ungetchr()
				2573	{
				2574	nextchr = curchr;
				2575	curchr = prevchr;
				2576	prevchr = prevprevchr;
				2577	at_start = prev_at_start;
				2578	prev_at_start = FALSE;
				2579
				2580	/* Backup regparse, so that it's at the same position as before the
				2581	* getchr(). */
				2582	regparse -= prevchr_len;
				2583	}
				2584
				2585	/*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2586	* get and return the value of the hex string immediately after the current
				2587	* position. Return -1 for invalid, or 0-255 for valid. Position is updated:
				2588	* blahblah\%x20asdf
				2589	* before-^ ^-after
				2590	* The parameter controls the maximum number of input characters. This will be
				2591	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2592	*/
				2593	static int
				2594	gethexchrs(maxinputlen)
				2595	int maxinputlen;
				2596	{
				2597	int nr = 0;
				2598	int c;
				2599	int i;
				2600
				2601	for (i = 0; i < maxinputlen; ++i)
				2602	{
				2603	c = regparse[0];
				2604	if (!vim_isxdigit(c))
				2605	break;
				2606	nr <<= 4;
				2607	nr \|= hex2nr(c);
				2608	++regparse;
				2609	}
				2610
				2611	if (i == 0)
				2612	return -1;
				2613	return nr;
				2614	}
				2615
				2616	/*
				2617	* get and return the value of the decimal string immediately after the
				2618	* current position. Return -1 for invalid. Consumes all digits.
				2619	*/
				2620	static int
				2621	getdecchrs()
				2622	{
				2623	int nr = 0;
				2624	int c;
				2625	int i;
				2626
				2627	for (i = 0; ; ++i)
				2628	{
				2629	c = regparse[0];
				2630	if (c < '0' \|\| c > '9')
				2631	break;
				2632	nr *= 10;
				2633	nr += c - '0';
				2634	++regparse;
				2635	}
				2636
				2637	if (i == 0)
				2638	return -1;
				2639	return nr;
				2640	}
				2641
				2642	/*
				2643	* get and return the value of the octal string immediately after the current
				2644	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2645	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2646	* treat 8 or 9 as recognised characters. Position is updated:
				2647	* blahblah\%o210asdf
				2648	* before-^ ^-after
				2649	*/
				2650	static int
				2651	getoctchrs()
				2652	{
				2653	int nr = 0;
				2654	int c;
				2655	int i;
				2656
				2657	for (i = 0; i < 3 && nr < 040; ++i)
				2658	{
				2659	c = regparse[0];
				2660	if (c < '0' \|\| c > '7')
				2661	break;
				2662	nr <<= 3;
				2663	nr \|= hex2nr(c);
				2664	++regparse;
				2665	}
				2666
				2667	if (i == 0)
				2668	return -1;
				2669	return nr;
				2670	}
				2671
				2672	/*
				2673	* Get a number after a backslash that is inside [].
				2674	* When nothing is recognized return a backslash.
				2675	*/
				2676	static int
				2677	coll_get_char()
				2678	{
				2679	int nr = -1;
				2680
				2681	switch (*regparse++)
				2682	{
				2683	case 'd': nr = getdecchrs(); break;
				2684	case 'o': nr = getoctchrs(); break;
				2685	case 'x': nr = gethexchrs(2); break;
				2686	case 'u': nr = gethexchrs(4); break;
				2687	case 'U': nr = gethexchrs(8); break;
				2688	}
				2689	if (nr < 0)
				2690	{
				2691	/* If getting the number fails be backwards compatible: the character
				2692	* is a backslash. */
				2693	--regparse;
				2694	nr = '\\';
				2695	}
				2696	return nr;
				2697	}
				2698
				2699	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2700	* read_limits - Read two integers to be taken as a minimum and maximum.
				2701	* If the first character is '-', then the range is reversed.
				2702	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2703	* missing, a very big number is the default.
				2704	*/
				2705	static int
				2706	read_limits(minval, maxval)
				2707	long *minval;
				2708	long *maxval;
				2709	{
				2710	int reverse = FALSE;
				2711	char_u *first_char;
				2712	long tmp;
				2713
				2714	if (*regparse == '-')
				2715	{
				2716	/* Starts with '-', so reverse the range later */
				2717	regparse++;
				2718	reverse = TRUE;
				2719	}
				2720	first_char = regparse;
				2721	*minval = getdigits(&regparse);
				2722	if (regparse == ',') / There is a comma */
				2723	{
				2724	if (vim_isdigit(*++regparse))
				2725	*maxval = getdigits(&regparse);
				2726	else
				2727	*maxval = MAX_LIMIT;
				2728	}
				2729	else if (VIM_ISDIGIT(*first_char))
				2730	maxval = minval; /* It was \{n} or \{-n} */
				2731	else
				2732	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2733	if (*regparse == '\\')
				2734	regparse++; /* Allow either \{...} or \{...\} */
				2735	if (regparse != '}' \|\| (maxval == 0 && *minval == 0))
				2736	{
				2737	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2738	reg_magic == MAGIC_ALL ? "" : "\\");
				2739	EMSG_RET_FAIL(IObuff);
				2740	}
				2741
				2742	/*
				2743	* Reverse the range if there was a '-', or make sure it is in the right
				2744	* order otherwise.
				2745	*/
				2746	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2747	{
				2748	tmp = *minval;
				2749	minval = maxval;
				2750	*maxval = tmp;
				2751	}
				2752	skipchr(); /* let's be friends with the lexer again */
				2753	return OK;
				2754	}
				2755
				2756	/*
				2757	* vim_regexec and friends
				2758	*/
				2759
				2760	/*
				2761	* Global work variables for vim_regexec().
				2762	*/
				2763
				2764	/* The current match-position is remembered with these variables: */
				2765	static linenr_T reglnum; /* line number, relative to first line */
				2766	static char_u regline; / start of current line */
				2767	static char_u reginput; / current input, points into "regline" */
				2768
				2769	static int need_clear_subexpr; /* subexpressions still need to be
				2770	* cleared */
				2771	#ifdef FEAT_SYN_HL
				2772	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2773	* still need to be cleared */
				2774	#endif
				2775
				2776	static int out_of_stack; /* TRUE when ran out of stack space */
				2777
				2778	/*
				2779	* Structure used to save the current input state, when it needs to be
				2780	* restored after trying a match. Used by reg_save() and reg_restore().
				2781	*/
				2782	typedef struct
				2783	{
				2784	union
				2785	{
				2786	char_u ptr; / reginput pointer, for single-line regexp */
				2787	lpos_T pos; /* reginput pos, for multi-line regexp */
				2788	} rs_u;
				2789	} regsave_T;
				2790
				2791	/* struct to save start/end pointer/position in for */
				2792	typedef struct
				2793	{
				2794	union
				2795	{
				2796	char_u *ptr;
				2797	lpos_T pos;
				2798	} se_u;
				2799	} save_se_T;
				2800
				2801	static char_u *reg_getline __ARGS((linenr_T lnum));
				2802	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2803	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2804	static void cleanup_subexpr __ARGS((void));
				2805	#ifdef FEAT_SYN_HL
				2806	static void cleanup_zsubexpr __ARGS((void));
				2807	#endif
				2808	static void reg_nextline __ARGS((void));
				2809	static void reg_save __ARGS((regsave_T *save));
				2810	static void reg_restore __ARGS((regsave_T *save));
				2811	static int reg_save_equal __ARGS((regsave_T *save));
				2812	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2813	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2814
				2815	/* Save the sub-expressions before attempting a match. */
				2816	#define save_se(savep, posp, pp) \
				2817	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				2818
				2819	/* After a failed match restore the sub-expressions. */
				2820	#define restore_se(savep, posp, pp) { \
				2821	if (REG_MULTI) \
				2822	*(posp) = (savep)->se_u.pos; \
				2823	else \
				2824	*(pp) = (savep)->se_u.ptr; }
				2825
				2826	static int re_num_cmp __ARGS((long_u val, char_u *scan));
				2827	static int regmatch __ARGS((char_u *prog));
				2828	static int regrepeat __ARGS((char_u *p, long maxcount));
				2829
				2830	#ifdef DEBUG
				2831	int regnarrate = 0;
				2832	#endif
				2833
				2834	/*
				2835	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				2836	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				2837	* contains '\c' or '\C' the value is overruled.
				2838	*/
				2839	static int ireg_ic;
				2840
				2841	#ifdef FEAT_MBYTE
				2842	/*
				2843	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				2844	* in the regexp. Defaults to false, always.
				2845	*/
				2846	static int ireg_icombine;
				2847	#endif
				2848
				2849	/*
				2850	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				2851	* slow, we keep one allocated piece of memory and only re-allocate it when
				2852	* it's too small. It's freed in vim_regexec_both() when finished.
				2853	*/
				2854	static char_u *reg_tofree;
				2855	static unsigned reg_tofreelen;
				2856
				2857	/*
				2858	* These variables are set when executing a regexp to speed up the execution.
				2859	* Which ones are set depends on whethere a single-line or multi-line match is
				2860	* done:
				2861	* single-line multi-line
				2862	* reg_match &regmatch_T NULL
				2863	* reg_mmatch NULL &regmmatch_T
				2864	* reg_startp reg_match->startp <invalid>
				2865	* reg_endp reg_match->endp <invalid>
				2866	* reg_startpos <invalid> reg_mmatch->startpos
				2867	* reg_endpos <invalid> reg_mmatch->endpos
				2868	* reg_win NULL window in which to search
				2869	* reg_buf <invalid> buffer in which to search
				2870	* reg_firstlnum <invalid> first line in which to search
				2871	* reg_maxline 0 last line nr
				2872	* reg_line_lbr FALSE or TRUE FALSE
				2873	*/
				2874	static regmatch_T *reg_match;
				2875	static regmmatch_T *reg_mmatch;
				2876	static char_u **reg_startp = NULL;
				2877	static char_u **reg_endp = NULL;
				2878	static lpos_T *reg_startpos = NULL;
				2879	static lpos_T *reg_endpos = NULL;
				2880	static win_T *reg_win;
				2881	static buf_T *reg_buf;
				2882	static linenr_T reg_firstlnum;
				2883	static linenr_T reg_maxline;
				2884	static int reg_line_lbr; /* "\n" in string is line break */
				2885
				2886	/*
				2887	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				2888	*/
				2889	static char_u *
				2890	reg_getline(lnum)
				2891	linenr_T lnum;
				2892	{
				2893	/* when looking behind for a match/no-match lnum is negative. But we
				2894	* can't go before line 1 */
				2895	if (reg_firstlnum + lnum < 1)
				2896	return NULL;
				2897	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				2898	}
				2899
				2900	static regsave_T behind_pos;
				2901
				2902	#ifdef FEAT_SYN_HL
				2903	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				2904	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				2905	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				2906	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				2907	#endif
				2908
				2909	/* TRUE if using multi-line regexp. */
				2910	#define REG_MULTI (reg_match == NULL)
				2911
				2912	/*
				2913	* Match a regexp against a string.
				2914	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2915	* Uses curbuf for line count and 'iskeyword'.
				2916	*
				2917	* Return TRUE if there is a match, FALSE if not.
				2918	*/
				2919	int
				2920	vim_regexec(rmp, line, col)
				2921	regmatch_T *rmp;
				2922	char_u line; / string to match against */
				2923	colnr_T col; /* column to start looking for match */
				2924	{
				2925	reg_match = rmp;
				2926	reg_mmatch = NULL;
				2927	reg_maxline = 0;
				2928	reg_line_lbr = FALSE;
				2929	reg_win = NULL;
				2930	ireg_ic = rmp->rm_ic;
				2931	#ifdef FEAT_MBYTE
				2932	ireg_icombine = FALSE;
				2933	#endif
				2934	return (vim_regexec_both(line, col) != 0);
				2935	}
				2936
				2937	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				2938	/*
				2939	* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
				2940	*/
				2941	int
				2942	vim_regexec_nl(rmp, line, col)
				2943	regmatch_T *rmp;
				2944	char_u line; / string to match against */
				2945	colnr_T col; /* column to start looking for match */
				2946	{
				2947	reg_match = rmp;
				2948	reg_mmatch = NULL;
				2949	reg_maxline = 0;
				2950	reg_line_lbr = TRUE;
				2951	reg_win = NULL;
				2952	ireg_ic = rmp->rm_ic;
				2953	#ifdef FEAT_MBYTE
				2954	ireg_icombine = FALSE;
				2955	#endif
				2956	return (vim_regexec_both(line, col) != 0);
				2957	}
				2958	#endif
				2959
				2960	/*
				2961	* Match a regexp against multiple lines.
				2962	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2963	* Uses curbuf for line count and 'iskeyword'.
				2964	*
				2965	* Return zero if there is no match. Return number of lines contained in the
				2966	* match otherwise.
				2967	*/
				2968	long
				2969	vim_regexec_multi(rmp, win, buf, lnum, col)
				2970	regmmatch_T *rmp;
				2971	win_T win; / window in which to search or NULL */
				2972	buf_T buf; / buffer in which to search */
				2973	linenr_T lnum; /* nr of line to start looking for match */
				2974	colnr_T col; /* column to start looking for match */
				2975	{
				2976	long r;
				2977	buf_T *save_curbuf = curbuf;
				2978
				2979	reg_match = NULL;
				2980	reg_mmatch = rmp;
				2981	reg_buf = buf;
				2982	reg_win = win;
				2983	reg_firstlnum = lnum;
				2984	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				2985	reg_line_lbr = FALSE;
				2986	ireg_ic = rmp->rmm_ic;
				2987	#ifdef FEAT_MBYTE
				2988	ireg_icombine = FALSE;
				2989	#endif
				2990
				2991	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				2992	curbuf = buf;
				2993	r = vim_regexec_both(NULL, col);
				2994	curbuf = save_curbuf;
				2995
				2996	return r;
				2997	}
				2998
				2999	/*
				3000	* Match a regexp against a string ("line" points to the string) or multiple
				3001	* lines ("line" is NULL, use reg_getline()).
				3002	*/
				3003	#ifdef HAVE_SETJMP_H
				3004	static long
				3005	vim_regexec_both(line_arg, col_arg)
				3006	char_u *line_arg;
				3007	colnr_T col_arg; /* column to start looking for match */
				3008	#else
				3009	static long
				3010	vim_regexec_both(line, col)
				3011	char_u *line;
				3012	colnr_T col; /* column to start looking for match */
				3013	#endif
				3014	{
				3015	regprog_T *prog;
				3016	char_u *s;
				3017	long retval;
				3018	#ifdef HAVE_SETJMP_H
				3019	char_u *line;
				3020	colnr_T col;
				3021	#endif
				3022
				3023	reg_tofree = NULL;
				3024
				3025	#ifdef HAVE_TRY_EXCEPT
				3026	__try
				3027	{
				3028	#endif
				3029
				3030	#ifdef HAVE_SETJMP_H
				3031	/*
				3032	* Matching with a regexp may cause a very deep recursive call of
				3033	* regmatch(). Vim will crash when running out of stack space. Catch
				3034	* this here if the system supports it.
				3035	*/
				3036	mch_startjmp();
				3037	if (SETJMP(lc_jump_env) != 0)
				3038	{
				3039	mch_didjmp();
				3040	# ifdef SIGHASARG
				3041	if (lc_signal != SIGINT)
				3042	# endif
				3043	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3044	retval = 0L;
				3045	goto theend;
				3046	}
				3047
				3048	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3049	line = line_arg;
				3050	col = col_arg;
				3051	#endif
				3052	retval = 0L;
				3053
				3054	if (REG_MULTI)
				3055	{
				3056	prog = reg_mmatch->regprog;
				3057	line = reg_getline((linenr_T)0);
				3058	reg_startpos = reg_mmatch->startpos;
				3059	reg_endpos = reg_mmatch->endpos;
				3060	}
				3061	else
				3062	{
				3063	prog = reg_match->regprog;
				3064	reg_startp = reg_match->startp;
				3065	reg_endp = reg_match->endp;
				3066	}
				3067
				3068	/* Be paranoid... */
				3069	if (prog == NULL \|\| line == NULL)
				3070	{
				3071	EMSG(_(e_null));
				3072	goto theend;
				3073	}
				3074
				3075	/* Check validity of program. */
				3076	if (prog_magic_wrong())
				3077	goto theend;
				3078
				3079	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3080	if (prog->regflags & RF_ICASE)
				3081	ireg_ic = TRUE;
				3082	else if (prog->regflags & RF_NOICASE)
				3083	ireg_ic = FALSE;
				3084
				3085	#ifdef FEAT_MBYTE
				3086	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3087	if (prog->regflags & RF_ICOMBINE)
				3088	ireg_icombine = TRUE;
				3089	#endif
				3090
				3091	/* If there is a "must appear" string, look for it. */
				3092	if (prog->regmust != NULL)
				3093	{
				3094	int c;
				3095
				3096	#ifdef FEAT_MBYTE
				3097	if (has_mbyte)
				3098	c = (*mb_ptr2char)(prog->regmust);
				3099	else
				3100	#endif
				3101	c = *prog->regmust;
				3102	s = line + col;
				3103	while ((s = cstrchr(s, c)) != NULL)
				3104	{
				3105	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3106	break; /* Found it. */
				3107	#ifdef FEAT_MBYTE
				3108	if (has_mbyte)
				3109	s += (*mb_ptr2len_check)(s);
				3110	else
				3111	#endif
				3112	++s;
				3113	}
				3114	if (s == NULL) /* Not present. */
				3115	goto theend;
				3116	}
				3117
				3118	regline = line;
				3119	reglnum = 0;
				3120	out_of_stack = FALSE;
				3121
				3122	/* Simplest case: Anchored match need be tried only once. */
				3123	if (prog->reganch)
				3124	{
				3125	int c;
				3126
				3127	#ifdef FEAT_MBYTE
				3128	if (has_mbyte)
				3129	c = (*mb_ptr2char)(regline + col);
				3130	else
				3131	#endif
				3132	c = regline[col];
				3133	if (prog->regstart == NUL
				3134	\|\| prog->regstart == c
				3135	\|\| (ireg_ic && ((
				3136	#ifdef FEAT_MBYTE
				3137	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3138	\|\| (c < 255 && prog->regstart < 255 &&
				3139	#endif
				3140	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3141	retval = regtry(prog, col);
				3142	else
				3143	retval = 0;
				3144	}
				3145	else
				3146	{
				3147	/* Messy cases: unanchored match. */
				3148	while (!got_int && !out_of_stack)
				3149	{
				3150	if (prog->regstart != NUL)
				3151	{
				3152	/* Skip until the char we know it must start with. */
				3153	s = cstrchr(regline + col, prog->regstart);
				3154	if (s == NULL)
				3155	{
				3156	retval = 0;
				3157	break;
				3158	}
				3159	col = (int)(s - regline);
				3160	}
				3161
				3162	retval = regtry(prog, col);
				3163	if (retval > 0)
				3164	break;
				3165
				3166	/* if not currently on the first line, get it again */
				3167	if (reglnum != 0)
				3168	{
				3169	regline = reg_getline((linenr_T)0);
				3170	reglnum = 0;
				3171	}
				3172	if (regline[col] == NUL)
				3173	break;
				3174	#ifdef FEAT_MBYTE
				3175	if (has_mbyte)
				3176	col += (*mb_ptr2len_check)(regline + col);
				3177	else
				3178	#endif
				3179	++col;
				3180	}
				3181	}
				3182
				3183	if (out_of_stack)
				3184	EMSG(_("E363: pattern caused out-of-stack error"));
				3185
				3186	#ifdef HAVE_TRY_EXCEPT
				3187	}
				3188	__except(EXCEPTION_EXECUTE_HANDLER)
				3189	{
				3190	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3191	{
				3192	RESETSTKOFLW();
				3193	EMSG(_("E363: pattern caused out-of-stack error"));
				3194	}
				3195	else
				3196	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3197	retval = 0L;
				3198	}
				3199	#endif
				3200
				3201	theend:
				3202	/* Didn't find a match. */
				3203	vim_free(reg_tofree);
				3204	#ifdef HAVE_SETJMP_H
				3205	mch_endjmp();
				3206	#endif
				3207	return retval;
				3208	}
				3209
				3210	#ifdef FEAT_SYN_HL
				3211	static reg_extmatch_T *make_extmatch __ARGS((void));
				3212
				3213	/*
				3214	* Create a new extmatch and mark it as referenced once.
				3215	*/
				3216	static reg_extmatch_T *
				3217	make_extmatch()
				3218	{
				3219	reg_extmatch_T *em;
				3220
				3221	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3222	if (em != NULL)
				3223	em->refcnt = 1;
				3224	return em;
				3225	}
				3226
				3227	/*
				3228	* Add a reference to an extmatch.
				3229	*/
				3230	reg_extmatch_T *
				3231	ref_extmatch(em)
				3232	reg_extmatch_T *em;
				3233	{
				3234	if (em != NULL)
				3235	em->refcnt++;
				3236	return em;
				3237	}
				3238
				3239	/*
				3240	* Remove a reference to an extmatch. If there are no references left, free
				3241	* the info.
				3242	*/
				3243	void
				3244	unref_extmatch(em)
				3245	reg_extmatch_T *em;
				3246	{
				3247	int i;
				3248
				3249	if (em != NULL && --em->refcnt <= 0)
				3250	{
				3251	for (i = 0; i < NSUBEXP; ++i)
				3252	vim_free(em->matches[i]);
				3253	vim_free(em);
				3254	}
				3255	}
				3256	#endif
				3257
				3258	/*
				3259	* regtry - try match of "prog" with at regline["col"].
				3260	* Returns 0 for failure, number of lines contained in the match otherwise.
				3261	*/
				3262	static long
				3263	regtry(prog, col)
				3264	regprog_T *prog;
				3265	colnr_T col;
				3266	{
				3267	reginput = regline + col;
				3268	need_clear_subexpr = TRUE;
				3269	#ifdef FEAT_SYN_HL
				3270	/* Clear the external match subpointers if necessary. */
				3271	if (prog->reghasz == REX_SET)
				3272	need_clear_zsubexpr = TRUE;
				3273	#endif
				3274
				3275	if (regmatch(prog->program + 1))
				3276	{
				3277	cleanup_subexpr();
				3278	if (REG_MULTI)
				3279	{
				3280	if (reg_startpos[0].lnum < 0)
				3281	{
				3282	reg_startpos[0].lnum = 0;
				3283	reg_startpos[0].col = col;
				3284	}
				3285	if (reg_endpos[0].lnum < 0)
				3286	{
				3287	reg_endpos[0].lnum = reglnum;
				3288	reg_endpos[0].col = (int)(reginput - regline);
				3289	}
				3290	else
				3291	/* Use line number of "\ze". */
				3292	reglnum = reg_endpos[0].lnum;
				3293	}
				3294	else
				3295	{
				3296	if (reg_startp[0] == NULL)
				3297	reg_startp[0] = regline + col;
				3298	if (reg_endp[0] == NULL)
				3299	reg_endp[0] = reginput;
				3300	}
				3301	#ifdef FEAT_SYN_HL
				3302	/* Package any found \z(...\) matches for export. Default is none. */
				3303	unref_extmatch(re_extmatch_out);
				3304	re_extmatch_out = NULL;
				3305
				3306	if (prog->reghasz == REX_SET)
				3307	{
				3308	int i;
				3309
				3310	cleanup_zsubexpr();
				3311	re_extmatch_out = make_extmatch();
				3312	for (i = 0; i < NSUBEXP; i++)
				3313	{
				3314	if (REG_MULTI)
				3315	{
				3316	/* Only accept single line matches. */
				3317	if (reg_startzpos[i].lnum >= 0
				3318	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3319	re_extmatch_out->matches[i] =
				3320	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3321	+ reg_startzpos[i].col,
				3322	reg_endzpos[i].col - reg_startzpos[i].col);
				3323	}
				3324	else
				3325	{
				3326	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3327	re_extmatch_out->matches[i] =
				3328	vim_strnsave(reg_startzp[i],
				3329	(int)(reg_endzp[i] - reg_startzp[i]));
				3330	}
				3331	}
				3332	}
				3333	#endif
				3334	return 1 + reglnum;
				3335	}
				3336	return 0;
				3337	}
				3338
				3339	#ifdef FEAT_MBYTE
				3340	/* multi-byte: advance reginput with a function */
				3341	# define ADVANCE_REGINPUT() advance_reginput()
				3342
				3343	static void advance_reginput __ARGS((void));
				3344	static int reg_prev_class __ARGS((void));
				3345
				3346	static void
				3347	advance_reginput()
				3348	{
				3349	if (has_mbyte)
				3350	reginput += (*mb_ptr2len_check)(reginput);
				3351	else
				3352	++reginput;
				3353	}
				3354
				3355	/*
				3356	* Get class of previous character.
				3357	*/
				3358	static int
				3359	reg_prev_class()
				3360	{
				3361	if (reginput > regline)
				3362	return mb_get_class(reginput - 1
				3363	- (*mb_head_off)(regline, reginput - 1));
				3364	return -1;
				3365	}
				3366
				3367	#else
				3368	/* No multi-byte: It's too simple to make a function for. */
				3369	# define ADVANCE_REGINPUT() ++reginput
				3370	#endif
				3371
				3372	/*
				3373	* The arguments from BRACE_LIMITS are stored here. They are actually local
				3374	* to regmatch(), but they are here to reduce the amount of stack space used
				3375	* (it can be called recursively many times).
				3376	*/
				3377	static long bl_minval;
				3378	static long bl_maxval;
				3379
				3380	/*
				3381	* regmatch - main matching routine
				3382	*
				3383	* Conceptually the strategy is simple: Check to see whether the current
				3384	* node matches, call self recursively to see whether the rest matches,
				3385	* and then act accordingly. In practice we make some effort to avoid
				3386	* recursion, in particular by going through "ordinary" nodes (that don't
				3387	* need to know whether the rest of the match failed) by a loop instead of
				3388	* by recursion.
				3389	*
				3390	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3391	* the last matched character.
				3392	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3393	* undefined state!
				3394	*/
				3395	static int
				3396	regmatch(scan)
				3397	char_u scan; / Current node. */
				3398	{
				3399	char_u next; / Next node. */
				3400	int op;
				3401	int c;
				3402
				3403	#ifdef HAVE_GETRLIMIT
				3404	/* Check if we are running out of stack space. Could be caused by
				3405	* recursively calling ourselves. */
				3406	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3407	{
				3408	out_of_stack = TRUE;
				3409	return FALSE;
				3410	}
				3411	#endif
				3412
				3413	/* Some patterns my cause a long time to match, even though they are not
				3414	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3415	fast_breakcheck();
				3416
				3417	#ifdef DEBUG
				3418	if (scan != NULL && regnarrate)
				3419	{
				3420	mch_errmsg(regprop(scan));
				3421	mch_errmsg("(\n");
				3422	}
				3423	#endif
				3424	while (scan != NULL)
				3425	{
				3426	if (got_int \|\| out_of_stack)
				3427	return FALSE;
				3428	#ifdef DEBUG
				3429	if (regnarrate)
				3430	{
				3431	mch_errmsg(regprop(scan));
				3432	mch_errmsg("...\n");
				3433	# ifdef FEAT_SYN_HL
				3434	if (re_extmatch_in != NULL)
				3435	{
				3436	int i;
				3437
				3438	mch_errmsg(_("External submatches:\n"));
				3439	for (i = 0; i < NSUBEXP; i++)
				3440	{
				3441	mch_errmsg(" \"");
				3442	if (re_extmatch_in->matches[i] != NULL)
				3443	mch_errmsg(re_extmatch_in->matches[i]);
				3444	mch_errmsg("\"\n");
				3445	}
				3446	}
				3447	# endif
				3448	}
				3449	#endif
				3450	next = regnext(scan);
				3451
				3452	op = OP(scan);
				3453	/* Check for character class with NL added. */
				3454	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3455	{
				3456	reg_nextline();
				3457	}
				3458	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3459	{
				3460	ADVANCE_REGINPUT();
				3461	}
				3462	else
				3463	{
				3464	if (WITH_NL(op))
				3465	op -= ADD_NL;
				3466	#ifdef FEAT_MBYTE
				3467	if (has_mbyte)
				3468	c = (*mb_ptr2char)(reginput);
				3469	else
				3470	#endif
				3471	c = *reginput;
				3472	switch (op)
				3473	{
				3474	case BOL:
				3475	if (reginput != regline)
				3476	return FALSE;
				3477	break;
				3478
				3479	case EOL:
				3480	if (c != NUL)
				3481	return FALSE;
				3482	break;
				3483
				3484	case RE_BOF:
				3485	/* Passing -1 to the getline() function provided for the search
				3486	* should always return NULL if the current line is the first
				3487	* line of the file. */
				3488	if (reglnum != 0 \|\| reginput != regline
				3489	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3490	return FALSE;
				3491	break;
				3492
				3493	case RE_EOF:
				3494	if (reglnum != reg_maxline \|\| c != NUL)
				3495	return FALSE;
				3496	break;
				3497
				3498	case CURSOR:
				3499	/* Check if the buffer is in a window and compare the
				3500	* reg_win->w_cursor position to the match position. */
				3501	if (reg_win == NULL
				3502	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3503	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3504	return FALSE;
				3505	break;
				3506
				3507	case RE_LNUM:
				3508	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3509	scan))
				3510	return FALSE;
				3511	break;
				3512
				3513	case RE_COL:
				3514	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3515	return FALSE;
				3516	break;
				3517
				3518	case RE_VCOL:
				3519	if (!re_num_cmp((long_u)win_linetabsize(
				3520	reg_win == NULL ? curwin : reg_win,
				3521	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3522	return FALSE;
				3523	break;
				3524
				3525	case BOW: /* \<word; reginput points to w */
				3526	if (c == NUL) /* Can't match at end of line */
				3527	return FALSE;
				3528	#ifdef FEAT_MBYTE
				3529	if (has_mbyte)
				3530	{
				3531	int this_class;
				3532
				3533	/* Get class of current and previous char (if it exists). */
				3534	this_class = mb_get_class(reginput);
				3535	if (this_class <= 1)
				3536	return FALSE; /* not on a word at all */
				3537	if (reg_prev_class() == this_class)
				3538	return FALSE; /* previous char is in same word */
				3539	}
				3540	#endif
				3541	else
				3542	{
				3543	if (!vim_iswordc(c)
				3544	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3545	return FALSE;
				3546	}
				3547	break;
				3548
				3549	case EOW: /* word\>; reginput points after d */
				3550	if (reginput == regline) /* Can't match at start of line */
				3551	return FALSE;
				3552	#ifdef FEAT_MBYTE
				3553	if (has_mbyte)
				3554	{
				3555	int this_class, prev_class;
				3556
				3557	/* Get class of current and previous char (if it exists). */
				3558	this_class = mb_get_class(reginput);
				3559	prev_class = reg_prev_class();
				3560	if (this_class == prev_class)
				3561	return FALSE;
				3562	if (prev_class == 0 \|\| prev_class == 1)
				3563	return FALSE;
				3564	}
				3565	else
				3566	#endif
				3567	{
				3568	if (!vim_iswordc(reginput[-1]))
				3569	return FALSE;
				3570	if (reginput[0] != NUL && vim_iswordc(c))
				3571	return FALSE;
				3572	}
				3573	break; /* Matched with EOW */
				3574
				3575	case ANY:
				3576	if (c == NUL)
				3577	return FALSE;
				3578	ADVANCE_REGINPUT();
				3579	break;
				3580
				3581	case IDENT:
				3582	if (!vim_isIDc(c))
				3583	return FALSE;
				3584	ADVANCE_REGINPUT();
				3585	break;
				3586
				3587	case SIDENT:
				3588	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3589	return FALSE;
				3590	ADVANCE_REGINPUT();
				3591	break;
				3592
				3593	case KWORD:
				3594	if (!vim_iswordp(reginput))
				3595	return FALSE;
				3596	ADVANCE_REGINPUT();
				3597	break;
				3598
				3599	case SKWORD:
				3600	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3601	return FALSE;
				3602	ADVANCE_REGINPUT();
				3603	break;
				3604
				3605	case FNAME:
				3606	if (!vim_isfilec(c))
				3607	return FALSE;
				3608	ADVANCE_REGINPUT();
				3609	break;
				3610
				3611	case SFNAME:
				3612	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3613	return FALSE;
				3614	ADVANCE_REGINPUT();
				3615	break;
				3616
				3617	case PRINT:
				3618	if (ptr2cells(reginput) != 1)
				3619	return FALSE;
				3620	ADVANCE_REGINPUT();
				3621	break;
				3622
				3623	case SPRINT:
				3624	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3625	return FALSE;
				3626	ADVANCE_REGINPUT();
				3627	break;
				3628
				3629	case WHITE:
				3630	if (!vim_iswhite(c))
				3631	return FALSE;
				3632	ADVANCE_REGINPUT();
				3633	break;
				3634
				3635	case NWHITE:
				3636	if (c == NUL \|\| vim_iswhite(c))
				3637	return FALSE;
				3638	ADVANCE_REGINPUT();
				3639	break;
				3640
				3641	case DIGIT:
				3642	if (!ri_digit(c))
				3643	return FALSE;
				3644	ADVANCE_REGINPUT();
				3645	break;
				3646
				3647	case NDIGIT:
				3648	if (c == NUL \|\| ri_digit(c))
				3649	return FALSE;
				3650	ADVANCE_REGINPUT();
				3651	break;
				3652
				3653	case HEX:
				3654	if (!ri_hex(c))
				3655	return FALSE;
				3656	ADVANCE_REGINPUT();
				3657	break;
				3658
				3659	case NHEX:
				3660	if (c == NUL \|\| ri_hex(c))
				3661	return FALSE;
				3662	ADVANCE_REGINPUT();
				3663	break;
				3664
				3665	case OCTAL:
				3666	if (!ri_octal(c))
				3667	return FALSE;
				3668	ADVANCE_REGINPUT();
				3669	break;
				3670
				3671	case NOCTAL:
				3672	if (c == NUL \|\| ri_octal(c))
				3673	return FALSE;
				3674	ADVANCE_REGINPUT();
				3675	break;
				3676
				3677	case WORD:
				3678	if (!ri_word(c))
				3679	return FALSE;
				3680	ADVANCE_REGINPUT();
				3681	break;
				3682
				3683	case NWORD:
				3684	if (c == NUL \|\| ri_word(c))
				3685	return FALSE;
				3686	ADVANCE_REGINPUT();
				3687	break;
				3688
				3689	case HEAD:
				3690	if (!ri_head(c))
				3691	return FALSE;
				3692	ADVANCE_REGINPUT();
				3693	break;
				3694
				3695	case NHEAD:
				3696	if (c == NUL \|\| ri_head(c))
				3697	return FALSE;
				3698	ADVANCE_REGINPUT();
				3699	break;
				3700
				3701	case ALPHA:
				3702	if (!ri_alpha(c))
				3703	return FALSE;
				3704	ADVANCE_REGINPUT();
				3705	break;
				3706
				3707	case NALPHA:
				3708	if (c == NUL \|\| ri_alpha(c))
				3709	return FALSE;
				3710	ADVANCE_REGINPUT();
				3711	break;
				3712
				3713	case LOWER:
				3714	if (!ri_lower(c))
				3715	return FALSE;
				3716	ADVANCE_REGINPUT();
				3717	break;
				3718
				3719	case NLOWER:
				3720	if (c == NUL \|\| ri_lower(c))
				3721	return FALSE;
				3722	ADVANCE_REGINPUT();
				3723	break;
				3724
				3725	case UPPER:
				3726	if (!ri_upper(c))
				3727	return FALSE;
				3728	ADVANCE_REGINPUT();
				3729	break;
				3730
				3731	case NUPPER:
				3732	if (c == NUL \|\| ri_upper(c))
				3733	return FALSE;
				3734	ADVANCE_REGINPUT();
				3735	break;
				3736
				3737	case EXACTLY:
				3738	{
				3739	int len;
				3740	char_u *opnd;
				3741
				3742	opnd = OPERAND(scan);
				3743	/* Inline the first byte, for speed. */
				3744	if (opnd != reginput
				3745	&& (!ireg_ic \|\| (
				3746	#ifdef FEAT_MBYTE
				3747	!enc_utf8 &&
				3748	#endif
				3749	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3750	return FALSE;
				3751	if (*opnd == NUL)
				3752	{
				3753	/* match empty string always works; happens when "~" is
				3754	* empty. */
				3755	}
				3756	else if (opnd[1] == NUL
				3757	#ifdef FEAT_MBYTE
				3758	&& !(enc_utf8 && ireg_ic)
				3759	#endif
				3760	)
				3761	++reginput; /* matched a single char */
				3762	else
				3763	{
				3764	len = (int)STRLEN(opnd);
				3765	/* Need to match first byte again for multi-byte. */
				3766	if (cstrncmp(opnd, reginput, &len) != 0)
				3767	return FALSE;
				3768	#ifdef FEAT_MBYTE
				3769	/* Check for following composing character. */
				3770	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3771	{
				3772	/* raaron: This code makes a composing character get
				3773	* ignored, which is the correct behavior (sometimes)
				3774	* for voweled Hebrew texts. */
				3775	if (!ireg_icombine)
				3776	return FALSE;
				3777	}
				3778	else
				3779	#endif
				3780	reginput += len;
				3781	}
				3782	}
				3783	break;
				3784
				3785	case ANYOF:
				3786	case ANYBUT:
				3787	if (c == NUL)
				3788	return FALSE;
				3789	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3790	return FALSE;
				3791	ADVANCE_REGINPUT();
				3792	break;
				3793
				3794	#ifdef FEAT_MBYTE
				3795	case MULTIBYTECODE:
				3796	if (has_mbyte)
				3797	{
				3798	int i, len;
				3799	char_u *opnd;
				3800
				3801	opnd = OPERAND(scan);
				3802	/* Safety check (just in case 'encoding' was changed since
				3803	* compiling the program). */
				3804	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3805	return FALSE;
				3806	for (i = 0; i < len; ++i)
				3807	if (opnd[i] != reginput[i])
				3808	return FALSE;
				3809	reginput += len;
				3810	}
				3811	else
				3812	return FALSE;
				3813	break;
				3814	#endif
				3815
				3816	case NOTHING:
				3817	break;
				3818
				3819	case BACK:
				3820	break;
				3821
				3822	case MOPEN + 0: /* Match start: \zs */
				3823	case MOPEN + 1: /* \( */
				3824	case MOPEN + 2:
				3825	case MOPEN + 3:
				3826	case MOPEN + 4:
				3827	case MOPEN + 5:
				3828	case MOPEN + 6:
				3829	case MOPEN + 7:
				3830	case MOPEN + 8:
				3831	case MOPEN + 9:
				3832	{
				3833	int no;
				3834	save_se_T save;
				3835
				3836	no = op - MOPEN;
				3837	cleanup_subexpr();
				3838	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				3839
				3840	if (regmatch(next))
				3841	return TRUE;
				3842
				3843	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				3844	return FALSE;
				3845	}
				3846	/* break; Not Reached */
				3847
				3848	case NOPEN: /* \%( */
				3849	case NCLOSE: /* \) after \%( */
				3850	if (regmatch(next))
				3851	return TRUE;
				3852	return FALSE;
				3853	/* break; Not Reached */
				3854
				3855	#ifdef FEAT_SYN_HL
				3856	case ZOPEN + 1:
				3857	case ZOPEN + 2:
				3858	case ZOPEN + 3:
				3859	case ZOPEN + 4:
				3860	case ZOPEN + 5:
				3861	case ZOPEN + 6:
				3862	case ZOPEN + 7:
				3863	case ZOPEN + 8:
				3864	case ZOPEN + 9:
				3865	{
				3866	int no;
				3867	save_se_T save;
				3868
				3869	no = op - ZOPEN;
				3870	cleanup_zsubexpr();
				3871	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3872
				3873	if (regmatch(next))
				3874	return TRUE;
				3875
				3876	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3877	return FALSE;
				3878	}
				3879	/* break; Not Reached */
				3880	#endif
				3881
				3882	case MCLOSE + 0: /* Match end: \ze */
				3883	case MCLOSE + 1: /* \) */
				3884	case MCLOSE + 2:
				3885	case MCLOSE + 3:
				3886	case MCLOSE + 4:
				3887	case MCLOSE + 5:
				3888	case MCLOSE + 6:
				3889	case MCLOSE + 7:
				3890	case MCLOSE + 8:
				3891	case MCLOSE + 9:
				3892	{
				3893	int no;
				3894	save_se_T save;
				3895
				3896	no = op - MCLOSE;
				3897	cleanup_subexpr();
				3898	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				3899
				3900	if (regmatch(next))
				3901	return TRUE;
				3902
				3903	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				3904	return FALSE;
				3905	}
				3906	/* break; Not Reached */
				3907
				3908	#ifdef FEAT_SYN_HL
				3909	case ZCLOSE + 1: /* \) after \z( */
				3910	case ZCLOSE + 2:
				3911	case ZCLOSE + 3:
				3912	case ZCLOSE + 4:
				3913	case ZCLOSE + 5:
				3914	case ZCLOSE + 6:
				3915	case ZCLOSE + 7:
				3916	case ZCLOSE + 8:
				3917	case ZCLOSE + 9:
				3918	{
				3919	int no;
				3920	save_se_T save;
				3921
				3922	no = op - ZCLOSE;
				3923	cleanup_zsubexpr();
				3924	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3925
				3926	if (regmatch(next))
				3927	return TRUE;
				3928
				3929	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3930	return FALSE;
				3931	}
				3932	/* break; Not Reached */
				3933	#endif
				3934
				3935	case BACKREF + 1:
				3936	case BACKREF + 2:
				3937	case BACKREF + 3:
				3938	case BACKREF + 4:
				3939	case BACKREF + 5:
				3940	case BACKREF + 6:
				3941	case BACKREF + 7:
				3942	case BACKREF + 8:
				3943	case BACKREF + 9:
				3944	{
				3945	int no;
				3946	int len;
				3947	linenr_T clnum;
				3948	colnr_T ccol;
				3949	char_u *p;
				3950
				3951	no = op - BACKREF;
				3952	cleanup_subexpr();
				3953	if (!REG_MULTI) /* Single-line regexp */
				3954	{
				3955	if (reg_endp[no] == NULL)
				3956	{
				3957	/* Backref was not set: Match an empty string. */
				3958	len = 0;
				3959	}
				3960	else
				3961	{
				3962	/* Compare current input with back-ref in the same
				3963	* line. */
				3964	len = (int)(reg_endp[no] - reg_startp[no]);
				3965	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				3966	return FALSE;
				3967	}
				3968	}
				3969	else /* Multi-line regexp */
				3970	{
				3971	if (reg_endpos[no].lnum < 0)
				3972	{
				3973	/* Backref was not set: Match an empty string. */
				3974	len = 0;
				3975	}
				3976	else
				3977	{
				3978	if (reg_startpos[no].lnum == reglnum
				3979	&& reg_endpos[no].lnum == reglnum)
				3980	{
				3981	/* Compare back-ref within the current line. */
				3982	len = reg_endpos[no].col - reg_startpos[no].col;
				3983	if (cstrncmp(regline + reg_startpos[no].col,
				3984	reginput, &len) != 0)
				3985	return FALSE;
				3986	}
				3987	else
				3988	{
				3989	/* Messy situation: Need to compare between two
				3990	* lines. */
				3991	ccol = reg_startpos[no].col;
				3992	clnum = reg_startpos[no].lnum;
				3993	for (;;)
				3994	{
				3995	/* Since getting one line may invalidate
				3996	* the other, need to make copy. Slow! */
				3997	if (regline != reg_tofree)
				3998	{
				3999	len = (int)STRLEN(regline);
				4000	if (reg_tofree == NULL
				4001	\|\| len >= (int)reg_tofreelen)
				4002	{
				4003	len += 50; /* get some extra */
				4004	vim_free(reg_tofree);
				4005	reg_tofree = alloc(len);
				4006	if (reg_tofree == NULL)
				4007	return FALSE; /* out of memory! */
				4008	reg_tofreelen = len;
				4009	}
				4010	STRCPY(reg_tofree, regline);
				4011	reginput = reg_tofree
				4012	+ (reginput - regline);
				4013	regline = reg_tofree;
				4014	}
				4015
				4016	/* Get the line to compare with. */
				4017	p = reg_getline(clnum);
				4018	if (clnum == reg_endpos[no].lnum)
				4019	len = reg_endpos[no].col - ccol;
				4020	else
				4021	len = (int)STRLEN(p + ccol);
				4022
				4023	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4024	return FALSE; /* doesn't match */
				4025	if (clnum == reg_endpos[no].lnum)
				4026	break; /* match and at end! */
				4027	if (reglnum == reg_maxline)
				4028	return FALSE; /* text too short */
				4029
				4030	/* Advance to next line. */
				4031	reg_nextline();
				4032	++clnum;
				4033	ccol = 0;
				4034	if (got_int \|\| out_of_stack)
				4035	return FALSE;
				4036	}
				4037
				4038	/* found a match! Note that regline may now point
				4039	* to a copy of the line, that should not matter. */
				4040	}
				4041	}
				4042	}
				4043
				4044	/* Matched the backref, skip over it. */
				4045	reginput += len;
				4046	}
				4047	break;
				4048
				4049	#ifdef FEAT_SYN_HL
				4050	case ZREF + 1:
				4051	case ZREF + 2:
				4052	case ZREF + 3:
				4053	case ZREF + 4:
				4054	case ZREF + 5:
				4055	case ZREF + 6:
				4056	case ZREF + 7:
				4057	case ZREF + 8:
				4058	case ZREF + 9:
				4059	{
				4060	int no;
				4061	int len;
				4062
				4063	cleanup_zsubexpr();
				4064	no = op - ZREF;
				4065	if (re_extmatch_in != NULL
				4066	&& re_extmatch_in->matches[no] != NULL)
				4067	{
				4068	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4069	if (cstrncmp(re_extmatch_in->matches[no],
				4070	reginput, &len) != 0)
				4071	return FALSE;
				4072	reginput += len;
				4073	}
				4074	else
				4075	{
				4076	/* Backref was not set: Match an empty string. */
				4077	}
				4078	}
				4079	break;
				4080	#endif
				4081
				4082	case BRANCH:
				4083	{
				4084	if (OP(next) != BRANCH) /* No choice. */
				4085	next = OPERAND(scan); /* Avoid recursion. */
				4086	else
				4087	{
				4088	regsave_T save;
				4089
				4090	do
				4091	{
				4092	reg_save(&save);
				4093	if (regmatch(OPERAND(scan)))
				4094	return TRUE;
				4095	reg_restore(&save);
				4096	scan = regnext(scan);
				4097	} while (scan != NULL && OP(scan) == BRANCH);
				4098	return FALSE;
				4099	/* NOTREACHED */
				4100	}
				4101	}
				4102	break;
				4103
				4104	case BRACE_LIMITS:
				4105	{
				4106	int no;
				4107
				4108	if (OP(next) == BRACE_SIMPLE)
				4109	{
				4110	bl_minval = OPERAND_MIN(scan);
				4111	bl_maxval = OPERAND_MAX(scan);
				4112	}
				4113	else if (OP(next) >= BRACE_COMPLEX
				4114	&& OP(next) < BRACE_COMPLEX + 10)
				4115	{
				4116	no = OP(next) - BRACE_COMPLEX;
				4117	brace_min[no] = OPERAND_MIN(scan);
				4118	brace_max[no] = OPERAND_MAX(scan);
				4119	brace_count[no] = 0;
				4120	}
				4121	else
				4122	{
				4123	EMSG(_(e_internal)); /* Shouldn't happen */
				4124	return FALSE;
				4125	}
				4126	}
				4127	break;
				4128
				4129	case BRACE_COMPLEX + 0:
				4130	case BRACE_COMPLEX + 1:
				4131	case BRACE_COMPLEX + 2:
				4132	case BRACE_COMPLEX + 3:
				4133	case BRACE_COMPLEX + 4:
				4134	case BRACE_COMPLEX + 5:
				4135	case BRACE_COMPLEX + 6:
				4136	case BRACE_COMPLEX + 7:
				4137	case BRACE_COMPLEX + 8:
				4138	case BRACE_COMPLEX + 9:
				4139	{
				4140	int no;
				4141	regsave_T save;
				4142
				4143	no = op - BRACE_COMPLEX;
				4144	++brace_count[no];
				4145
				4146	/* If not matched enough times yet, try one more */
				4147	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4148	? brace_min[no] : brace_max[no]))
				4149	{
				4150	reg_save(&save);
				4151	if (regmatch(OPERAND(scan)))
				4152	return TRUE;
				4153	reg_restore(&save);
				4154	--brace_count[no]; /* failed, decrement match count */
				4155	return FALSE;
				4156	}
				4157
				4158	/* If matched enough times, may try matching some more */
				4159	if (brace_min[no] <= brace_max[no])
				4160	{
				4161	/* Range is the normal way around, use longest match */
				4162	if (brace_count[no] <= brace_max[no])
				4163	{
				4164	reg_save(&save);
				4165	if (regmatch(OPERAND(scan)))
				4166	return TRUE; /* matched some more times */
				4167	reg_restore(&save);
				4168	--brace_count[no]; /* matched just enough times */
				4169	/* continue with the items after \{} */
				4170	}
				4171	}
				4172	else
				4173	{
				4174	/* Range is backwards, use shortest match first */
				4175	if (brace_count[no] <= brace_min[no])
				4176	{
				4177	reg_save(&save);
				4178	if (regmatch(next))
				4179	return TRUE;
				4180	reg_restore(&save);
				4181	next = OPERAND(scan);
				4182	/* must try to match one more item */
				4183	}
				4184	}
				4185	}
				4186	break;
				4187
				4188	case BRACE_SIMPLE:
				4189	case STAR:
				4190	case PLUS:
				4191	{
				4192	int nextb; /* next byte */
				4193	int nextb_ic; /* next byte reverse case */
				4194	long count;
				4195	regsave_T save;
				4196	long minval;
				4197	long maxval;
				4198
				4199	/*
				4200	* Lookahead to avoid useless match attempts when we know
				4201	* what character comes next.
				4202	*/
				4203	if (OP(next) == EXACTLY)
				4204	{
				4205	nextb = *OPERAND(next);
				4206	if (ireg_ic)
				4207	{
				4208	if (isupper(nextb))
				4209	nextb_ic = TOLOWER_LOC(nextb);
				4210	else
				4211	nextb_ic = TOUPPER_LOC(nextb);
				4212	}
				4213	else
				4214	nextb_ic = nextb;
				4215	}
				4216	else
				4217	{
				4218	nextb = NUL;
				4219	nextb_ic = NUL;
				4220	}
				4221	if (op != BRACE_SIMPLE)
				4222	{
				4223	minval = (op == STAR) ? 0 : 1;
				4224	maxval = MAX_LIMIT;
				4225	}
				4226	else
				4227	{
				4228	minval = bl_minval;
				4229	maxval = bl_maxval;
				4230	}
				4231
				4232	/*
				4233	* When maxval > minval, try matching as much as possible, up
				4234	* to maxval. When maxval < minval, try matching at least the
				4235	* minimal number (since the range is backwards, that's also
				4236	* maxval!).
				4237	*/
				4238	count = regrepeat(OPERAND(scan), maxval);
				4239	if (got_int)
				4240	return FALSE;
				4241	if (minval <= maxval)
				4242	{
				4243	/* Range is the normal way around, use longest match */
				4244	while (count >= minval)
				4245	{
				4246	/* If it could match, try it. */
				4247	if (nextb == NUL \|\| *reginput == nextb
				4248	\|\| *reginput == nextb_ic)
				4249	{
				4250	reg_save(&save);
				4251	if (regmatch(next))
				4252	return TRUE;
				4253	reg_restore(&save);
				4254	}
				4255	/* Couldn't or didn't match -- back up one char. */
				4256	if (--count < minval)
				4257	break;
				4258	if (reginput == regline)
				4259	{
				4260	/* backup to last char of previous line */
				4261	--reglnum;
				4262	regline = reg_getline(reglnum);
				4263	/* Just in case regrepeat() didn't count right. */
				4264	if (regline == NULL)
				4265	return FALSE;
				4266	reginput = regline + STRLEN(regline);
				4267	fast_breakcheck();
				4268	if (got_int \|\| out_of_stack)
				4269	return FALSE;
				4270	}
				4271	else
				4272	{
				4273	--reginput;
				4274	#ifdef FEAT_MBYTE
				4275	if (has_mbyte)
				4276	reginput -= (*mb_head_off)(regline, reginput);
				4277	#endif
				4278	}
				4279	}
				4280	}
				4281	else
				4282	{
				4283	/* Range is backwards, use shortest match first.
				4284	* Careful: maxval and minval are exchanged! */
				4285	if (count < maxval)
				4286	return FALSE;
				4287	for (;;)
				4288	{
				4289	/* If it could work, try it. */
				4290	if (nextb == NUL \|\| *reginput == nextb
				4291	\|\| *reginput == nextb_ic)
				4292	{
				4293	reg_save(&save);
				4294	if (regmatch(next))
				4295	return TRUE;
				4296	reg_restore(&save);
				4297	}
				4298	/* Couldn't or didn't match: try advancing one char. */
				4299	if (count == minval
				4300	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4301	break;
				4302	++count;
				4303	if (got_int \|\| out_of_stack)
				4304	return FALSE;
				4305	}
				4306	}
				4307	return FALSE;
				4308	}
				4309	/* break; Not Reached */
				4310
				4311	case NOMATCH:
				4312	{
				4313	regsave_T save;
				4314
				4315	/* If the operand matches, we fail. Otherwise backup and
				4316	* continue with the next item. */
				4317	reg_save(&save);
				4318	if (regmatch(OPERAND(scan)))
				4319	return FALSE;
				4320	reg_restore(&save);
				4321	}
				4322	break;
				4323
				4324	case MATCH:
				4325	case SUBPAT:
				4326	{
				4327	regsave_T save;
				4328
				4329	/* If the operand doesn't match, we fail. Otherwise backup
				4330	* and continue with the next item. */
				4331	reg_save(&save);
				4332	if (!regmatch(OPERAND(scan)))
				4333	return FALSE;
				4334	if (op == MATCH) /* zero-width */
				4335	reg_restore(&save);
				4336	}
				4337	break;
				4338
				4339	case BEHIND:
				4340	case NOBEHIND:
				4341	{
				4342	regsave_T save_after, save_start;
				4343	regsave_T save_behind_pos;
				4344	int needmatch = (op == BEHIND);
				4345
				4346	/*
				4347	* Look back in the input to see whether the operand matches or not. This
				4348	* must be done at every position in the input and checking if
				4349	* the match ends at the current position.
				4350	* First check if the next item matches, that's probably
				4351	* faster.
				4352	*/
				4353	reg_save(&save_start);
				4354	if (regmatch(next))
				4355	{
				4356	/* save the position after the found match for next */
				4357	reg_save(&save_after);
				4358
				4359	/* start looking for a match with operand at the current
				4360	* position. Go back one character until we find the
				4361	* result, hitting the start of the line or the previous
				4362	* line (for multi-line matching).
				4363	* Set behind_pos to where the match should end, BHPOS
				4364	* will match it. */
				4365	save_behind_pos = behind_pos;
				4366	behind_pos = save_start;
				4367	for (;;)
				4368	{
				4369	reg_restore(&save_start);
				4370	if (regmatch(OPERAND(scan))
				4371	&& reg_save_equal(&behind_pos))
				4372	{
				4373	behind_pos = save_behind_pos;
				4374	/* found a match that ends where "next" started */
				4375	if (needmatch)
				4376	{
				4377	reg_restore(&save_after);
				4378	return TRUE;
				4379	}
				4380	return FALSE;
				4381	}
				4382	/*
				4383	* No match: Go back one character. May go to
				4384	* previous line once.
				4385	*/
				4386	if (REG_MULTI)
				4387	{
				4388	if (save_start.rs_u.pos.col == 0)
				4389	{
				4390	if (save_start.rs_u.pos.lnum
				4391	< behind_pos.rs_u.pos.lnum
				4392	\|\| reg_getline(
				4393	--save_start.rs_u.pos.lnum) == NULL)
				4394	break;
				4395	reg_restore(&save_start);
				4396	save_start.rs_u.pos.col =
				4397	(colnr_T)STRLEN(regline);
				4398	}
				4399	else
				4400	--save_start.rs_u.pos.col;
				4401	}
				4402	else
				4403	{
				4404	if (save_start.rs_u.ptr == regline)
				4405	break;
				4406	--save_start.rs_u.ptr;
				4407	}
				4408	}
				4409
				4410	/* NOBEHIND succeeds when no match was found */
				4411	behind_pos = save_behind_pos;
				4412	if (!needmatch)
				4413	{
				4414	reg_restore(&save_after);
				4415	return TRUE;
				4416	}
				4417	}
				4418	return FALSE;
				4419	}
				4420
				4421	case BHPOS:
				4422	if (REG_MULTI)
				4423	{
				4424	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4425	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4426	return FALSE;
				4427	}
				4428	else if (behind_pos.rs_u.ptr != reginput)
				4429	return FALSE;
				4430	break;
				4431
				4432	case NEWL:
				4433	if ((c != NUL \|\| reglnum == reg_maxline)
				4434	&& (c != '\n' \|\| !reg_line_lbr))
				4435	return FALSE;
				4436	if (reg_line_lbr)
				4437	ADVANCE_REGINPUT();
				4438	else
				4439	reg_nextline();
				4440	break;
				4441
				4442	case END:
				4443	return TRUE; /* Success! */
				4444
				4445	default:
				4446	EMSG(_(e_re_corr));
				4447	#ifdef DEBUG
				4448	printf("Illegal op code %d\n", op);
				4449	#endif
				4450	return FALSE;
				4451	}
				4452	}
				4453
				4454	scan = next;
				4455	}
				4456
				4457	/*
				4458	* We get here only if there's trouble -- normally "case END" is the
				4459	* terminating point.
				4460	*/
				4461	EMSG(_(e_re_corr));
				4462	#ifdef DEBUG
				4463	printf("Premature EOL\n");
				4464	#endif
				4465	return FALSE;
				4466	}
				4467
				/*
				 * ADVANCE_P(x) - step pointer "x" forward by one character.
				 *
				 * With FEAT_MBYTE and has_mbyte set, the step is the length reported by
				 * (*mb_ptr2len_check)(); otherwise it is a single byte.
				 *
				 * The multi-byte variant is wrapped in do { } while (0) so that it is a
				 * single statement: the previous bare "if ... else" form could capture an
				 * "else" belonging to an enclosing "if" at the call site.  The argument is
				 * parenthesised; note that it is still evaluated more than once in the
				 * multi-byte variant, so it must not have side effects.
				 * Use it as a statement: "ADVANCE_P(p);".
				 */
				#ifdef FEAT_MBYTE
				# define ADVANCE_P(x) \
				    do { \
				        if (has_mbyte) \
				            (x) += (*mb_ptr2len_check)(x); \
				        else \
				            ++(x); \
				    } while (0)
				#else
				# define ADVANCE_P(x) (++(x))
				#endif
				4473
				4474	/*
				4475	* regrepeat - repeatedly match something simple, return how many.
				4476	* Advances reginput (and reglnum) to just after the matched chars.
				4477	*/
				4478	static int
				4479	regrepeat(p, maxcount)
				4480	char_u *p;
				4481	long maxcount; /* maximum number of matches allowed */
				4482	{
				4483	long count = 0;
				4484	char_u *scan;
				4485	char_u *opnd;
				4486	int mask;
				4487	int testval = 0;
				4488
				4489	scan = reginput; /* Make local copy of reginput for speed. */
				4490	opnd = OPERAND(p);
				4491	switch (OP(p))
				4492	{
				4493	case ANY:
				4494	case ANY + ADD_NL:
				4495	while (count < maxcount)
				4496	{
				4497	/* Matching anything means we continue until end-of-line (or
				4498	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4499	while (*scan != NUL && count < maxcount)
				4500	{
				4501	++count;
				4502	ADVANCE_P(scan);
				4503	}
				4504	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4505	break;
				4506	++count; /* count the line-break */
				4507	reg_nextline();
				4508	scan = reginput;
				4509	if (got_int)
				4510	break;
				4511	}
				4512	break;
				4513
				4514	case IDENT:
				4515	case IDENT + ADD_NL:
				4516	testval = TRUE;
				4517	/FALLTHROUGH/
				4518	case SIDENT:
				4519	case SIDENT + ADD_NL:
				4520	while (count < maxcount)
				4521	{
				4522	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4523	{
				4524	ADVANCE_P(scan);
				4525	}
				4526	else if (*scan == NUL)
				4527	{
				4528	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4529	break;
				4530	reg_nextline();
				4531	scan = reginput;
				4532	if (got_int)
				4533	break;
				4534	}
				4535	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4536	++scan;
				4537	else
				4538	break;
				4539	++count;
				4540	}
				4541	break;
				4542
				4543	case KWORD:
				4544	case KWORD + ADD_NL:
				4545	testval = TRUE;
				4546	/FALLTHROUGH/
				4547	case SKWORD:
				4548	case SKWORD + ADD_NL:
				4549	while (count < maxcount)
				4550	{
				4551	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4552	{
				4553	ADVANCE_P(scan);
				4554	}
				4555	else if (*scan == NUL)
				4556	{
				4557	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4558	break;
				4559	reg_nextline();
				4560	scan = reginput;
				4561	if (got_int)
				4562	break;
				4563	}
				4564	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4565	++scan;
				4566	else
				4567	break;
				4568	++count;
				4569	}
				4570	break;
				4571
				4572	case FNAME:
				4573	case FNAME + ADD_NL:
				4574	testval = TRUE;
				4575	/FALLTHROUGH/
				4576	case SFNAME:
				4577	case SFNAME + ADD_NL:
				4578	while (count < maxcount)
				4579	{
				4580	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4581	{
				4582	ADVANCE_P(scan);
				4583	}
				4584	else if (*scan == NUL)
				4585	{
				4586	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4587	break;
				4588	reg_nextline();
				4589	scan = reginput;
				4590	if (got_int)
				4591	break;
				4592	}
				4593	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4594	++scan;
				4595	else
				4596	break;
				4597	++count;
				4598	}
				4599	break;
				4600
				4601	case PRINT:
				4602	case PRINT + ADD_NL:
				4603	testval = TRUE;
				4604	/FALLTHROUGH/
				4605	case SPRINT:
				4606	case SPRINT + ADD_NL:
				4607	while (count < maxcount)
				4608	{
				4609	if (*scan == NUL)
				4610	{
				4611	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4612	break;
				4613	reg_nextline();
				4614	scan = reginput;
				4615	if (got_int)
				4616	break;
				4617	}
				4618	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4619	{
				4620	ADVANCE_P(scan);
				4621	}
				4622	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4623	++scan;
				4624	else
				4625	break;
				4626	++count;
				4627	}
				4628	break;
				4629
				4630	case WHITE:
				4631	case WHITE + ADD_NL:
				4632	testval = mask = RI_WHITE;
				4633	do_class:
				4634	while (count < maxcount)
				4635	{
				4636	#ifdef FEAT_MBYTE
				4637	int l;
				4638	#endif
				4639	if (*scan == NUL)
				4640	{
				4641	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4642	break;
				4643	reg_nextline();
				4644	scan = reginput;
				4645	if (got_int)
				4646	break;
				4647	}
				4648	#ifdef FEAT_MBYTE
				4649	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4650	{
				4651	if (testval != 0)
				4652	break;
				4653	scan += l;
				4654	}
				4655	#endif
				4656	else if ((class_tab[*scan] & mask) == testval)
				4657	++scan;
				4658	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4659	++scan;
				4660	else
				4661	break;
				4662	++count;
				4663	}
				4664	break;
				4665
				4666	case NWHITE:
				4667	case NWHITE + ADD_NL:
				4668	mask = RI_WHITE;
				4669	goto do_class;
				4670	case DIGIT:
				4671	case DIGIT + ADD_NL:
				4672	testval = mask = RI_DIGIT;
				4673	goto do_class;
				4674	case NDIGIT:
				4675	case NDIGIT + ADD_NL:
				4676	mask = RI_DIGIT;
				4677	goto do_class;
				4678	case HEX:
				4679	case HEX + ADD_NL:
				4680	testval = mask = RI_HEX;
				4681	goto do_class;
				4682	case NHEX:
				4683	case NHEX + ADD_NL:
				4684	mask = RI_HEX;
				4685	goto do_class;
				4686	case OCTAL:
				4687	case OCTAL + ADD_NL:
				4688	testval = mask = RI_OCTAL;
				4689	goto do_class;
				4690	case NOCTAL:
				4691	case NOCTAL + ADD_NL:
				4692	mask = RI_OCTAL;
				4693	goto do_class;
				4694	case WORD:
				4695	case WORD + ADD_NL:
				4696	testval = mask = RI_WORD;
				4697	goto do_class;
				4698	case NWORD:
				4699	case NWORD + ADD_NL:
				4700	mask = RI_WORD;
				4701	goto do_class;
				4702	case HEAD:
				4703	case HEAD + ADD_NL:
				4704	testval = mask = RI_HEAD;
				4705	goto do_class;
				4706	case NHEAD:
				4707	case NHEAD + ADD_NL:
				4708	mask = RI_HEAD;
				4709	goto do_class;
				4710	case ALPHA:
				4711	case ALPHA + ADD_NL:
				4712	testval = mask = RI_ALPHA;
				4713	goto do_class;
				4714	case NALPHA:
				4715	case NALPHA + ADD_NL:
				4716	mask = RI_ALPHA;
				4717	goto do_class;
				4718	case LOWER:
				4719	case LOWER + ADD_NL:
				4720	testval = mask = RI_LOWER;
				4721	goto do_class;
				4722	case NLOWER:
				4723	case NLOWER + ADD_NL:
				4724	mask = RI_LOWER;
				4725	goto do_class;
				4726	case UPPER:
				4727	case UPPER + ADD_NL:
				4728	testval = mask = RI_UPPER;
				4729	goto do_class;
				4730	case NUPPER:
				4731	case NUPPER + ADD_NL:
				4732	mask = RI_UPPER;
				4733	goto do_class;
				4734
				4735	case EXACTLY:
				4736	{
				4737	int cu, cl;
				4738
				4739	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4740	* would have been used for it. */
				4741	if (ireg_ic)
				4742	{
				4743	cu = TOUPPER_LOC(*opnd);
				4744	cl = TOLOWER_LOC(*opnd);
				4745	while (count < maxcount && (scan == cu \|\| scan == cl))
				4746	{
				4747	count++;
				4748	scan++;
				4749	}
				4750	}
				4751	else
				4752	{
				4753	cu = *opnd;
				4754	while (count < maxcount && *scan == cu)
				4755	{
				4756	count++;
				4757	scan++;
				4758	}
				4759	}
				4760	break;
				4761	}
				4762
				4763	#ifdef FEAT_MBYTE
				4764	case MULTIBYTECODE:
				4765	{
				4766	int i, len, cf = 0;
				4767
				4768	/* Safety check (just in case 'encoding' was changed since
				4769	* compiling the program). */
				4770	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4771	{
				4772	if (ireg_ic && enc_utf8)
				4773	cf = utf_fold(utf_ptr2char(opnd));
				4774	while (count < maxcount)
				4775	{
				4776	for (i = 0; i < len; ++i)
				4777	if (opnd[i] != scan[i])
				4778	break;
				4779	if (i < len && (!ireg_ic \|\| !enc_utf8
				4780	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4781	break;
				4782	scan += len;
				4783	++count;
				4784	}
				4785	}
				4786	}
				4787	break;
				4788	#endif
				4789
				4790	case ANYOF:
				4791	case ANYOF + ADD_NL:
				4792	testval = TRUE;
				4793	/FALLTHROUGH/
				4794
				4795	case ANYBUT:
				4796	case ANYBUT + ADD_NL:
				4797	while (count < maxcount)
				4798	{
				4799	#ifdef FEAT_MBYTE
				4800	int len;
				4801	#endif
				4802	if (*scan == NUL)
				4803	{
				4804	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4805	break;
				4806	reg_nextline();
				4807	scan = reginput;
				4808	if (got_int)
				4809	break;
				4810	}
				4811	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4812	++scan;
				4813	#ifdef FEAT_MBYTE
				4814	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4815	{
				4816	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4817	break;
				4818	scan += len;
				4819	}
				4820	#endif
				4821	else
				4822	{
				4823	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4824	break;
				4825	++scan;
				4826	}
				4827	++count;
				4828	}
				4829	break;
				4830
				4831	case NEWL:
				4832	while (count < maxcount
				4833	&& ((*scan == NUL && reglnum < reg_maxline)
				4834	\|\| (*scan == '\n' && reg_line_lbr)))
				4835	{
				4836	count++;
				4837	if (reg_line_lbr)
				4838	ADVANCE_REGINPUT();
				4839	else
				4840	reg_nextline();
				4841	scan = reginput;
				4842	if (got_int)
				4843	break;
				4844	}
				4845	break;
				4846
				4847	default: /* Oh dear. Called inappropriately. */
				4848	EMSG(_(e_re_corr));
				4849	#ifdef DEBUG
				4850	printf("Called regrepeat with op code %d\n", OP(p));
				4851	#endif
				4852	break;
				4853	}
				4854
				4855	reginput = scan;
				4856
				4857	return (int)count;
				4858	}
				4859
				4860	/*
				4861	* regnext - dig the "next" pointer out of a node
				4862	*/
				4863	static char_u *
				4864	regnext(p)
				4865	char_u *p;
				4866	{
				4867	int offset;
				4868
				4869	if (p == JUST_CALC_SIZE)
				4870	return NULL;
				4871
				4872	offset = NEXT(p);
				4873	if (offset == 0)
				4874	return NULL;
				4875
				4876	if (OP(p) == BACK)
				4877	return p - offset;
				4878	else
				4879	return p + offset;
				4880	}
				4881
				4882	/*
				4883	* Check the regexp program for its magic number.
				4884	* Return TRUE if it's wrong.
				4885	*/
				4886	static int
				4887	prog_magic_wrong()
				4888	{
				4889	if (UCHARAT(REG_MULTI
				4890	? reg_mmatch->regprog->program
				4891	: reg_match->regprog->program) != REGMAGIC)
				4892	{
				4893	EMSG(_(e_re_corr));
				4894	return TRUE;
				4895	}
				4896	return FALSE;
				4897	}
				4898
				4899	/*
				4900	* Cleanup the subexpressions, if this wasn't done yet.
				4901	* This construction is used to clear the subexpressions only when they are
				4902	* used (to increase speed).
				4903	*/
				4904	static void
				4905	cleanup_subexpr()
				4906	{
				4907	if (need_clear_subexpr)
				4908	{
				4909	if (REG_MULTI)
				4910	{
				4911	/* Use 0xff to set lnum to -1 */
				4912	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4913	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4914	}
				4915	else
				4916	{
				4917	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				4918	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				4919	}
				4920	need_clear_subexpr = FALSE;
				4921	}
				4922	}
				4923
				#ifdef FEAT_SYN_HL
				/*
				 * Clear the \z sub-expression arrays, if this wasn't done yet.
				 * Does nothing unless need_clear_zsubexpr is set; the flag is reset once
				 * the arrays have been cleared.  NSUBEXP entries are cleared in each
				 * array.
				 */
				static void
				cleanup_zsubexpr()
				{
				    if (need_clear_zsubexpr)
				    {
				        if (REG_MULTI)
				        {
				            /* Use 0xff to set lnum to -1 */
				            vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				            vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				        }
				        else
				        {
				            /* Entries are pointers: the size is that of NSUBEXP
				             * pointers, and zero bytes clear them. */
				            vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
				            vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
				        }
				        need_clear_zsubexpr = FALSE;
				    }
				}
				#endif
				4945
				4946	/*
				4947	* Advance reglnum, regline and reginput to the next line.
				4948	*/
				4949	static void
				4950	reg_nextline()
				4951	{
				4952	regline = reg_getline(++reglnum);
				4953	reginput = regline;
				4954	fast_breakcheck();
				4955	}
				4956
				4957	/*
				4958	* Save the input line and position in a regsave_T.
				4959	*/
				4960	static void
				4961	reg_save(save)
				4962	regsave_T *save;
				4963	{
				4964	if (REG_MULTI)
				4965	{
				4966	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				4967	save->rs_u.pos.lnum = reglnum;
				4968	}
				4969	else
				4970	save->rs_u.ptr = reginput;
				4971	}
				4972
				4973	/*
				4974	* Restore the input line and position from a regsave_T.
				4975	*/
				4976	static void
				4977	reg_restore(save)
				4978	regsave_T *save;
				4979	{
				4980	if (REG_MULTI)
				4981	{
				4982	if (reglnum != save->rs_u.pos.lnum)
				4983	{
				4984	/* only call reg_getline() when the line number changed to save
				4985	* a bit of time */
				4986	reglnum = save->rs_u.pos.lnum;
				4987	regline = reg_getline(reglnum);
				4988	}
				4989	reginput = regline + save->rs_u.pos.col;
				4990	}
				4991	else
				4992	reginput = save->rs_u.ptr;
				4993	}
				4994
				4995	/*
				4996	* Return TRUE if current position is equal to saved position.
				4997	*/
				4998	static int
				4999	reg_save_equal(save)
				5000	regsave_T *save;
				5001	{
				5002	if (REG_MULTI)
				5003	return reglnum == save->rs_u.pos.lnum
				5004	&& reginput == regline + save->rs_u.pos.col;
				5005	return reginput == save->rs_u.ptr;
				5006	}
				5007
				5008	/*
				5009	* Tentatively set the sub-expression start to the current position (after
				5010	* calling regmatch() they will have changed). Need to save the existing
				5011	* values for when there is no match.
				5012	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				5013	* depending on REG_MULTI.
				5014	*/
				5015	static void
				5016	save_se_multi(savep, posp)
				5017	save_se_T *savep;
				5018	lpos_T *posp;
				5019	{
				5020	savep->se_u.pos = *posp;
				5021	posp->lnum = reglnum;
				5022	posp->col = (colnr_T)(reginput - regline);
				5023	}
				5024
				5025	static void
				5026	save_se_one(savep, pp)
				5027	save_se_T *savep;
				5028	char_u **pp;
				5029	{
				5030	savep->se_u.ptr = *pp;
				5031	*pp = reginput;
				5032	}
				5033
				5034	/*
				5035	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5036	*/
				5037	static int
				5038	re_num_cmp(val, scan)
				5039	long_u val;
				5040	char_u *scan;
				5041	{
				5042	long_u n = OPERAND_MIN(scan);
				5043
				5044	if (OPERAND_CMP(scan) == '>')
				5045	return val > n;
				5046	if (OPERAND_CMP(scan) == '<')
				5047	return val < n;
				5048	return val == n;
				5049	}
				5050
				5051
				5052	#ifdef DEBUG
				5053
				5054	/*
				5055	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5056	*/
				5057	static void
				5058	regdump(pattern, r)
				5059	char_u *pattern;
				5060	regprog_T *r;
				5061	{
				5062	char_u *s;
				5063	int op = EXACTLY; /* Arbitrary non-END op. */
				5064	char_u *next;
				5065	char_u *end = NULL;
				5066
				5067	printf("\r\nregcomp(%s):\r\n", pattern);
				5068
				5069	s = r->program + 1;
				5070	/*
				5071	* Loop until we find the END that isn't before a referred next (an END
				5072	* can also appear in a NOMATCH operand).
				5073	*/
				5074	while (op != END \|\| s <= end)
				5075	{
				5076	op = OP(s);
				5077	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5078	next = regnext(s);
				5079	if (next == NULL) /* Next ptr. */
				5080	printf("(0)");
				5081	else
				5082	printf("(%d)", (int)((s - r->program) + (next - s)));
				5083	if (end < next)
				5084	end = next;
				5085	if (op == BRACE_LIMITS)
				5086	{
				5087	/* Two short ints */
				5088	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5089	s += 8;
				5090	}
				5091	s += 3;
				5092	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5093	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5094	\|\| op == EXACTLY)
				5095	{
				5096	/* Literal string, where present. */
				5097	while (*s != NUL)
				5098	printf("%c", *s++);
				5099	s++;
				5100	}
				5101	printf("\r\n");
				5102	}
				5103
				5104	/* Header fields of interest. */
				5105	if (r->regstart != NUL)
				5106	printf("start `%s' 0x%x; ", r->regstart < 256
				5107	? (char *)transchar(r->regstart)
				5108	: "multibyte", r->regstart);
				5109	if (r->reganch)
				5110	printf("anchored; ");
				5111	if (r->regmust != NULL)
				5112	printf("must have \"%s\"", r->regmust);
				5113	printf("\r\n");
				5114	}
				5115
				5116	/*
				5117	* regprop - printable representation of opcode
				5118	*/
				5119	static char_u *
				5120	regprop(op)
				5121	char_u *op;
				5122	{
				5123	char_u *p;
				5124	static char_u buf[50];
				5125
				5126	(void) strcpy(buf, ":");
				5127
				5128	switch (OP(op))
				5129	{
				5130	case BOL:
				5131	p = "BOL";
				5132	break;
				5133	case EOL:
				5134	p = "EOL";
				5135	break;
				5136	case RE_BOF:
				5137	p = "BOF";
				5138	break;
				5139	case RE_EOF:
				5140	p = "EOF";
				5141	break;
				5142	case CURSOR:
				5143	p = "CURSOR";
				5144	break;
				5145	case RE_LNUM:
				5146	p = "RE_LNUM";
				5147	break;
				5148	case RE_COL:
				5149	p = "RE_COL";
				5150	break;
				5151	case RE_VCOL:
				5152	p = "RE_VCOL";
				5153	break;
				5154	case BOW:
				5155	p = "BOW";
				5156	break;
				5157	case EOW:
				5158	p = "EOW";
				5159	break;
				5160	case ANY:
				5161	p = "ANY";
				5162	break;
				5163	case ANY + ADD_NL:
				5164	p = "ANY+NL";
				5165	break;
				5166	case ANYOF:
				5167	p = "ANYOF";
				5168	break;
				5169	case ANYOF + ADD_NL:
				5170	p = "ANYOF+NL";
				5171	break;
				5172	case ANYBUT:
				5173	p = "ANYBUT";
				5174	break;
				5175	case ANYBUT + ADD_NL:
				5176	p = "ANYBUT+NL";
				5177	break;
				5178	case IDENT:
				5179	p = "IDENT";
				5180	break;
				5181	case IDENT + ADD_NL:
				5182	p = "IDENT+NL";
				5183	break;
				5184	case SIDENT:
				5185	p = "SIDENT";
				5186	break;
				5187	case SIDENT + ADD_NL:
				5188	p = "SIDENT+NL";
				5189	break;
				5190	case KWORD:
				5191	p = "KWORD";
				5192	break;
				5193	case KWORD + ADD_NL:
				5194	p = "KWORD+NL";
				5195	break;
				5196	case SKWORD:
				5197	p = "SKWORD";
				5198	break;
				5199	case SKWORD + ADD_NL:
				5200	p = "SKWORD+NL";
				5201	break;
				5202	case FNAME:
				5203	p = "FNAME";
				5204	break;
				5205	case FNAME + ADD_NL:
				5206	p = "FNAME+NL";
				5207	break;
				5208	case SFNAME:
				5209	p = "SFNAME";
				5210	break;
				5211	case SFNAME + ADD_NL:
				5212	p = "SFNAME+NL";
				5213	break;
				5214	case PRINT:
				5215	p = "PRINT";
				5216	break;
				5217	case PRINT + ADD_NL:
				5218	p = "PRINT+NL";
				5219	break;
				5220	case SPRINT:
				5221	p = "SPRINT";
				5222	break;
				5223	case SPRINT + ADD_NL:
				5224	p = "SPRINT+NL";
				5225	break;
				5226	case WHITE:
				5227	p = "WHITE";
				5228	break;
				5229	case WHITE + ADD_NL:
				5230	p = "WHITE+NL";
				5231	break;
				5232	case NWHITE:
				5233	p = "NWHITE";
				5234	break;
				5235	case NWHITE + ADD_NL:
				5236	p = "NWHITE+NL";
				5237	break;
				5238	case DIGIT:
				5239	p = "DIGIT";
				5240	break;
				5241	case DIGIT + ADD_NL:
				5242	p = "DIGIT+NL";
				5243	break;
				5244	case NDIGIT:
				5245	p = "NDIGIT";
				5246	break;
				5247	case NDIGIT + ADD_NL:
				5248	p = "NDIGIT+NL";
				5249	break;
				5250	case HEX:
				5251	p = "HEX";
				5252	break;
				5253	case HEX + ADD_NL:
				5254	p = "HEX+NL";
				5255	break;
				5256	case NHEX:
				5257	p = "NHEX";
				5258	break;
				5259	case NHEX + ADD_NL:
				5260	p = "NHEX+NL";
				5261	break;
				5262	case OCTAL:
				5263	p = "OCTAL";
				5264	break;
				5265	case OCTAL + ADD_NL:
				5266	p = "OCTAL+NL";
				5267	break;
				5268	case NOCTAL:
				5269	p = "NOCTAL";
				5270	break;
				5271	case NOCTAL + ADD_NL:
				5272	p = "NOCTAL+NL";
				5273	break;
				5274	case WORD:
				5275	p = "WORD";
				5276	break;
				5277	case WORD + ADD_NL:
				5278	p = "WORD+NL";
				5279	break;
				5280	case NWORD:
				5281	p = "NWORD";
				5282	break;
				5283	case NWORD + ADD_NL:
				5284	p = "NWORD+NL";
				5285	break;
				5286	case HEAD:
				5287	p = "HEAD";
				5288	break;
				5289	case HEAD + ADD_NL:
				5290	p = "HEAD+NL";
				5291	break;
				5292	case NHEAD:
				5293	p = "NHEAD";
				5294	break;
				5295	case NHEAD + ADD_NL:
				5296	p = "NHEAD+NL";
				5297	break;
				5298	case ALPHA:
				5299	p = "ALPHA";
				5300	break;
				5301	case ALPHA + ADD_NL:
				5302	p = "ALPHA+NL";
				5303	break;
				5304	case NALPHA:
				5305	p = "NALPHA";
				5306	break;
				5307	case NALPHA + ADD_NL:
				5308	p = "NALPHA+NL";
				5309	break;
				5310	case LOWER:
				5311	p = "LOWER";
				5312	break;
				5313	case LOWER + ADD_NL:
				5314	p = "LOWER+NL";
				5315	break;
				5316	case NLOWER:
				5317	p = "NLOWER";
				5318	break;
				5319	case NLOWER + ADD_NL:
				5320	p = "NLOWER+NL";
				5321	break;
				5322	case UPPER:
				5323	p = "UPPER";
				5324	break;
				5325	case UPPER + ADD_NL:
				5326	p = "UPPER+NL";
				5327	break;
				5328	case NUPPER:
				5329	p = "NUPPER";
				5330	break;
				5331	case NUPPER + ADD_NL:
				5332	p = "NUPPER+NL";
				5333	break;
				5334	case BRANCH:
				5335	p = "BRANCH";
				5336	break;
				5337	case EXACTLY:
				5338	p = "EXACTLY";
				5339	break;
				5340	case NOTHING:
				5341	p = "NOTHING";
				5342	break;
				5343	case BACK:
				5344	p = "BACK";
				5345	break;
				5346	case END:
				5347	p = "END";
				5348	break;
				5349	case MOPEN + 0:
				5350	p = "MATCH START";
				5351	break;
				5352	case MOPEN + 1:
				5353	case MOPEN + 2:
				5354	case MOPEN + 3:
				5355	case MOPEN + 4:
				5356	case MOPEN + 5:
				5357	case MOPEN + 6:
				5358	case MOPEN + 7:
				5359	case MOPEN + 8:
				5360	case MOPEN + 9:
				5361	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5362	p = NULL;
				5363	break;
				5364	case MCLOSE + 0:
				5365	p = "MATCH END";
				5366	break;
				5367	case MCLOSE + 1:
				5368	case MCLOSE + 2:
				5369	case MCLOSE + 3:
				5370	case MCLOSE + 4:
				5371	case MCLOSE + 5:
				5372	case MCLOSE + 6:
				5373	case MCLOSE + 7:
				5374	case MCLOSE + 8:
				5375	case MCLOSE + 9:
				5376	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5377	p = NULL;
				5378	break;
				5379	case BACKREF + 1:
				5380	case BACKREF + 2:
				5381	case BACKREF + 3:
				5382	case BACKREF + 4:
				5383	case BACKREF + 5:
				5384	case BACKREF + 6:
				5385	case BACKREF + 7:
				5386	case BACKREF + 8:
				5387	case BACKREF + 9:
				5388	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5389	p = NULL;
				5390	break;
				5391	case NOPEN:
				5392	p = "NOPEN";
				5393	break;
				5394	case NCLOSE:
				5395	p = "NCLOSE";
				5396	break;
				5397	#ifdef FEAT_SYN_HL
				5398	case ZOPEN + 1:
				5399	case ZOPEN + 2:
				5400	case ZOPEN + 3:
				5401	case ZOPEN + 4:
				5402	case ZOPEN + 5:
				5403	case ZOPEN + 6:
				5404	case ZOPEN + 7:
				5405	case ZOPEN + 8:
				5406	case ZOPEN + 9:
				5407	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5408	p = NULL;
				5409	break;
				5410	case ZCLOSE + 1:
				5411	case ZCLOSE + 2:
				5412	case ZCLOSE + 3:
				5413	case ZCLOSE + 4:
				5414	case ZCLOSE + 5:
				5415	case ZCLOSE + 6:
				5416	case ZCLOSE + 7:
				5417	case ZCLOSE + 8:
				5418	case ZCLOSE + 9:
				5419	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5420	p = NULL;
				5421	break;
				5422	case ZREF + 1:
				5423	case ZREF + 2:
				5424	case ZREF + 3:
				5425	case ZREF + 4:
				5426	case ZREF + 5:
				5427	case ZREF + 6:
				5428	case ZREF + 7:
				5429	case ZREF + 8:
				5430	case ZREF + 9:
				5431	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5432	p = NULL;
				5433	break;
				5434	#endif
				5435	case STAR:
				5436	p = "STAR";
				5437	break;
				5438	case PLUS:
				5439	p = "PLUS";
				5440	break;
				5441	case NOMATCH:
				5442	p = "NOMATCH";
				5443	break;
				5444	case MATCH:
				5445	p = "MATCH";
				5446	break;
				5447	case BEHIND:
				5448	p = "BEHIND";
				5449	break;
				5450	case NOBEHIND:
				5451	p = "NOBEHIND";
				5452	break;
				5453	case SUBPAT:
				5454	p = "SUBPAT";
				5455	break;
				5456	case BRACE_LIMITS:
				5457	p = "BRACE_LIMITS";
				5458	break;
				5459	case BRACE_SIMPLE:
				5460	p = "BRACE_SIMPLE";
				5461	break;
				5462	case BRACE_COMPLEX + 0:
				5463	case BRACE_COMPLEX + 1:
				5464	case BRACE_COMPLEX + 2:
				5465	case BRACE_COMPLEX + 3:
				5466	case BRACE_COMPLEX + 4:
				5467	case BRACE_COMPLEX + 5:
				5468	case BRACE_COMPLEX + 6:
				5469	case BRACE_COMPLEX + 7:
				5470	case BRACE_COMPLEX + 8:
				5471	case BRACE_COMPLEX + 9:
				5472	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5473	p = NULL;
				5474	break;
				5475	#ifdef FEAT_MBYTE
				5476	case MULTIBYTECODE:
				5477	p = "MULTIBYTECODE";
				5478	break;
				5479	#endif
				5480	case NEWL:
				5481	p = "NEWL";
				5482	break;
				5483	default:
				5484	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5485	p = NULL;
				5486	break;
				5487	}
				5488	if (p != NULL)
				5489	(void) strcat(buf, p);
				5490	return buf;
				5491	}
				5492	#endif
				5493
				5494	#ifdef FEAT_MBYTE
				5495	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5496
				5497	typedef struct
				5498	{
				5499	int a, b, c;
				5500	} decomp_T;
				5501
				5502
				5503	/* 0xfb20 - 0xfb4f */
				5504	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5505	{
				5506	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5507	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5508	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5509	{0x5d4,0,0}, /* 0xfb23 alt he */
				5510	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5511	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5512	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5513	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5514	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5515	{'+', 0, 0}, /* 0xfb29 alt plus */
				5516	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5517	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5518	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5519	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5520	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5521	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5522	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5523	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5524	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5525	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5526	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5527	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5528	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5529	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5530	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5531	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5532	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5533	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5534	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5535	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5536	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5537	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5538	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5539	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5540	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5541	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5542	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5543	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5544	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5545	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5546	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5547	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5548	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5549	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5550	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5551	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5552	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5553	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5554	};
				5555
				5556	static void
				5557	mb_decompose(c, c1, c2, c3)
				5558	int c, c1, c2, *c3;
				5559	{
				5560	decomp_T d;
				5561
				5562	if (c >= 0x4b20 && c <= 0xfb4f)
				5563	{
				5564	d = decomp_table[c - 0xfb20];
				5565	*c1 = d.a;
				5566	*c2 = d.b;
				5567	*c3 = d.c;
				5568	}
				5569	else
				5570	{
				5571	*c1 = c;
				5572	c2 = c3 = 0;
				5573	}
				5574	}
				5575	#endif
				5576
				5577	/*
				5578	* Compare two strings, ignore case if ireg_ic set.
				5579	* Return 0 if strings match, non-zero otherwise.
				5580	* Correct the length "*n" when composing characters are ignored.
				5581	*/
				5582	static int
				5583	cstrncmp(s1, s2, n)
				5584	char_u s1, s2;
				5585	int *n;
				5586	{
				5587	int result;
				5588
				5589	if (!ireg_ic)
				5590	result = STRNCMP(s1, s2, *n);
				5591	else
				5592	result = MB_STRNICMP(s1, s2, *n);
				5593
				5594	#ifdef FEAT_MBYTE
				5595	/* if it failed and it's utf8 and we want to combineignore: */
				5596	if (result != 0 && enc_utf8 && ireg_icombine)
				5597	{
				5598	char_u str1, str2;
				5599	int c1, c2, c11, c12;
				5600	int ix;
				5601	int junk;
				5602
				5603	/* we have to handle the strcmp ourselves, since it is necessary to
				5604	* deal with the composing characters by ignoring them: */
				5605	str1 = s1;
				5606	str2 = s2;
				5607	c1 = c2 = 0;
				5608	for (ix = 0; ix < *n; )
				5609	{
				5610	c1 = mb_ptr2char_adv(&str1);
				5611	c2 = mb_ptr2char_adv(&str2);
				5612	ix += utf_char2len(c1);
				5613
				5614	/* decompose the character if necessary, into 'base' characters
				5615	* because I don't care about Arabic, I will hard-code the Hebrew
				5616	* which I do care about! So sue me... */
				5617	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5618	{
				5619	/* decomposition necessary? */
				5620	mb_decompose(c1, &c11, &junk, &junk);
				5621	mb_decompose(c2, &c12, &junk, &junk);
				5622	c1 = c11;
				5623	c2 = c12;
				5624	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5625	break;
				5626	}
				5627	}
				5628	result = c2 - c1;
				5629	if (result == 0)
				5630	*n = (int)(str2 - s2);
				5631	}
				5632	#endif
				5633
				5634	return result;
				5635	}
				5636
				5637	/*
				5638	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5639	*/
				5640	static char_u *
				5641	cstrchr(s, c)
				5642	char_u *s;
				5643	int c;
				5644	{
				5645	char_u *p;
				5646	int cc;
				5647
				5648	if (!ireg_ic
				5649	#ifdef FEAT_MBYTE
				5650	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5651	#endif
				5652	)
				5653	return vim_strchr(s, c);
				5654
				5655	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5656	* faster (esp. when using MS Visual C++!).
				5657	* For UTF-8 need to use folded case. */
				5658	#ifdef FEAT_MBYTE
				5659	if (enc_utf8 && c > 0x80)
				5660	cc = utf_fold(c);
				5661	else
				5662	#endif
				5663	if (isupper(c))
				5664	cc = TOLOWER_LOC(c);
				5665	else if (islower(c))
				5666	cc = TOUPPER_LOC(c);
				5667	else
				5668	return vim_strchr(s, c);
				5669
				5670	#ifdef FEAT_MBYTE
				5671	if (has_mbyte)
				5672	{
				5673	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5674	{
				5675	if (enc_utf8 && c > 0x80)
				5676	{
				5677	if (utf_fold(utf_ptr2char(p)) == cc)
				5678	return p;
				5679	}
				5680	else if (p == c \|\| p == cc)
				5681	return p;
				5682	}
				5683	}
				5684	else
				5685	#endif
				5686	/* Faster version for when there are no multi-byte characters. */
				5687	for (p = s; *p != NUL; ++p)
				5688	if (p == c \|\| p == cc)
				5689	return p;
				5690
				5691	return NULL;
				5692	}
				5693
				5694	/***************************************************************
				5695	* regsub stuff *
				5696	***************************************************************/
				5697
				5698	/* This stuff below really confuses cc on an SGI -- webb */
				5699	#ifdef __sgi
				5700	# undef __ARGS
				5701	# define __ARGS(x) ()
				5702	#endif
				5703
				5704	/*
				5705	* We should define ftpr as a pointer to a function returning a pointer to
				5706	* a function returning a pointer to a function ...
				5707	* This is impossible, so we declare a pointer to a function returning a
				5708	* pointer to a function returning void. This should work for all compilers.
				5709	*/
				5710	typedef void ((fptr) __ARGS((char_u *, int)))();
				5711
				5712	static fptr do_upper __ARGS((char_u *, int));
				5713	static fptr do_Upper __ARGS((char_u *, int));
				5714	static fptr do_lower __ARGS((char_u *, int));
				5715	static fptr do_Lower __ARGS((char_u *, int));
				5716
				5717	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5718
				5719	static fptr
				5720	do_upper(d, c)
				5721	char_u *d;
				5722	int c;
				5723	{
				5724	*d = TOUPPER_LOC(c);
				5725
				5726	return (fptr)NULL;
				5727	}
				5728
				5729	static fptr
				5730	do_Upper(d, c)
				5731	char_u *d;
				5732	int c;
				5733	{
				5734	*d = TOUPPER_LOC(c);
				5735
				5736	return (fptr)do_Upper;
				5737	}
				5738
				5739	static fptr
				5740	do_lower(d, c)
				5741	char_u *d;
				5742	int c;
				5743	{
				5744	*d = TOLOWER_LOC(c);
				5745
				5746	return (fptr)NULL;
				5747	}
				5748
				5749	static fptr
				5750	do_Lower(d, c)
				5751	char_u *d;
				5752	int c;
				5753	{
				5754	*d = TOLOWER_LOC(c);
				5755
				5756	return (fptr)do_Lower;
				5757	}
				5758
				5759	/*
				5760	* regtilde(): Replace tildes in the pattern by the old pattern.
				5761	*
				5762	* Short explanation of the tilde: It stands for the previous replacement
				5763	* pattern. If that previous pattern also contains a ~ we should go back a
				5764	* step further... But we insert the previous pattern into the current one
				5765	* and remember that.
				5766	* This still does not handle the case where "magic" changes. TODO?
				5767	*
				5768	* The tildes are parsed once before the first call to vim_regsub().
				5769	*/
				5770	char_u *
				5771	regtilde(source, magic)
				5772	char_u *source;
				5773	int magic;
				5774	{
				5775	char_u *newsub = source;
				5776	char_u *tmpsub;
				5777	char_u *p;
				5778	int len;
				5779	int prevlen;
				5780
				5781	for (p = newsub; *p; ++p)
				5782	{
				5783	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5784	{
				5785	if (reg_prev_sub != NULL)
				5786	{
				5787	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5788	prevlen = (int)STRLEN(reg_prev_sub);
				5789	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5790	if (tmpsub != NULL)
				5791	{
				5792	/* copy prefix */
				5793	len = (int)(p - newsub); /* not including ~ */
				5794	mch_memmove(tmpsub, newsub, (size_t)len);
				5795	/* interpretate tilde */
				5796	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5797	/* copy postfix */
				5798	if (!magic)
				5799	++p; /* back off \ */
				5800	STRCPY(tmpsub + len + prevlen, p + 1);
				5801
				5802	if (newsub != source) /* already allocated newsub */
				5803	vim_free(newsub);
				5804	newsub = tmpsub;
				5805	p = newsub + len + prevlen;
				5806	}
				5807	}
				5808	else if (magic)
				5809	STRCPY(p, p + 1); /* remove '~' */
				5810	else
				5811	STRCPY(p, p + 2); /* remove '\~' */
				5812	--p;
				5813	}
				5814	else
				5815	{
				5816	if (p == '\\' && p[1]) / skip escaped characters */
				5817	++p;
				5818	#ifdef FEAT_MBYTE
				5819	if (has_mbyte)
				5820	p += (*mb_ptr2len_check)(p) - 1;
				5821	#endif
				5822	}
				5823	}
				5824
				5825	vim_free(reg_prev_sub);
				5826	if (newsub != source) /* newsub was allocated, just keep it */
				5827	reg_prev_sub = newsub;
				5828	else /* no ~ found, need to save newsub */
				5829	reg_prev_sub = vim_strsave(newsub);
				5830	return newsub;
				5831	}
				5832
				5833	#ifdef FEAT_EVAL
				5834	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				5835
				5836	/* These pointers are used instead of reg_match and reg_mmatch for
				5837	* reg_submatch(). Needed for when the substitution string is an expression
				5838	* that contains a call to substitute() and submatch(). */
				5839	static regmatch_T *submatch_match;
				5840	static regmmatch_T *submatch_mmatch;
				5841	#endif
				5842
				5843	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				5844	/*
				5845	* vim_regsub() - perform substitutions after a vim_regexec() or
				5846	* vim_regexec_multi() match.
				5847	*
				5848	* If "copy" is TRUE really copy into "dest".
				5849	* If "copy" is FALSE nothing is copied, this is just to find out the length
				5850	* of the result.
				5851	*
				5852	* If "backslash" is TRUE, a backslash will be removed later, need to double
				5853	* them to keep them, and insert a backslash before a CR to avoid it being
				5854	* replaced with a line break later.
				5855	*
				5856	* Note: The matched text must not change between the call of
				5857	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				5858	* references invalid!
				5859	*
				5860	* Returns the size of the replacement, including terminating NUL.
				5861	*/
				5862	int
				5863	vim_regsub(rmp, source, dest, copy, magic, backslash)
				5864	regmatch_T *rmp;
				5865	char_u *source;
				5866	char_u *dest;
				5867	int copy;
				5868	int magic;
				5869	int backslash;
				5870	{
				5871	reg_match = rmp;
				5872	reg_mmatch = NULL;
				5873	reg_maxline = 0;
				5874	return vim_regsub_both(source, dest, copy, magic, backslash);
				5875	}
				5876	#endif
				5877
				5878	int
				5879	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				5880	regmmatch_T *rmp;
				5881	linenr_T lnum;
				5882	char_u *source;
				5883	char_u *dest;
				5884	int copy;
				5885	int magic;
				5886	int backslash;
				5887	{
				5888	reg_match = NULL;
				5889	reg_mmatch = rmp;
				5890	reg_buf = curbuf; /* always works on the current buffer! */
				5891	reg_firstlnum = lnum;
				5892	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				5893	return vim_regsub_both(source, dest, copy, magic, backslash);
				5894	}
				5895
				5896	static int
				5897	vim_regsub_both(source, dest, copy, magic, backslash)
				5898	char_u *source;
				5899	char_u *dest;
				5900	int copy;
				5901	int magic;
				5902	int backslash;
				5903	{
				5904	char_u *src;
				5905	char_u *dst;
				5906	char_u *s;
				5907	int c;
				5908	int no = -1;
				5909	fptr func = (fptr)NULL;
				5910	linenr_T clnum = 0; /* init for GCC */
				5911	int len = 0; /* init for GCC */
				5912	#ifdef FEAT_EVAL
				5913	static char_u *eval_result = NULL;
				5914	#endif
				5915	#ifdef FEAT_MBYTE
				5916	int l;
				5917	#endif
				5918
				5919
				5920	/* Be paranoid... */
				5921	if (source == NULL \|\| dest == NULL)
				5922	{
				5923	EMSG(_(e_null));
				5924	return 0;
				5925	}
				5926	if (prog_magic_wrong())
				5927	return 0;
				5928	src = source;
				5929	dst = dest;
				5930
				5931	/*
				5932	* When the substitute part starts with "\=" evaluate it as an expression.
				5933	*/
				5934	if (source[0] == '\\' && source[1] == '='
				5935	#ifdef FEAT_EVAL
				5936	&& !can_f_submatch /* can't do this recursively */
				5937	#endif
				5938	)
				5939	{
				5940	#ifdef FEAT_EVAL
				5941	/* To make sure that the length doesn't change between checking the
				5942	* length and copying the string, and to speed up things, the
				5943	* resulting string is saved from the call with "copy" == FALSE to the
				5944	* call with "copy" == TRUE. */
				5945	if (copy)
				5946	{
				5947	if (eval_result != NULL)
				5948	{
				5949	STRCPY(dest, eval_result);
				5950	dst += STRLEN(eval_result);
				5951	vim_free(eval_result);
				5952	eval_result = NULL;
				5953	}
				5954	}
				5955	else
				5956	{
				5957	linenr_T save_reg_maxline;
				5958	win_T *save_reg_win;
				5959	int save_ireg_ic;
				5960
				5961	vim_free(eval_result);
				5962
				5963	/* The expression may contain substitute(), which calls us
				5964	* recursively. Make sure submatch() gets the text from the first
				5965	* level. Don't need to save "reg_buf", because
				5966	* vim_regexec_multi() can't be called recursively. */
				5967	submatch_match = reg_match;
				5968	submatch_mmatch = reg_mmatch;
				5969	save_reg_maxline = reg_maxline;
				5970	save_reg_win = reg_win;
				5971	save_ireg_ic = ireg_ic;
				5972	can_f_submatch = TRUE;
				5973
				5974	eval_result = eval_to_string(source + 2, NULL);
				5975	if (eval_result != NULL)
				5976	{
				5977	for (s = eval_result; *s != NUL; ++s)
				5978	{
				5979	/* Change NL to CR, so that it becomes a line break.
				5980	* Skip over a backslashed character. */
				5981	if (*s == NL)
				5982	*s = CAR;
				5983	else if (*s == '\\' && s[1] != NUL)
				5984	++s;
				5985	#ifdef FEAT_MBYTE
				5986	if (has_mbyte)
				5987	s += (*mb_ptr2len_check)(s) - 1;
				5988	#endif
				5989	}
				5990
				5991	dst += STRLEN(eval_result);
				5992	}
				5993
				5994	reg_match = submatch_match;
				5995	reg_mmatch = submatch_mmatch;
				5996	reg_maxline = save_reg_maxline;
				5997	reg_win = save_reg_win;
				5998	ireg_ic = save_ireg_ic;
				5999	can_f_submatch = FALSE;
				6000	}
				6001	#endif
				6002	}
				6003	else
				6004	while ((c = *src++) != NUL)
				6005	{
				6006	if (c == '&' && magic)
				6007	no = 0;
				6008	else if (c == '\\' && *src != NUL)
				6009	{
				6010	if (*src == '&' && !magic)
				6011	{
				6012	++src;
				6013	no = 0;
				6014	}
				6015	else if ('0' <= src && src <= '9')
				6016	{
				6017	no = *src++ - '0';
				6018	}
				6019	else if (vim_strchr((char_u )"uUlLeE", src))
				6020	{
				6021	switch (*src++)
				6022	{
				6023	case 'u': func = (fptr)do_upper;
				6024	continue;
				6025	case 'U': func = (fptr)do_Upper;
				6026	continue;
				6027	case 'l': func = (fptr)do_lower;
				6028	continue;
				6029	case 'L': func = (fptr)do_Lower;
				6030	continue;
				6031	case 'e':
				6032	case 'E': func = (fptr)NULL;
				6033	continue;
				6034	}
				6035	}
				6036	}
				6037	if (no < 0) /* Ordinary character. */
				6038	{
				6039	if (c == '\\' && *src != NUL)
				6040	{
				6041	/* Check for abbreviations -- webb */
				6042	switch (*src)
				6043	{
				6044	case 'r': c = CAR; ++src; break;
				6045	case 'n': c = NL; ++src; break;
				6046	case 't': c = TAB; ++src; break;
				6047	/* Oh no! \e already has meaning in subst pat :-( */
				6048	/* case 'e': c = ESC; ++src; break; */
				6049	case 'b': c = Ctrl_H; ++src; break;
				6050
				6051	/* If "backslash" is TRUE the backslash will be removed
				6052	* later. Used to insert a literal CR. */
				6053	default: if (backslash)
				6054	{
				6055	if (copy)
				6056	*dst = '\\';
				6057	++dst;
				6058	}
				6059	c = *src++;
				6060	}
				6061	}
				6062
				6063	/* Write to buffer, if copy is set. */
				6064	#ifdef FEAT_MBYTE
				6065	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6066	{
				6067	/* TODO: should use "func" here. */
				6068	if (copy)
				6069	mch_memmove(dst, src - 1, l);
				6070	dst += l - 1;
				6071	src += l - 1;
				6072	}
				6073	else
				6074	{
				6075	#endif
				6076	if (copy)
				6077	{
				6078	if (func == (fptr)NULL) /* just copy */
				6079	*dst = c;
				6080	else /* change case */
				6081	func = (fptr)(func(dst, c));
				6082	/* Turbo C complains without the typecast */
				6083	}
				6084	#ifdef FEAT_MBYTE
				6085	}
				6086	#endif
				6087	dst++;
				6088	}
				6089	else
				6090	{
				6091	if (REG_MULTI)
				6092	{
				6093	clnum = reg_mmatch->startpos[no].lnum;
				6094	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6095	s = NULL;
				6096	else
				6097	{
				6098	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6099	if (reg_mmatch->endpos[no].lnum == clnum)
				6100	len = reg_mmatch->endpos[no].col
				6101	- reg_mmatch->startpos[no].col;
				6102	else
				6103	len = (int)STRLEN(s);
				6104	}
				6105	}
				6106	else
				6107	{
				6108	s = reg_match->startp[no];
				6109	if (reg_match->endp[no] == NULL)
				6110	s = NULL;
				6111	else
				6112	len = (int)(reg_match->endp[no] - s);
				6113	}
				6114	if (s != NULL)
				6115	{
				6116	for (;;)
				6117	{
				6118	if (len == 0)
				6119	{
				6120	if (REG_MULTI)
				6121	{
				6122	if (reg_mmatch->endpos[no].lnum == clnum)
				6123	break;
				6124	if (copy)
				6125	*dst = CAR;
				6126	++dst;
				6127	s = reg_getline(++clnum);
				6128	if (reg_mmatch->endpos[no].lnum == clnum)
				6129	len = reg_mmatch->endpos[no].col;
				6130	else
				6131	len = (int)STRLEN(s);
				6132	}
				6133	else
				6134	break;
				6135	}
				6136	else if (s == NUL) / we hit NUL. */
				6137	{
				6138	if (copy)
				6139	EMSG(_(e_re_damg));
				6140	goto exit;
				6141	}
				6142	else
				6143	{
				6144	if (backslash && (s == CAR \|\| s == '\\'))
				6145	{
				6146	/*
				6147	* Insert a backslash in front of a CR, otherwise
				6148	* it will be replaced by a line break.
				6149	* Number of backslashes will be halved later,
				6150	* double them here.
				6151	*/
				6152	if (copy)
				6153	{
				6154	dst[0] = '\\';
				6155	dst[1] = *s;
				6156	}
				6157	dst += 2;
				6158	}
				6159	#ifdef FEAT_MBYTE
				6160	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6161	{
				6162	/* TODO: should use "func" here. */
				6163	if (copy)
				6164	mch_memmove(dst, s, l);
				6165	dst += l;
				6166	s += l - 1;
				6167	len -= l - 1;
				6168	}
				6169	#endif
				6170	else
				6171	{
				6172	if (copy)
				6173	{
				6174	if (func == (fptr)NULL) /* just copy */
				6175	dst = s;
				6176	else /* change case */
				6177	func = (fptr)(func(dst, *s));
				6178	/* Turbo C complains without the typecast */
				6179	}
				6180	++dst;
				6181	}
				6182	++s;
				6183	--len;
				6184	}
				6185	}
				6186	}
				6187	no = -1;
				6188	}
				6189	}
				6190	if (copy)
				6191	*dst = NUL;
				6192
				6193	exit:
				6194	return (int)((dst - dest) + 1);
				6195	}
				6196
				6197	#ifdef FEAT_EVAL
				6198	/*
				6199	* Used for the submatch() function: get the string from tne n'th submatch in
				6200	* allocated memory.
				6201	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6202	*/
				6203	char_u *
				6204	reg_submatch(no)
				6205	int no;
				6206	{
				6207	char_u *retval = NULL;
				6208	char_u *s;
				6209	int len;
				6210	int round;
				6211	linenr_T lnum;
				6212
				6213	if (!can_f_submatch)
				6214	return NULL;
				6215
				6216	if (submatch_match == NULL)
				6217	{
				6218	/*
				6219	* First round: compute the length and allocate memory.
				6220	* Second round: copy the text.
				6221	*/
				6222	for (round = 1; round <= 2; ++round)
				6223	{
				6224	lnum = submatch_mmatch->startpos[no].lnum;
				6225	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6226	return NULL;
				6227
				6228	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6229	if (s == NULL) /* anti-crash check, cannot happen? */
				6230	break;
				6231	if (submatch_mmatch->endpos[no].lnum == lnum)
				6232	{
				6233	/* Within one line: take form start to end col. */
				6234	len = submatch_mmatch->endpos[no].col
				6235	- submatch_mmatch->startpos[no].col;
				6236	if (round == 2)
				6237	{
				6238	STRNCPY(retval, s, len);
				6239	retval[len] = NUL;
				6240	}
				6241	++len;
				6242	}
				6243	else
				6244	{
				6245	/* Multiple lines: take start line from start col, middle
				6246	* lines completely and end line up to end col. */
				6247	len = (int)STRLEN(s);
				6248	if (round == 2)
				6249	{
				6250	STRCPY(retval, s);
				6251	retval[len] = '\n';
				6252	}
				6253	++len;
				6254	++lnum;
				6255	while (lnum < submatch_mmatch->endpos[no].lnum)
				6256	{
				6257	s = reg_getline(lnum++);
				6258	if (round == 2)
				6259	STRCPY(retval + len, s);
				6260	len += (int)STRLEN(s);
				6261	if (round == 2)
				6262	retval[len] = '\n';
				6263	++len;
				6264	}
				6265	if (round == 2)
				6266	STRNCPY(retval + len, reg_getline(lnum),
				6267	submatch_mmatch->endpos[no].col);
				6268	len += submatch_mmatch->endpos[no].col;
				6269	if (round == 2)
				6270	retval[len] = NUL;
				6271	++len;
				6272	}
				6273
				6274	if (round == 1)
				6275	{
				6276	retval = lalloc((long_u)len, TRUE);
				6277	if (s == NULL)
				6278	return NULL;
				6279	}
				6280	}
				6281	}
				6282	else
				6283	{
				6284	if (submatch_match->endp[no] == NULL)
				6285	retval = NULL;
				6286	else
				6287	{
				6288	s = submatch_match->startp[no];
				6289	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6290	}
				6291	}
				6292
				6293	return retval;
				6294	}
				6295	#endif