Blame - src/regexp.c - android_external_vim

blob: bf5f7afd1f2a2594bc366c48d4719a1f5f54ede5 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	77	* node points to the node after the stuff to be repeated.
				78	* The operand of some types of node is a literal string; for others, it is a
				79	* node leading into a sub-FSM. In particular, the operand of a BRANCH node
				80	* is the first node of the branch.
				81	* (NB this is not a tree structure: the tail of the branch connects to the
				82	* thing following the set of BRANCHes.)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	83	*
				84	* pattern is coded like:
				85	*
				86	* +-----------------+
				87	* \| V
				88	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				89	* \| ^ \| ^
				90	* +------+ +----------+
				91	*
				92	*
				93	* +------------------+
				94	* V \|
				95	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				96	* \| \| ^ ^
				97	* \| +---------------+ \|
				98	* +---------------------------------------------+
				99	*
				100	*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	101	* +----------------------+
				102	* V \|
				103	* <aa>\+ BRANCH <aa> --> BRANCH --> BACK BRANCH --> NOTHING --> END
				104	* \| \| ^ ^
				105	* \| +----------+ \|
				106	* +-------------------------------------------------+
				107	*
				108	*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	109	* +-------------------------+
				110	* V \|
				111	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				112	* \| \| ^
				113	* \| +----------------+
				114	* +-----------------------------------------------+
				115	*
				116	*
				117	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				118	* \| \| ^ ^
				119	* \| +----------------+ \|
				120	* +--------------------------------+
				121	*
				122	* +---------+
				123	* \| V
				124	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				125	* \| \| \| \| ^ ^
				126	* \| \| \| +-----+ \|
				127	* \| \| +----------------+ \|
				128	* \| +---------------------------+ \|
				129	* +------------------------------------------------------+
				130	*
				131	* They all start with a BRANCH for "\|" alternatives, even when there is only
				132	* one alternative.
				133	*/
				134
				135	/*
				136	* The opcodes are:
				137	*/
				138
				139	/* definition number opnd? meaning */
				140	#define END 0 /* End of program or NOMATCH operand. */
				141	#define BOL 1 /* Match "" at beginning of line. */
				142	#define EOL 2 /* Match "" at end of line. */
				143	#define BRANCH 3 /* node Match this alternative, or the
				144	* next... */
				145	#define BACK 4 /* Match "", "next" ptr points backward. */
				146	#define EXACTLY 5 /* str Match this string. */
				147	#define NOTHING 6 /* Match empty string. */
				148	#define STAR 7 /* node Match this (simple) thing 0 or more
				149	* times. */
				150	#define PLUS 8 /* node Match this (simple) thing 1 or more
				151	* times. */
				152	#define MATCH 9 /* node match the operand zero-width */
				153	#define NOMATCH 10 /* node check for no match with operand */
				154	#define BEHIND 11 /* node look behind for a match with operand */
				155	#define NOBEHIND 12 /* node look behind for no match with operand */
				156	#define SUBPAT 13 /* node match the operand here */
				157	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				158	* n times (\{m,n\}). */
				159	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				160	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				161	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				162	* and BRACE_COMPLEX. */
				163	#define NEWL 18 /* Match line-break */
				164	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				165
				166
				/*
				 * Character classes: 20-48 normal, 50-78 include a line-break.
				 * Adding ADD_NL to a normal class opcode gives the variant that also
				 * matches a line-break.
				 */
				#define ADD_NL		30
				/* Parenthesized so the sum stays a single operand wherever it is expanded. */
				#define FIRST_NL	(ANY + ADD_NL)
				#define ANY		20	/* Match any one character. */
				#define ANYOF		21	/* str Match any character in this string. */
				#define ANYBUT		22	/* str Match any character not in this
							 * string. */
				#define IDENT		23	/* Match identifier char */
				#define SIDENT		24	/* Match identifier char but no digit */
				#define KWORD		25	/* Match keyword char */
				#define SKWORD		26	/* Match word char but no digit */
				#define FNAME		27	/* Match file name char */
				#define SFNAME		28	/* Match file name char but no digit */
				#define PRINT		29	/* Match printable char */
				#define SPRINT		30	/* Match printable char but no digit */
				#define WHITE		31	/* Match whitespace char */
				#define NWHITE		32	/* Match non-whitespace char */
				#define DIGIT		33	/* Match digit char */
				#define NDIGIT		34	/* Match non-digit char */
				#define HEX		35	/* Match hex char */
				#define NHEX		36	/* Match non-hex char */
				#define OCTAL		37	/* Match octal char */
				#define NOCTAL		38	/* Match non-octal char */
				#define WORD		39	/* Match word char */
				#define NWORD		40	/* Match non-word char */
				#define HEAD		41	/* Match head char */
				#define NHEAD		42	/* Match non-head char */
				#define ALPHA		43	/* Match alpha char */
				#define NALPHA		44	/* Match non-alpha char */
				#define LOWER		45	/* Match lowercase char */
				#define NLOWER		46	/* Match non-lowercase char */
				#define UPPER		47	/* Match uppercase char */
				#define NUPPER		48	/* Match non-uppercase char */
				#define LAST_NL		(NUPPER + ADD_NL)
				/* TRUE when "op" is one of the class opcodes that include a line-break. */
				#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
				202
				203	#define MOPEN 80 /* -89 Mark this point in input as start of
				204	* \( subexpr. MOPEN + 0 marks start of
				205	* match. */
				206	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				207	* end of match. */
				208	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				209
				210	#ifdef FEAT_SYN_HL
				211	# define ZOPEN 110 /* -119 Mark this point in input as start of
				212	* \z( subexpr. */
				213	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				214	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				215	#endif
				216
				217	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				218
				219	#define NOPEN 150 /* Mark this point in input as start of
				220	\%( subexpr. */
				221	#define NCLOSE 151 /* Analogous to NOPEN. */
				222
				223	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				224	#define RE_BOF 201 /* Match "" at beginning of file. */
				225	#define RE_EOF 202 /* Match "" at end of file. */
				226	#define CURSOR 203 /* Match location of cursor. */
				227
				228	#define RE_LNUM 204 /* nr cmp Match line number */
				229	#define RE_COL 205 /* nr cmp Match column number */
				230	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				231
				232	/*
				233	* Magic characters have a special meaning, they don't match literally.
				234	* Magic characters are negative. This separates them from literal characters
				235	* (possibly multi-byte). Only ASCII characters can be Magic.
				236	*/
				237	#define Magic(x) ((int)(x) - 256)
				238	#define un_Magic(x) ((x) + 256)
				239	#define is_Magic(x) ((x) < 0)
				240
				241	static int no_Magic __ARGS((int x));
				242	static int toggle_Magic __ARGS((int x));
				243
				/*
				 * Return "x" without magic-ness: a Magic character is converted with
				 * un_Magic(), any other value is returned unchanged.
				 */
				static int
				no_Magic(x)
				    int		x;
				{
				    return is_Magic(x) ? un_Magic(x) : x;
				}
				252
				/*
				 * Flip the magic-ness of "x": a Magic character is converted with
				 * un_Magic(), any other value is made Magic.
				 */
				static int
				toggle_Magic(x)
				    int		x;
				{
				    return is_Magic(x) ? un_Magic(x) : Magic(x);
				}
				261
				262	/*
				263	* The first byte of the regexp internal "program" is actually this magic
				264	* number; the start node begins in the second byte. It's used to catch the
				265	* most severe mutilation of the program by the caller.
				266	*/
				267
				268	#define REGMAGIC 0234
				269
				270	/*
				271	* Opcode notes:
				272	*
				273	* BRANCH The set of branches constituting a single choice are hooked
				274	* together with their "next" pointers, since precedence prevents
				275	* anything being concatenated to any individual branch. The
				276	* "next" pointer of the last BRANCH in a choice points to the
				277	* thing following the whole choice. This is also where the
				278	* final "next" pointer of each individual branch points; each
				279	* branch starts with the operand node of a BRANCH node.
				280	*
				281	* BACK Normal "next" pointers all implicitly point forward; BACK
				282	* exists to make loop structures possible.
				283	*
				284	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				285	* BRANCH structures using BACK. Simple cases (one character
				286	* per match) are implemented with STAR and PLUS for speed
				287	* and to minimize recursive plunges.
				288	*
				289	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				290	* node, and defines the min and max limits to be used for that
				291	* node.
				292	*
				293	* MOPEN,MCLOSE ...are numbered at compile time.
				294	* ZOPEN,ZCLOSE ...ditto
				295	*/
				296
				297	/*
				298	* A node is one char of opcode followed by two chars of "next" pointer.
				299	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				300	* value is a positive offset from the opcode of the node containing it.
				301	* An operand, if any, simply follows the node. (Note that much of the
				302	* code generation knows about this implicit relationship.)
				303	*
				304	* Using two bytes for the "next" pointer is vast overkill for most things,
				305	* but allows patterns to get big without disasters.
				306	*/
				/* Opcode of the node at "p": its first byte. */
				#define OP(p)		((int)*(p))
				/* Offset to the next node: the two bytes after the opcode, high order first. */
				#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
				/* The operand, if any, starts right after the three node bytes. */
				#define OPERAND(p)	((p) + 3)
				/* Obtain an operand that was stored as four bytes, MSB first. */
				#define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
							+ ((long)(p)[5] << 8) + (long)(p)[6])
				/* Obtain a second operand stored as four bytes. */
				#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
				/* Obtain a second single-byte operand stored after a four bytes operand. */
				#define OPERAND_CMP(p)	(p)[7]
				317
				/*
				 * Utility definitions.
				 */
				/* The byte at "p", read as a char_u and widened to int. */
				#define UCHARAT(p)	((int)*(char_u *)(p))

				/* Used for an error (down from) vim_regcomp(): give the error message, set
				 * rc_did_emsg and return NULL */
				#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				/* As EMSG_RET_NULL, with one "%s" argument: "" when "c" is non-zero, else a
				 * backslash. */
				#define EMSG_M_RET_NULL(m, c) { EMSG2(m, (c) ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)

				#define MAX_LIMIT	(32767L << 16L)
				331
				332	static int re_multi_type __ARGS((int));
				333	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				334	static char_u cstrchr __ARGS((char_u , int));
				335
				336	#ifdef DEBUG
				337	static void regdump __ARGS((char_u , regprog_T ));
				338	static char_u regprop __ARGS((char_u ));
				339	#endif
				340
				341	#define NOT_MULTI 0
				342	#define MULTI_ONE 1
				343	#define MULTI_MULT 2
				344	/*
				345	* Return NOT_MULTI if c is not a "multi" operator.
				346	* Return MULTI_ONE if c is a single "multi" operator.
				347	* Return MULTI_MULT if c is a multi "multi" operator.
				348	*/
				349	static int
				350	re_multi_type(c)
				351	int c;
				352	{
				353	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				354	return MULTI_ONE;
				355	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				356	return MULTI_MULT;
				357	return NOT_MULTI;
				358	}
				359
				360	/*
				361	* Flags to be passed up and down.
				362	*/
				363	#define HASWIDTH 0x1 /* Known never to match null string. */
				364	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				365	#define SPSTART 0x4 /* Starts with * or +. */
				366	#define HASNL 0x8 /* Contains some \n. */
				367	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				368	#define WORST 0 /* Worst case. */
				369
				370	/*
				371	* When regcode is set to this value, code is not emitted and size is computed
				372	* instead.
				373	*/
				374	#define JUST_CALC_SIZE ((char_u *) -1)
				375
				376	static char_u *reg_prev_sub;
				377
				378	/*
				379	* REGEXP_INRANGE contains all characters which are always special in a []
				380	* range after '\'.
				381	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				382	* These are:
				383	* \n - New line (NL).
				384	* \r - Carriage Return (CR).
				385	* \t - Tab (TAB).
				386	* \e - Escape (ESC).
				387	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	388	* \d - Character code in decimal, eg \d123
				389	* \o - Character code in octal, eg \o80
				390	* \x - Character code in hex, eg \x4a
				391	* \u - Multibyte character code, eg \u20ac
				392	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	393	*/
				394	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	395	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	396
				397	static int backslash_trans __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	398	static int get_char_class __ARGS((char_u **pp));
				399	static int get_equi_class __ARGS((char_u **pp));
				400	static void reg_equi_class __ARGS((int c));
				401	static int get_coll_element __ARGS((char_u **pp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	402	static char_u skip_anyof __ARGS((char_u p));
				403	static void init_class_tab __ARGS((void));
				404
				405	/*
				406	* Translate '\x' to its control character, except "\n", which is Magic.
				407	*/
				408	static int
				409	backslash_trans(c)
				410	int c;
				411	{
				412	switch (c)
				413	{
				414	case 'r': return CAR;
				415	case 't': return TAB;
				416	case 'e': return ESC;
				417	case 'b': return BS;
				418	}
				419	return c;
				420	}
				421
				422	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	423	* Check for a character class name "[:name:]". "pp" points to the '['.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	424	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				425	* recognized. Otherwise "pp" is advanced to after the item.
				426	*/
				427	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	428	get_char_class(pp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	429	char_u **pp;
				430	{
				431	static const char *(class_names[]) =
				432	{
				433	"alnum:]",
				434	#define CLASS_ALNUM 0
				435	"alpha:]",
				436	#define CLASS_ALPHA 1
				437	"blank:]",
				438	#define CLASS_BLANK 2
				439	"cntrl:]",
				440	#define CLASS_CNTRL 3
				441	"digit:]",
				442	#define CLASS_DIGIT 4
				443	"graph:]",
				444	#define CLASS_GRAPH 5
				445	"lower:]",
				446	#define CLASS_LOWER 6
				447	"print:]",
				448	#define CLASS_PRINT 7
				449	"punct:]",
				450	#define CLASS_PUNCT 8
				451	"space:]",
				452	#define CLASS_SPACE 9
				453	"upper:]",
				454	#define CLASS_UPPER 10
				455	"xdigit:]",
				456	#define CLASS_XDIGIT 11
				457	"tab:]",
				458	#define CLASS_TAB 12
				459	"return:]",
				460	#define CLASS_RETURN 13
				461	"backspace:]",
				462	#define CLASS_BACKSPACE 14
				463	"escape:]",
				464	#define CLASS_ESCAPE 15
				465	};
				466	#define CLASS_NONE 99
				467	int i;
				468
				469	if ((*pp)[1] == ':')
				470	{
				471	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				472	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				473	{
				474	*pp += STRLEN(class_names[i]) + 2;
				475	return i;
				476	}
				477	}
				478	return CLASS_NONE;
				479	}
				480
				481	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	482	* Specific version of character class functions.
				483	* Using a table to keep this fast.
				484	*/
				485	static short class_tab[256];
				486
				487	#define RI_DIGIT 0x01
				488	#define RI_HEX 0x02
				489	#define RI_OCTAL 0x04
				490	#define RI_WORD 0x08
				491	#define RI_HEAD 0x10
				492	#define RI_ALPHA 0x20
				493	#define RI_LOWER 0x40
				494	#define RI_UPPER 0x80
				495	#define RI_WHITE 0x100
				496
				497	static void
				498	init_class_tab()
				499	{
				500	int i;
				501	static int done = FALSE;
				502
				503	if (done)
				504	return;
				505
				506	for (i = 0; i < 256; ++i)
				507	{
				508	if (i >= '0' && i <= '7')
				509	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				510	else if (i >= '8' && i <= '9')
				511	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				512	else if (i >= 'a' && i <= 'f')
				513	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				514	#ifdef EBCDIC
				515	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				516	\|\| (i >= 's' && i <= 'z'))
				517	#else
				518	else if (i >= 'g' && i <= 'z')
				519	#endif
				520	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				521	else if (i >= 'A' && i <= 'F')
				522	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				523	#ifdef EBCDIC
				524	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				525	\|\| (i >= 'S' && i <= 'Z'))
				526	#else
				527	else if (i >= 'G' && i <= 'Z')
				528	#endif
				529	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				530	else if (i == '_')
				531	class_tab[i] = RI_WORD + RI_HEAD;
				532	else
				533	class_tab[i] = 0;
				534	}
				535	class_tab[' '] \|= RI_WHITE;
				536	class_tab['\t'] \|= RI_WHITE;
				537	done = TRUE;
				538	}
				539
				/*
				 * Class tests on class_tab[].  With FEAT_MBYTE a character of 0x100 or above
				 * is never in a class; the range check comes first so class_tab[] is not
				 * indexed with such a value.
				 * The argument is evaluated more than once: do not pass an expression with
				 * side effects.
				 */
				#ifdef FEAT_MBYTE
				# define ri_digit(c)	((c) < 0x100 && (class_tab[c] & RI_DIGIT))
				# define ri_hex(c)	((c) < 0x100 && (class_tab[c] & RI_HEX))
				# define ri_octal(c)	((c) < 0x100 && (class_tab[c] & RI_OCTAL))
				# define ri_word(c)	((c) < 0x100 && (class_tab[c] & RI_WORD))
				# define ri_head(c)	((c) < 0x100 && (class_tab[c] & RI_HEAD))
				# define ri_alpha(c)	((c) < 0x100 && (class_tab[c] & RI_ALPHA))
				# define ri_lower(c)	((c) < 0x100 && (class_tab[c] & RI_LOWER))
				# define ri_upper(c)	((c) < 0x100 && (class_tab[c] & RI_UPPER))
				# define ri_white(c)	((c) < 0x100 && (class_tab[c] & RI_WHITE))
				#else
				# define ri_digit(c)	(class_tab[c] & RI_DIGIT)
				# define ri_hex(c)	(class_tab[c] & RI_HEX)
				# define ri_octal(c)	(class_tab[c] & RI_OCTAL)
				# define ri_word(c)	(class_tab[c] & RI_WORD)
				# define ri_head(c)	(class_tab[c] & RI_HEAD)
				# define ri_alpha(c)	(class_tab[c] & RI_ALPHA)
				# define ri_lower(c)	(class_tab[c] & RI_LOWER)
				# define ri_upper(c)	(class_tab[c] & RI_UPPER)
				# define ri_white(c)	(class_tab[c] & RI_WHITE)
				#endif
				561
				562	/* flags for regflags */
				563	#define RF_ICASE 1 /* ignore case */
				564	#define RF_NOICASE 2 /* don't ignore case */
				565	#define RF_HASNL 4 /* can match a NL */
				566	#define RF_ICOMBINE 8 /* ignore combining characters */
				567	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				568
				569	/*
				570	* Global work variables for vim_regcomp().
				571	*/
				572
				573	static char_u regparse; / Input-scan pointer. */
				574	static int prevchr_len; /* byte length of previous char */
				575	static int num_complex_braces; /* Complex \{...} count */
				576	static int regnpar; /* () count. */
				577	#ifdef FEAT_SYN_HL
				578	static int regnzpar; /* \z() count. */
				579	static int re_has_z; /* \z item detected */
				580	#endif
				581	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				582	static long regsize; /* Code size. */
				583	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				584	static unsigned regflags; /* RF_ flags for prog */
				585	static long brace_min[10]; /* Minimums for complex brace repeats */
				586	static long brace_max[10]; /* Maximums for complex brace repeats */
				587	static int brace_count[10]; /* Current counts for complex brace repeats */
				588	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				589	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				590	#endif
				591	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				592
				593	static int reg_magic; /* magicness of the pattern: */
				594	#define MAGIC_NONE 1 /* "\V" very unmagic */
				595	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				596	#define MAGIC_ON 3 /* "\m" or 'magic' */
				597	#define MAGIC_ALL 4 /* "\v" very magic */
				598
				599	static int reg_string; /* matching with a string instead of a buffer
				600	line */
				601
				602	/*
				603	* META contains all characters that may be magic, except '^' and '$'.
				604	*/
				605
				606	#ifdef EBCDIC
				607	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				608	#else
				609	/* META[] is used often enough to justify turning it into a table. */
				610	static char_u META_flags[] = {
				611	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				612	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				613	/* % & ( ) * + . */
				614	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				615	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				616	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				617	/* @ A C D F H I K L M O */
				618	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				619	/* P S U V W X Z [ _ */
				620	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				621	/* a c d f h i k l m n o */
				622	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				623	/* p s u v w x z { \| ~ */
				624	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				625	};
				626	#endif
				627
				628	static int curchr;
				629
				630	/* arguments for reg() */
				631	#define REG_NOPAREN 0 /* toplevel reg() */
				632	#define REG_PAREN 1 /* */
				633	#define REG_ZPAREN 2 /* \z(\) */
				634	#define REG_NPAREN 3 /* \%(\) */
				635
				636	/*
				637	* Forward declarations for vim_regcomp()'s friends.
				638	*/
				639	static void initchr __ARGS((char_u *));
				640	static int getchr __ARGS((void));
				641	static void skipchr_keepstart __ARGS((void));
				642	static int peekchr __ARGS((void));
				643	static void skipchr __ARGS((void));
				644	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	645	static int gethexchrs __ARGS((int maxinputlen));
				646	static int getoctchrs __ARGS((void));
				647	static int getdecchrs __ARGS((void));
				648	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	649	static void regcomp_start __ARGS((char_u *expr, int flags));
				650	static char_u reg __ARGS((int, int ));
				651	static char_u regbranch __ARGS((int flagp));
				652	static char_u regconcat __ARGS((int flagp));
				653	static char_u regpiece __ARGS((int ));
				654	static char_u regatom __ARGS((int ));
				655	static char_u *regnode __ARGS((int));
				656	static int prog_magic_wrong __ARGS((void));
				657	static char_u regnext __ARGS((char_u ));
				658	static void regc __ARGS((int b));
				659	#ifdef FEAT_MBYTE
				660	static void regmbc __ARGS((int c));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	661	#else
				662	# define regmbc(c) regc(c)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	663	#endif
				664	static void reginsert __ARGS((int, char_u *));
				665	static void reginsert_limits __ARGS((int, long, long, char_u *));
				666	static char_u re_put_long __ARGS((char_u pr, long_u val));
				667	static int read_limits __ARGS((long , long ));
				668	static void regtail __ARGS((char_u , char_u ));
				669	static void regoptail __ARGS((char_u , char_u ));
				670
				671	/*
				672	* Return TRUE if compiled regular expression "prog" can match a line break.
				673	*/
				674	int
				675	re_multiline(prog)
				676	regprog_T *prog;
				677	{
				678	return (prog->regflags & RF_HASNL);
				679	}
				680
				681	/*
				682	* Return TRUE if compiled regular expression "prog" looks before the start
				683	* position (pattern contains "\@<=" or "\@<!").
				684	*/
				685	int
				686	re_lookbehind(prog)
				687	regprog_T *prog;
				688	{
				689	return (prog->regflags & RF_LOOKBH);
				690	}
				691
				692	/*
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	693	* Check for an equivalence class name "[=a=]". "pp" points to the '['.
				694	* Returns a character representing the class. Zero means that no item was
				695	* recognized. Otherwise "pp" is advanced to after the item.
				696	*/
				697	static int
				698	get_equi_class(pp)
				699	char_u **pp;
				700	{
				701	int c;
				702	int l = 1;
				703	char_u p = pp;
				704
				705	if (p[1] == '=')
				706	{
				707	#ifdef FEAT_MBYTE
				708	if (has_mbyte)
				709	l = mb_ptr2len_check(p + 2);
				710	#endif
				711	if (p[l + 2] == '=' && p[l + 3] == ']')
				712	{
				713	#ifdef FEAT_MBYTE
				714	if (has_mbyte)
				715	c = mb_ptr2char(p + 2);
				716	else
				717	#endif
				718	c = p[2];
				719	*pp += l + 4;
				720	return c;
				721	}
				722	}
				723	return 0;
				724	}
				725
				/*
				 * Produce the bytes for equivalence class "c".
				 * Currently only handles latin1, latin9 and utf-8.
				 * When "c" is a member of one of the groups below, every member of that
				 * group is emitted with regmbc(), in table order.  Otherwise only "c" itself
				 * is emitted.
				 */
				static void
				reg_equi_class(c)
				    int		c;
				{
				    /*
				     * One row per class: the plain letter followed by its accented forms,
				     * terminated by zero.  The accented letters are written as numeric
				     * Latin-1 code points, so the values do not depend on the encoding of
				     * this source file nor on the signedness of "char".
				     */
				    static const int equi_tab[][8] =
				    {
				        {'A', 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0},	/* A with grave .. ring */
				        {'C', 0xc7, 0},					/* C with cedilla */
				        {'E', 0xc8, 0xc9, 0xca, 0xcb, 0},		/* E with grave .. diaeresis */
				        {'I', 0xcc, 0xcd, 0xce, 0xcf, 0},		/* I with grave .. diaeresis */
				        {'N', 0xd1, 0},					/* N with tilde */
				        {'O', 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0},		/* O with grave .. diaeresis */
				        {'U', 0xd9, 0xda, 0xdb, 0xdc, 0},		/* U with grave .. diaeresis */
				        {'Y', 0xdd, 0},					/* Y with acute */
				        {'a', 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0},	/* a with grave .. ring */
				        {'c', 0xe7, 0},					/* c with cedilla */
				        {'e', 0xe8, 0xe9, 0xea, 0xeb, 0},		/* e with grave .. diaeresis */
				        {'i', 0xec, 0xed, 0xee, 0xef, 0},		/* i with grave .. diaeresis */
				        {'n', 0xf1, 0},					/* n with tilde */
				        {'o', 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0},		/* o with grave .. diaeresis */
				        {'u', 0xf9, 0xfa, 0xfb, 0xfc, 0},		/* u with grave .. diaeresis */
				        {'y', 0xfd, 0xff, 0},				/* y with acute, diaeresis */
				    };
				    int		nrows = (int)(sizeof(equi_tab) / sizeof(equi_tab[0]));
				    int		row;
				    int		col;

				#ifdef FEAT_MBYTE
				    if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
				                 || STRCMP(p_enc, "latin9") == 0)
				#endif
				    {
				        for (row = 0; row < nrows; ++row)
				            for (col = 0; equi_tab[row][col] != 0; ++col)
				                if (equi_tab[row][col] == c)
				                {
				                    /* Found the class of "c": emit all its members. */
				                    for (col = 0; equi_tab[row][col] != 0; ++col)
				                        regmbc(equi_tab[row][col]);
				                    return;
				                }
				    }
				    /* Not in a known class, or another encoding: just the character. */
				    regmbc(c);
				}
				805
				806	/*
				807	* Check for a collating element "[.a.]". "pp" points to the '['.
				808	* Returns a character. Zero means that no item was recognized. Otherwise
				809	* "pp" is advanced to after the item.
				810	* Currently only single characters are recognized!
				811	*/
				812	static int
				813	get_coll_element(pp)
				814	char_u **pp;
				815	{
				816	int c;
				817	int l = 1;
				818	char_u p = pp;
				819
				820	if (p[1] == '.')
				821	{
				822	#ifdef FEAT_MBYTE
				823	if (has_mbyte)
				824	l = mb_ptr2len_check(p + 2);
				825	#endif
				826	if (p[l + 2] == '.' && p[l + 3] == ']')
				827	{
				828	#ifdef FEAT_MBYTE
				829	if (has_mbyte)
				830	c = mb_ptr2char(p + 2);
				831	else
				832	#endif
				833	c = p[2];
				834	*pp += l + 4;
				835	return c;
				836	}
				837	}
				838	return 0;
				839	}
				840
				841
				842	/*
				843	* Skip over a "[]" range.
				844	* "p" must point to the character after the '['.
				845	* The returned pointer is on the matching ']', or the terminating NUL.
				846	*/
				847	static char_u *
				848	skip_anyof(p)
				849	char_u *p;
				850	{
				851	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				852	int cpo_bsl; /* 'cpoptions' contains '\' flag */
				853	#ifdef FEAT_MBYTE
				854	int l;
				855	#endif
				856
				857	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				858	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
				859
				860	if (p == '^') / Complement of range. */
				861	++p;
				862	if (p == ']' \|\| p == '-')
				863	++p;
				864	while (p != NUL && p != ']')
				865	{
				866	#ifdef FEAT_MBYTE
				867	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				868	p += l;
				869	else
				870	#endif
				871	if (*p == '-')
				872	{
				873	++p;
				874	if (p != ']' && p != NUL)
				875	mb_ptr_adv(p);
				876	}
				877	else if (*p == '\\'
				878	&& !cpo_bsl
				879	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				880	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				881	p += 2;
				882	else if (*p == '[')
				883	{
				884	if (get_char_class(&p) == CLASS_NONE
				885	&& get_equi_class(&p) == 0
				886	&& get_coll_element(&p) == 0)
				887	++p; /* It was not a class name */
				888	}
				889	else
				890	++p;
				891	}
				892
				893	return p;
				894	}
				895
				896	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	897	* Skip past regular expression.
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	898	* Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	899	* Take care of characters with a backslash in front of it.
				900	* Skip strings inside [ and ].
				901	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				902	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				903	* is changed in-place.
				904	*/
				905	char_u *
				906	skip_regexp(startp, dirc, magic, newp)
				907	char_u *startp;
				908	int dirc;
				909	int magic;
				910	char_u **newp;
				911	{
				912	int mymagic;
				913	char_u *p = startp;
				914
				915	if (magic)
				916	mymagic = MAGIC_ON;
				917	else
				918	mymagic = MAGIC_OFF;
				919
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	920	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	921	{
				922	if (p[0] == dirc) /* found end of regexp */
				923	break;
				924	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				925	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				926	{
				927	p = skip_anyof(p + 1);
				928	if (p[0] == NUL)
				929	break;
				930	}
				931	else if (p[0] == '\\' && p[1] != NUL)
				932	{
				933	if (dirc == '?' && newp != NULL && p[1] == '?')
				934	{
				935	/* change "\?" to "?", make a copy first. */
				936	if (*newp == NULL)
				937	{
				938	*newp = vim_strsave(startp);
				939	if (*newp != NULL)
				940	p = *newp + (p - startp);
				941	}
				942	if (*newp != NULL)
				943	mch_memmove(p, p + 1, STRLEN(p));
				944	else
				945	++p;
				946	}
				947	else
				948	++p; /* skip next character */
				949	if (*p == 'v')
				950	mymagic = MAGIC_ALL;
				951	else if (*p == 'V')
				952	mymagic = MAGIC_NONE;
				953	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	954	}
				955	return p;
				956	}
				957
				958	/*
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	959	* vim_regcomp() - compile a regular expression into internal code
				960	* Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	961	*
				962	* We can't allocate space until we know how big the compiled form will be,
				963	* but we can't compile it (and thus know how big it is) until we've got a
				964	* place to put the code. So we cheat: we compile it twice, once with code
				965	* generation turned off and size counting turned on, and once "for real".
				966	* This also means that we don't allocate space until we are sure that the
				967	* thing really will compile successfully, and we never have to move the
				968	* code and thus invalidate pointers into it. (Note that it has to be in
				969	* one piece because vim_free() must be able to free it all.)
				970	*
				971	* Whether upper/lower case is to be ignored is decided when executing the
				972	* program, it does not matter here.
				973	*
				974	* Beware that the optimization-preparation code in here knows about some
				975	* of the structure of the compiled regexp.
				976	* "re_flags": RE_MAGIC and/or RE_STRING.
				977	*/
				978	regprog_T *
				979	vim_regcomp(expr, re_flags)
				980	char_u *expr;
				981	int re_flags;
				982	{
				983	regprog_T *r;
				984	char_u *scan;
				985	char_u *longest;
				986	int len;
				987	int flags;
				988
				989	if (expr == NULL)
				990	EMSG_RET_NULL(_(e_null));
				991
				992	init_class_tab();
				993
				994	/*
				995	* First pass: determine size, legality.
				996	*/
				997	regcomp_start(expr, re_flags);
				998	regcode = JUST_CALC_SIZE;
				999	regc(REGMAGIC);
				1000	if (reg(REG_NOPAREN, &flags) == NULL)
				1001	return NULL;
				1002
				1003	/* Small enough for pointer-storage convention? */
				1004	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				1005	if (regsize >= 65536L - 256L)
				1006	EMSG_RET_NULL(_("E339: Pattern too long"));
				1007	#endif
				1008
				1009	/* Allocate space. */
				1010	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				1011	if (r == NULL)
				1012	return NULL;
				1013
				1014	/*
				1015	* Second pass: emit code.
				1016	*/
				1017	regcomp_start(expr, re_flags);
				1018	regcode = r->program;
				1019	regc(REGMAGIC);
				1020	if (reg(REG_NOPAREN, &flags) == NULL)
				1021	{
				1022	vim_free(r);
				1023	return NULL;
				1024	}
				1025
				1026	/* Dig out information for optimizations. */
				1027	r->regstart = NUL; /* Worst-case defaults. */
				1028	r->reganch = 0;
				1029	r->regmust = NULL;
				1030	r->regmlen = 0;
				1031	r->regflags = regflags;
				1032	if (flags & HASNL)
				1033	r->regflags \|= RF_HASNL;
				1034	if (flags & HASLOOKBH)
				1035	r->regflags \|= RF_LOOKBH;
				1036	#ifdef FEAT_SYN_HL
				1037	/* Remember whether this pattern has any \z specials in it. */
				1038	r->reghasz = re_has_z;
				1039	#endif
				1040	scan = r->program + 1; /* First BRANCH. */
				1041	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				1042	{
				1043	scan = OPERAND(scan);
				1044
				1045	/* Starting-point info. */
				1046	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				1047	{
				1048	r->reganch++;
				1049	scan = regnext(scan);
				1050	}
				1051
				1052	if (OP(scan) == EXACTLY)
				1053	{
				1054	#ifdef FEAT_MBYTE
				1055	if (has_mbyte)
				1056	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				1057	else
				1058	#endif
				1059	r->regstart = *OPERAND(scan);
				1060	}
				1061	else if ((OP(scan) == BOW
				1062	\|\| OP(scan) == EOW
				1063	\|\| OP(scan) == NOTHING
				1064	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				1065	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				1066	&& OP(regnext(scan)) == EXACTLY)
				1067	{
				1068	#ifdef FEAT_MBYTE
				1069	if (has_mbyte)
				1070	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				1071	else
				1072	#endif
				1073	r->regstart = *OPERAND(regnext(scan));
				1074	}
				1075
				1076	/*
				1077	* If there's something expensive in the r.e., find the longest
				1078	* literal string that must appear and make it the regmust. Resolve
				1079	* ties in favor of later strings, since the regstart check works
				1080	* with the beginning of the r.e. and avoiding duplication
				1081	* strengthens checking. Not a strong reason, but sufficient in the
				1082	* absence of others.
				1083	*/
				1084	/*
				1085	* When the r.e. starts with BOW, it is faster to look for a regmust
				1086	* first. Used a lot for "#" and "*" commands. (Added by mool).
				1087	*/
				1088	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				1089	&& !(flags & HASNL))
				1090	{
				1091	longest = NULL;
				1092	len = 0;
				1093	for (; scan != NULL; scan = regnext(scan))
				1094	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				1095	{
				1096	longest = OPERAND(scan);
				1097	len = (int)STRLEN(OPERAND(scan));
				1098	}
				1099	r->regmust = longest;
				1100	r->regmlen = len;
				1101	}
				1102	}
				1103	#ifdef DEBUG
				1104	regdump(expr, r);
				1105	#endif
				1106	return r;
				1107	}
				1108
				1109	/*
				1110	* Setup to parse the regexp. Used once to get the length and once to do it.
				1111	*/
				1112	static void
				1113	regcomp_start(expr, re_flags)
				1114	char_u *expr;
				1115	int re_flags; /* see vim_regcomp() */
				1116	{
				1117	initchr(expr);
				1118	if (re_flags & RE_MAGIC)
				1119	reg_magic = MAGIC_ON;
				1120	else
				1121	reg_magic = MAGIC_OFF;
				1122	reg_string = (re_flags & RE_STRING);
				1123
				1124	num_complex_braces = 0;
				1125	regnpar = 1;
				1126	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				1127	#ifdef FEAT_SYN_HL
				1128	regnzpar = 1;
				1129	re_has_z = 0;
				1130	#endif
				1131	regsize = 0L;
				1132	regflags = 0;
				1133	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1134	had_eol = FALSE;
				1135	#endif
				1136	}
				1137
				#if defined(FEAT_SYN_HL) || defined(PROTO)
				/*
				 * Check if during the previous call to vim_regcomp the EOL item "$" has been
				 * found.  This is messy, but it works fine.
				 * Returns the value of the file-level "had_eol" flag.
				 */
				    int
				vim_regcomp_had_eol()
				{
				    return had_eol;
				}
				#endif
				1149
				1150	/*
				1151	* reg - regular expression, i.e. main body or parenthesized thing
				1152	*
				1153	* Caller must absorb opening parenthesis.
				1154	*
				1155	* Combining parenthesis handling with the base level of regular expression
				1156	* is a trifle forced, but the need to tie the tails of the branches to what
				1157	* follows makes it hard to avoid.
				1158	*/
				1159	static char_u *
				1160	reg(paren, flagp)
				1161	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1162	int *flagp;
				1163	{
				1164	char_u *ret;
				1165	char_u *br;
				1166	char_u *ender;
				1167	int parno = 0;
				1168	int flags;
				1169
				1170	flagp = HASWIDTH; / Tentatively. */
				1171
				1172	#ifdef FEAT_SYN_HL
				1173	if (paren == REG_ZPAREN)
				1174	{
				1175	/* Make a ZOPEN node. */
				1176	if (regnzpar >= NSUBEXP)
				1177	EMSG_RET_NULL(_("E50: Too many \\z("));
				1178	parno = regnzpar;
				1179	regnzpar++;
				1180	ret = regnode(ZOPEN + parno);
				1181	}
				1182	else
				1183	#endif
				1184	if (paren == REG_PAREN)
				1185	{
				1186	/* Make a MOPEN node. */
				1187	if (regnpar >= NSUBEXP)
				1188	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1189	parno = regnpar;
				1190	++regnpar;
				1191	ret = regnode(MOPEN + parno);
				1192	}
				1193	else if (paren == REG_NPAREN)
				1194	{
				1195	/* Make a NOPEN node. */
				1196	ret = regnode(NOPEN);
				1197	}
				1198	else
				1199	ret = NULL;
				1200
				1201	/* Pick up the branches, linking them together. */
				1202	br = regbranch(&flags);
				1203	if (br == NULL)
				1204	return NULL;
				1205	if (ret != NULL)
				1206	regtail(ret, br); /* [MZ]OPEN -> first. */
				1207	else
				1208	ret = br;
				1209	/* If one of the branches can be zero-width, the whole thing can.
				1210	* If one of the branches has * at start or matches a line-break, the
				1211	* whole thing can. */
				1212	if (!(flags & HASWIDTH))
				1213	*flagp &= ~HASWIDTH;
				1214	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1215	while (peekchr() == Magic('\|'))
				1216	{
				1217	skipchr();
				1218	br = regbranch(&flags);
				1219	if (br == NULL)
				1220	return NULL;
				1221	regtail(ret, br); /* BRANCH -> BRANCH. */
				1222	if (!(flags & HASWIDTH))
				1223	*flagp &= ~HASWIDTH;
				1224	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1225	}
				1226
				1227	/* Make a closing node, and hook it on the end. */
				1228	ender = regnode(
				1229	#ifdef FEAT_SYN_HL
				1230	paren == REG_ZPAREN ? ZCLOSE + parno :
				1231	#endif
				1232	paren == REG_PAREN ? MCLOSE + parno :
				1233	paren == REG_NPAREN ? NCLOSE : END);
				1234	regtail(ret, ender);
				1235
				1236	/* Hook the tails of the branches to the closing node. */
				1237	for (br = ret; br != NULL; br = regnext(br))
				1238	regoptail(br, ender);
				1239
				1240	/* Check for proper termination. */
				1241	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1242	{
				1243	#ifdef FEAT_SYN_HL
				1244	if (paren == REG_ZPAREN)
				1245	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1246	else
				1247	#endif
				1248	if (paren == REG_NPAREN)
				1249	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1250	else
				1251	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1252	}
				1253	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1254	{
				1255	if (curchr == Magic(')'))
				1256	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1257	else
				1258	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1259	/* NOTREACHED */
				1260	}
				1261	/*
				1262	* Here we set the flag allowing back references to this set of
				1263	* parentheses.
				1264	*/
				1265	if (paren == REG_PAREN)
				1266	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1267	return ret;
				1268	}
				1269
				1270	/*
				1271	* regbranch - one alternative of an \| operator
				1272	*
				1273	* Implements the & operator.
				1274	*/
				1275	static char_u *
				1276	regbranch(flagp)
				1277	int *flagp;
				1278	{
				1279	char_u *ret;
				1280	char_u *chain = NULL;
				1281	char_u *latest;
				1282	int flags;
				1283
				1284	flagp = WORST \| HASNL; / Tentatively. */
				1285
				1286	ret = regnode(BRANCH);
				1287	for (;;)
				1288	{
				1289	latest = regconcat(&flags);
				1290	if (latest == NULL)
				1291	return NULL;
				1292	/* If one of the branches has width, the whole thing has. If one of
				1293	* the branches anchors at start-of-line, the whole thing does.
				1294	* If one of the branches uses look-behind, the whole thing does. */
				1295	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1296	/* If one of the branches doesn't match a line-break, the whole thing
				1297	* doesn't. */
				1298	*flagp &= ~HASNL \| (flags & HASNL);
				1299	if (chain != NULL)
				1300	regtail(chain, latest);
				1301	if (peekchr() != Magic('&'))
				1302	break;
				1303	skipchr();
				1304	regtail(latest, regnode(END)); /* operand ends */
				1305	reginsert(MATCH, latest);
				1306	chain = latest;
				1307	}
				1308
				1309	return ret;
				1310	}
				1311
				1312	/*
				1313	* regbranch - one alternative of an \| or & operator
				1314	*
				1315	* Implements the concatenation operator.
				1316	*/
				1317	static char_u *
				1318	regconcat(flagp)
				1319	int *flagp;
				1320	{
				1321	char_u *first = NULL;
				1322	char_u *chain = NULL;
				1323	char_u *latest;
				1324	int flags;
				1325	int cont = TRUE;
				1326
				1327	flagp = WORST; / Tentatively. */
				1328
				1329	while (cont)
				1330	{
				1331	switch (peekchr())
				1332	{
				1333	case NUL:
				1334	case Magic('\|'):
				1335	case Magic('&'):
				1336	case Magic(')'):
				1337	cont = FALSE;
				1338	break;
				1339	case Magic('Z'):
				1340	#ifdef FEAT_MBYTE
				1341	regflags \|= RF_ICOMBINE;
				1342	#endif
				1343	skipchr_keepstart();
				1344	break;
				1345	case Magic('c'):
				1346	regflags \|= RF_ICASE;
				1347	skipchr_keepstart();
				1348	break;
				1349	case Magic('C'):
				1350	regflags \|= RF_NOICASE;
				1351	skipchr_keepstart();
				1352	break;
				1353	case Magic('v'):
				1354	reg_magic = MAGIC_ALL;
				1355	skipchr_keepstart();
				1356	curchr = -1;
				1357	break;
				1358	case Magic('m'):
				1359	reg_magic = MAGIC_ON;
				1360	skipchr_keepstart();
				1361	curchr = -1;
				1362	break;
				1363	case Magic('M'):
				1364	reg_magic = MAGIC_OFF;
				1365	skipchr_keepstart();
				1366	curchr = -1;
				1367	break;
				1368	case Magic('V'):
				1369	reg_magic = MAGIC_NONE;
				1370	skipchr_keepstart();
				1371	curchr = -1;
				1372	break;
				1373	default:
				1374	latest = regpiece(&flags);
				1375	if (latest == NULL)
				1376	return NULL;
				1377	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1378	if (chain == NULL) /* First piece. */
				1379	*flagp \|= flags & SPSTART;
				1380	else
				1381	regtail(chain, latest);
				1382	chain = latest;
				1383	if (first == NULL)
				1384	first = latest;
				1385	break;
				1386	}
				1387	}
				1388	if (first == NULL) /* Loop ran zero times. */
				1389	first = regnode(NOTHING);
				1390	return first;
				1391	}
				1392
				1393	/*
				1394	* regpiece - something followed by possible [*+=]
				1395	*
				1396	* Note that the branching code sequences used for = and the general cases
				1397	* of * and + are somewhat optimized: they use the same NOTHING node as
				1398	* both the endmarker for their branch list and the body of the last branch.
				1399	* It might seem that this node could be dispensed with entirely, but the
				1400	* endmarker role is not redundant.
				1401	*/
				1402	static char_u *
				1403	regpiece(flagp)
				1404	int *flagp;
				1405	{
				1406	char_u *ret;
				1407	int op;
				1408	char_u *next;
				1409	int flags;
				1410	long minval;
				1411	long maxval;
				1412
				1413	ret = regatom(&flags);
				1414	if (ret == NULL)
				1415	return NULL;
				1416
				1417	op = peekchr();
				1418	if (re_multi_type(op) == NOT_MULTI)
				1419	{
				1420	*flagp = flags;
				1421	return ret;
				1422	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1423	/* default flags */
				1424	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1425
				1426	skipchr();
				1427	switch (op)
				1428	{
				1429	case Magic('*'):
				1430	if (flags & SIMPLE)
				1431	reginsert(STAR, ret);
				1432	else
				1433	{
				1434	/* Emit x* as (x&\|), where & means "self". */
				1435	reginsert(BRANCH, ret); /* Either x */
				1436	regoptail(ret, regnode(BACK)); /* and loop */
				1437	regoptail(ret, ret); /* back */
				1438	regtail(ret, regnode(BRANCH)); /* or */
				1439	regtail(ret, regnode(NOTHING)); /* null. */
				1440	}
				1441	break;
				1442
				1443	case Magic('+'):
				1444	if (flags & SIMPLE)
				1445	reginsert(PLUS, ret);
				1446	else
				1447	{
				1448	/* Emit x+ as x(&\|), where & means "self". */
				1449	next = regnode(BRANCH); /* Either */
				1450	regtail(ret, next);
				1451	regtail(regnode(BACK), ret); /* loop back */
				1452	regtail(next, regnode(BRANCH)); /* or */
				1453	regtail(ret, regnode(NOTHING)); /* null. */
				1454	}
				1455	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1456	break;
				1457
				1458	case Magic('@'):
				1459	{
				1460	int lop = END;
				1461
				1462	switch (no_Magic(getchr()))
				1463	{
				1464	case '=': lop = MATCH; break; /* \@= */
				1465	case '!': lop = NOMATCH; break; /* \@! */
				1466	case '>': lop = SUBPAT; break; /* \@> */
				1467	case '<': switch (no_Magic(getchr()))
				1468	{
				1469	case '=': lop = BEHIND; break; /* \@<= */
				1470	case '!': lop = NOBEHIND; break; /* \@<! */
				1471	}
				1472	}
				1473	if (lop == END)
				1474	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1475	reg_magic == MAGIC_ALL);
				1476	/* Look behind must match with behind_pos. */
				1477	if (lop == BEHIND \|\| lop == NOBEHIND)
				1478	{
				1479	regtail(ret, regnode(BHPOS));
				1480	*flagp \|= HASLOOKBH;
				1481	}
				1482	regtail(ret, regnode(END)); /* operand ends */
				1483	reginsert(lop, ret);
				1484	break;
				1485	}
				1486
				1487	case Magic('?'):
				1488	case Magic('='):
				1489	/* Emit x= as (x\|) */
				1490	reginsert(BRANCH, ret); /* Either x */
				1491	regtail(ret, regnode(BRANCH)); /* or */
				1492	next = regnode(NOTHING); /* null. */
				1493	regtail(ret, next);
				1494	regoptail(ret, next);
				1495	break;
				1496
				1497	case Magic('{'):
				1498	if (!read_limits(&minval, &maxval))
				1499	return NULL;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1500	if (flags & SIMPLE)
				1501	{
				1502	reginsert(BRACE_SIMPLE, ret);
				1503	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1504	}
				1505	else
				1506	{
				1507	if (num_complex_braces >= 10)
				1508	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1509	reg_magic == MAGIC_ALL);
				1510	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1511	regoptail(ret, regnode(BACK));
				1512	regoptail(ret, ret);
				1513	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1514	++num_complex_braces;
				1515	}
				1516	if (minval > 0 && maxval > 0)
				1517	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1518	break;
				1519	}
				1520	if (re_multi_type(peekchr()) != NOT_MULTI)
				1521	{
				1522	/* Can't have a multi follow a multi. */
				1523	if (peekchr() == Magic('*'))
				1524	sprintf((char )IObuff, _("E61: Nested %s"),
				1525	reg_magic >= MAGIC_ON ? "" : "\\");
				1526	else
				1527	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1528	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1529	EMSG_RET_NULL(IObuff);
				1530	}
				1531
				1532	return ret;
				1533	}
				1534
				1535	/*
				1536	* regatom - the lowest level
				1537	*
				1538	* Optimization: gobbles an entire sequence of ordinary characters so that
				1539	* it can turn them into a single node, which is smaller to store and
				1540	* faster to run. Don't do this when one_exactly is set.
				1541	*/
				1542	static char_u *
				1543	regatom(flagp)
				1544	int *flagp;
				1545	{
				1546	char_u *ret;
				1547	int flags;
				1548	int cpo_lit; /* 'cpoptions' contains 'l' flag */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1549	int cpo_bsl; /* 'cpoptions' contains '\' flag */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1550	int c;
				1551	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1552	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1553	FNAME, SFNAME, PRINT, SPRINT,
				1554	WHITE, NWHITE, DIGIT, NDIGIT,
				1555	HEX, NHEX, OCTAL, NOCTAL,
				1556	WORD, NWORD, HEAD, NHEAD,
				1557	ALPHA, NALPHA, LOWER, NLOWER,
				1558	UPPER, NUPPER
				1559	};
				1560	char_u *p;
				1561	int extra = 0;
				1562
				1563	flagp = WORST; / Tentatively. */
				1564	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1565	cpo_bsl = (!reg_syn && vim_strchr(p_cpo, CPO_BACKSL) != NULL);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1566
				1567	c = getchr();
				1568	switch (c)
				1569	{
				1570	case Magic('^'):
				1571	ret = regnode(BOL);
				1572	break;
				1573
				1574	case Magic('$'):
				1575	ret = regnode(EOL);
				1576	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1577	had_eol = TRUE;
				1578	#endif
				1579	break;
				1580
				1581	case Magic('<'):
				1582	ret = regnode(BOW);
				1583	break;
				1584
				1585	case Magic('>'):
				1586	ret = regnode(EOW);
				1587	break;
				1588
				1589	case Magic('_'):
				1590	c = no_Magic(getchr());
				1591	if (c == '^') /* "\_^" is start-of-line */
				1592	{
				1593	ret = regnode(BOL);
				1594	break;
				1595	}
				1596	if (c == '$') /* "\_$" is end-of-line */
				1597	{
				1598	ret = regnode(EOL);
				1599	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1600	had_eol = TRUE;
				1601	#endif
				1602	break;
				1603	}
				1604
				1605	extra = ADD_NL;
				1606	*flagp \|= HASNL;
				1607
				1608	/* "\_[" is character range plus newline */
				1609	if (c == '[')
				1610	goto collection;
				1611
				1612	/* "\_x" is character class plus newline */
				1613	/FALLTHROUGH/
				1614
				1615	/*
				1616	* Character classes.
				1617	*/
				1618	case Magic('.'):
				1619	case Magic('i'):
				1620	case Magic('I'):
				1621	case Magic('k'):
				1622	case Magic('K'):
				1623	case Magic('f'):
				1624	case Magic('F'):
				1625	case Magic('p'):
				1626	case Magic('P'):
				1627	case Magic('s'):
				1628	case Magic('S'):
				1629	case Magic('d'):
				1630	case Magic('D'):
				1631	case Magic('x'):
				1632	case Magic('X'):
				1633	case Magic('o'):
				1634	case Magic('O'):
				1635	case Magic('w'):
				1636	case Magic('W'):
				1637	case Magic('h'):
				1638	case Magic('H'):
				1639	case Magic('a'):
				1640	case Magic('A'):
				1641	case Magic('l'):
				1642	case Magic('L'):
				1643	case Magic('u'):
				1644	case Magic('U'):
				1645	p = vim_strchr(classchars, no_Magic(c));
				1646	if (p == NULL)
				1647	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1648	ret = regnode(classcodes[p - classchars] + extra);
				1649	*flagp \|= HASWIDTH \| SIMPLE;
				1650	break;
				1651
				1652	case Magic('n'):
				1653	if (reg_string)
				1654	{
				1655	/* In a string "\n" matches a newline character. */
				1656	ret = regnode(EXACTLY);
				1657	regc(NL);
				1658	regc(NUL);
				1659	*flagp \|= HASWIDTH \| SIMPLE;
				1660	}
				1661	else
				1662	{
				1663	/* In buffer text "\n" matches the end of a line. */
				1664	ret = regnode(NEWL);
				1665	*flagp \|= HASWIDTH \| HASNL;
				1666	}
				1667	break;
				1668
				1669	case Magic('('):
				1670	if (one_exactly)
				1671	EMSG_ONE_RET_NULL;
				1672	ret = reg(REG_PAREN, &flags);
				1673	if (ret == NULL)
				1674	return NULL;
				1675	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1676	break;
				1677
				1678	case NUL:
				1679	case Magic('\|'):
				1680	case Magic('&'):
				1681	case Magic(')'):
				1682	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1683	/* NOTREACHED */
				1684
				1685	case Magic('='):
				1686	case Magic('?'):
				1687	case Magic('+'):
				1688	case Magic('@'):
				1689	case Magic('{'):
				1690	case Magic('*'):
				1691	c = no_Magic(c);
				1692	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1693	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1694	? "" : "\\", c);
				1695	EMSG_RET_NULL(IObuff);
				1696	/* NOTREACHED */
				1697
				1698	case Magic('~'): /* previous substitute pattern */
				1699	if (reg_prev_sub)
				1700	{
				1701	char_u *lp;
				1702
				1703	ret = regnode(EXACTLY);
				1704	lp = reg_prev_sub;
				1705	while (*lp != NUL)
				1706	regc(*lp++);
				1707	regc(NUL);
				1708	if (*reg_prev_sub != NUL)
				1709	{
				1710	*flagp \|= HASWIDTH;
				1711	if ((lp - reg_prev_sub) == 1)
				1712	*flagp \|= SIMPLE;
				1713	}
				1714	}
				1715	else
				1716	EMSG_RET_NULL(_(e_nopresub));
				1717	break;
				1718
				1719	case Magic('1'):
				1720	case Magic('2'):
				1721	case Magic('3'):
				1722	case Magic('4'):
				1723	case Magic('5'):
				1724	case Magic('6'):
				1725	case Magic('7'):
				1726	case Magic('8'):
				1727	case Magic('9'):
				1728	{
				1729	int refnum;
				1730
				1731	refnum = c - Magic('0');
				1732	/*
				1733	* Check if the back reference is legal. We must have seen the
				1734	* close brace.
				1735	* TODO: Should also check that we don't refer to something
				1736	* that is repeated (+*=): what instance of the repetition
				1737	* should we match?
				1738	*/
				1739	if (!had_endbrace[refnum])
				1740	{
				1741	/* Trick: check if "@<=" or "@<!" follows, in which case
				1742	* the \1 can appear before the referenced match. */
				1743	for (p = regparse; *p != NUL; ++p)
				1744	if (p[0] == '@' && p[1] == '<'
				1745	&& (p[2] == '!' \|\| p[2] == '='))
				1746	break;
				1747	if (*p == NUL)
				1748	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1749	}
				1750	ret = regnode(BACKREF + refnum);
				1751	}
				1752	break;
				1753
				1754	#ifdef FEAT_SYN_HL
				1755	case Magic('z'):
				1756	{
				1757	c = no_Magic(getchr());
				1758	switch (c)
				1759	{
				1760	case '(': if (reg_do_extmatch != REX_SET)
				1761	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1762	if (one_exactly)
				1763	EMSG_ONE_RET_NULL;
				1764	ret = reg(REG_ZPAREN, &flags);
				1765	if (ret == NULL)
				1766	return NULL;
				1767	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1768	re_has_z = REX_SET;
				1769	break;
				1770
				1771	case '1':
				1772	case '2':
				1773	case '3':
				1774	case '4':
				1775	case '5':
				1776	case '6':
				1777	case '7':
				1778	case '8':
				1779	case '9': if (reg_do_extmatch != REX_USE)
				1780	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1781	ret = regnode(ZREF + c - '0');
				1782	re_has_z = REX_USE;
				1783	break;
				1784
				1785	case 's': ret = regnode(MOPEN + 0);
				1786	break;
				1787
				1788	case 'e': ret = regnode(MCLOSE + 0);
				1789	break;
				1790
				1791	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1792	}
				1793	}
				1794	break;
				1795	#endif
				1796
				1797	case Magic('%'):
				1798	{
				1799	c = no_Magic(getchr());
				1800	switch (c)
				1801	{
				1802	/* () without a back reference */
				1803	case '(':
				1804	if (one_exactly)
				1805	EMSG_ONE_RET_NULL;
				1806	ret = reg(REG_NPAREN, &flags);
				1807	if (ret == NULL)
				1808	return NULL;
				1809	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1810	break;
				1811
				1812	/* Catch \%^ and \%$ regardless of where they appear in the
				1813	* pattern -- regardless of whether or not it makes sense. */
				1814	case '^':
				1815	ret = regnode(RE_BOF);
				1816	break;
				1817
				1818	case '$':
				1819	ret = regnode(RE_EOF);
				1820	break;
				1821
				1822	case '#':
				1823	ret = regnode(CURSOR);
				1824	break;
				1825
				1826	/* \%[abc]: Emit as a list of branches, all ending at the last
				1827	* branch which matches nothing. */
				1828	case '[':
				1829	if (one_exactly) /* doesn't nest */
				1830	EMSG_ONE_RET_NULL;
				1831	{
				1832	char_u *lastbranch;
				1833	char_u *lastnode = NULL;
				1834	char_u *br;
				1835
				1836	ret = NULL;
				1837	while ((c = getchr()) != ']')
				1838	{
				1839	if (c == NUL)
				1840	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1841	reg_magic == MAGIC_ALL);
				1842	br = regnode(BRANCH);
				1843	if (ret == NULL)
				1844	ret = br;
				1845	else
				1846	regtail(lastnode, br);
				1847
				1848	ungetchr();
				1849	one_exactly = TRUE;
				1850	lastnode = regatom(flagp);
				1851	one_exactly = FALSE;
				1852	if (lastnode == NULL)
				1853	return NULL;
				1854	}
				1855	if (ret == NULL)
				1856	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1857	reg_magic == MAGIC_ALL);
				1858	lastbranch = regnode(BRANCH);
				1859	br = regnode(NOTHING);
				1860	if (ret != JUST_CALC_SIZE)
				1861	{
				1862	regtail(lastnode, br);
				1863	regtail(lastbranch, br);
				1864	/* connect all branches to the NOTHING
				1865	* branch at the end */
				1866	for (br = ret; br != lastnode; )
				1867	{
				1868	if (OP(br) == BRANCH)
				1869	{
				1870	regtail(br, lastbranch);
				1871	br = OPERAND(br);
				1872	}
				1873	else
				1874	br = regnext(br);
				1875	}
				1876	}
				1877	*flagp &= ~HASWIDTH;
				1878	break;
				1879	}
				1880
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1881	case 'd': /* %d123 decimal */
				1882	case 'o': /* %o123 octal */
				1883	case 'x': /* %xab hex 2 */
				1884	case 'u': /* %uabcd hex 4 */
				1885	case 'U': /* %U1234abcd hex 8 */
				1886	{
				1887	int i;
				1888
				1889	switch (c)
				1890	{
				1891	case 'd': i = getdecchrs(); break;
				1892	case 'o': i = getoctchrs(); break;
				1893	case 'x': i = gethexchrs(2); break;
				1894	case 'u': i = gethexchrs(4); break;
				1895	case 'U': i = gethexchrs(8); break;
				1896	default: i = -1; break;
				1897	}
				1898
				1899	if (i < 0)
				1900	EMSG_M_RET_NULL(
				1901	_("E678: Invalid character after %s%%[dxouU]"),
				1902	reg_magic == MAGIC_ALL);
				1903	ret = regnode(EXACTLY);
				1904	if (i == 0)
				1905	regc(0x0a);
				1906	else
				1907	#ifdef FEAT_MBYTE
				1908	regmbc(i);
				1909	#else
				1910	regc(i);
				1911	#endif
				1912	regc(NUL);
				1913	*flagp \|= HASWIDTH;
				1914	break;
				1915	}
				1916
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1917	default:
				1918	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1919	{
				1920	long_u n = 0;
				1921	int cmp;
				1922
				1923	cmp = c;
				1924	if (cmp == '<' \|\| cmp == '>')
				1925	c = getchr();
				1926	while (VIM_ISDIGIT(c))
				1927	{
				1928	n = n * 10 + (c - '0');
				1929	c = getchr();
				1930	}
				1931	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1932	{
				1933	if (c == 'l')
				1934	ret = regnode(RE_LNUM);
				1935	else if (c == 'c')
				1936	ret = regnode(RE_COL);
				1937	else
				1938	ret = regnode(RE_VCOL);
				1939	if (ret == JUST_CALC_SIZE)
				1940	regsize += 5;
				1941	else
				1942	{
				1943	/* put the number and the optional
				1944	* comparator after the opcode */
				1945	regcode = re_put_long(regcode, n);
				1946	*regcode++ = cmp;
				1947	}
				1948	break;
				1949	}
				1950	}
				1951
				1952	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1953	reg_magic == MAGIC_ALL);
				1954	}
				1955	}
				1956	break;
				1957
				1958	case Magic('['):
				1959	collection:
				1960	{
				1961	char_u *lp;
				1962
				1963	/*
				1964	* If there is no matching ']', we assume the '[' is a normal
				1965	* character. This makes 'incsearch' and ":help [" work.
				1966	*/
				1967	lp = skip_anyof(regparse);
				1968	if (lp == ']') / there is a matching ']' */
				1969	{
				1970	int startc = -1; /* > 0 when next '-' is a range */
				1971	int endc;
				1972
				1973	/*
				1974	* In a character class, different parsing rules apply.
				1975	* Not even \ is special anymore, nothing is.
				1976	*/
				1977	if (regparse == '^') / Complement of range. */
				1978	{
				1979	ret = regnode(ANYBUT + extra);
				1980	regparse++;
				1981	}
				1982	else
				1983	ret = regnode(ANYOF + extra);
				1984
				1985	/* At the start ']' and '-' mean the literal character. */
				1986	if (regparse == ']' \|\| regparse == '-')
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1987	{
				1988	startc = *regparse;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1989	regc(*regparse++);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	1990	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1991
				1992	while (regparse != NUL && regparse != ']')
				1993	{
				1994	if (*regparse == '-')
				1995	{
				1996	++regparse;
				1997	/* The '-' is not used for a range at the end and
				1998	* after or before a '\n'. */
				1999	if (regparse == ']' \|\| regparse == NUL
				2000	\|\| startc == -1
				2001	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				2002	{
				2003	regc('-');
				2004	startc = '-'; /* [--x] is a range */
				2005	}
				2006	else
				2007	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2008	/* Also accept "a-[.z.]" */
				2009	endc = 0;
				2010	if (*regparse == '[')
				2011	endc = get_coll_element(&regparse);
				2012	if (endc == 0)
				2013	{
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2014	#ifdef FEAT_MBYTE
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2015	if (has_mbyte)
				2016	endc = mb_ptr2char_adv(&regparse);
				2017	else
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2018	#endif
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2019	endc = *regparse++;
				2020	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2021
				2022	/* Handle \o40, \x20 and \u20AC style sequences */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2023	if (endc == '\\' && !cpo_lit && !cpo_bsl)
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2024	endc = coll_get_char();
				2025
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2026	if (startc > endc)
				2027	EMSG_RET_NULL(_(e_invrange));
				2028	#ifdef FEAT_MBYTE
				2029	if (has_mbyte && ((*mb_char2len)(startc) > 1
				2030	\|\| (*mb_char2len)(endc) > 1))
				2031	{
				2032	/* Limit to a range of 256 chars */
				2033	if (endc > startc + 256)
				2034	EMSG_RET_NULL(_(e_invrange));
				2035	while (++startc <= endc)
				2036	regmbc(startc);
				2037	}
				2038	else
				2039	#endif
				2040	{
				2041	#ifdef EBCDIC
				2042	int alpha_only = FALSE;
				2043
				2044	/* for alphabetical range skip the gaps
				2045	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				2046	if (isalpha(startc) && isalpha(endc))
				2047	alpha_only = TRUE;
				2048	#endif
				2049	while (++startc <= endc)
				2050	#ifdef EBCDIC
				2051	if (!alpha_only \|\| isalpha(startc))
				2052	#endif
				2053	regc(startc);
				2054	}
				2055	startc = -1;
				2056	}
				2057	}
				2058	/*
				2059	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				2060	* accepts "\t", "\e", etc., but only when the 'l' flag in
				2061	* 'cpoptions' is not included.
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2062	* Posix doesn't recognize backslash at all.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2063	*/
				2064	else if (*regparse == '\\'
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2065	&& !cpo_bsl
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2066	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				2067	\|\| (!cpo_lit
				2068	&& vim_strchr(REGEXP_ABBR,
				2069	regparse[1]) != NULL)))
				2070	{
				2071	regparse++;
				2072	if (*regparse == 'n')
				2073	{
				2074	/* '\n' in range: also match NL */
				2075	if (ret != JUST_CALC_SIZE)
				2076	{
				2077	if (*ret == ANYBUT)
				2078	*ret = ANYBUT + ADD_NL;
				2079	else if (*ret == ANYOF)
				2080	*ret = ANYOF + ADD_NL;
				2081	/* else: must have had a \n already */
				2082	}
				2083	*flagp \|= HASNL;
				2084	regparse++;
				2085	startc = -1;
				2086	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2087	else if (*regparse == 'd'
				2088	\|\| *regparse == 'o'
				2089	\|\| *regparse == 'x'
				2090	\|\| *regparse == 'u'
				2091	\|\| *regparse == 'U')
				2092	{
				2093	startc = coll_get_char();
				2094	if (startc == 0)
				2095	regc(0x0a);
				2096	else
				2097	#ifdef FEAT_MBYTE
				2098	regmbc(startc);
				2099	#else
				2100	regc(startc);
				2101	#endif
				2102	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2103	else
				2104	{
				2105	startc = backslash_trans(*regparse++);
				2106	regc(startc);
				2107	}
				2108	}
				2109	else if (*regparse == '[')
				2110	{
				2111	int c_class;
				2112	int cu;
				2113
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2114	c_class = get_char_class(&regparse);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2115	startc = -1;
				2116	/* Characters assumed to be 8 bits! */
				2117	switch (c_class)
				2118	{
				2119	case CLASS_NONE:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2120	c_class = get_equi_class(&regparse);
				2121	if (c_class != 0)
				2122	{
				2123	/* produce equivalence class */
				2124	reg_equi_class(c_class);
				2125	}
				2126	else if ((c_class =
				2127	get_coll_element(&regparse)) != 0)
				2128	{
				2129	/* produce a collating element */
				2130	regmbc(c_class);
				2131	}
				2132	else
				2133	{
				2134	/* literal '[', allow [[-x] as a range */
				2135	startc = *regparse++;
				2136	regc(startc);
				2137	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2138	break;
				2139	case CLASS_ALNUM:
				2140	for (cu = 1; cu <= 255; cu++)
				2141	if (isalnum(cu))
				2142	regc(cu);
				2143	break;
				2144	case CLASS_ALPHA:
				2145	for (cu = 1; cu <= 255; cu++)
				2146	if (isalpha(cu))
				2147	regc(cu);
				2148	break;
				2149	case CLASS_BLANK:
				2150	regc(' ');
				2151	regc('\t');
				2152	break;
				2153	case CLASS_CNTRL:
				2154	for (cu = 1; cu <= 255; cu++)
				2155	if (iscntrl(cu))
				2156	regc(cu);
				2157	break;
				2158	case CLASS_DIGIT:
				2159	for (cu = 1; cu <= 255; cu++)
				2160	if (VIM_ISDIGIT(cu))
				2161	regc(cu);
				2162	break;
				2163	case CLASS_GRAPH:
				2164	for (cu = 1; cu <= 255; cu++)
				2165	if (isgraph(cu))
				2166	regc(cu);
				2167	break;
				2168	case CLASS_LOWER:
				2169	for (cu = 1; cu <= 255; cu++)
				2170	if (islower(cu))
				2171	regc(cu);
				2172	break;
				2173	case CLASS_PRINT:
				2174	for (cu = 1; cu <= 255; cu++)
				2175	if (vim_isprintc(cu))
				2176	regc(cu);
				2177	break;
				2178	case CLASS_PUNCT:
				2179	for (cu = 1; cu <= 255; cu++)
				2180	if (ispunct(cu))
				2181	regc(cu);
				2182	break;
				2183	case CLASS_SPACE:
				2184	for (cu = 9; cu <= 13; cu++)
				2185	regc(cu);
				2186	regc(' ');
				2187	break;
				2188	case CLASS_UPPER:
				2189	for (cu = 1; cu <= 255; cu++)
				2190	if (isupper(cu))
				2191	regc(cu);
				2192	break;
				2193	case CLASS_XDIGIT:
				2194	for (cu = 1; cu <= 255; cu++)
				2195	if (vim_isxdigit(cu))
				2196	regc(cu);
				2197	break;
				2198	case CLASS_TAB:
				2199	regc('\t');
				2200	break;
				2201	case CLASS_RETURN:
				2202	regc('\r');
				2203	break;
				2204	case CLASS_BACKSPACE:
				2205	regc('\b');
				2206	break;
				2207	case CLASS_ESCAPE:
				2208	regc('\033');
				2209	break;
				2210	}
				2211	}
				2212	else
				2213	{
				2214	#ifdef FEAT_MBYTE
				2215	if (has_mbyte)
				2216	{
				2217	int len;
				2218
				2219	/* produce a multibyte character, including any
				2220	* following composing characters */
				2221	startc = mb_ptr2char(regparse);
				2222	len = (*mb_ptr2len_check)(regparse);
				2223	if (enc_utf8 && utf_char2len(startc) != len)
				2224	startc = -1; /* composing chars */
				2225	while (--len >= 0)
				2226	regc(*regparse++);
				2227	}
				2228	else
				2229	#endif
				2230	{
				2231	startc = *regparse++;
				2232	regc(startc);
				2233	}
				2234	}
				2235	}
				2236	regc(NUL);
				2237	prevchr_len = 1; /* last char was the ']' */
				2238	if (*regparse != ']')
				2239	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2240	skipchr(); /* let's be friends with the lexer again */
				2241	*flagp \|= HASWIDTH \| SIMPLE;
				2242	break;
				2243	}
				2244	}
				2245	/* FALLTHROUGH */
				2246
				2247	default:
				2248	{
				2249	int len;
				2250
				2251	#ifdef FEAT_MBYTE
				2252	/* A multi-byte character is handled as a separate atom if it's
				2253	* before a multi. */
				2254	if (has_mbyte && (*mb_char2len)(c) > 1
				2255	&& re_multi_type(peekchr()) != NOT_MULTI)
				2256	{
				2257	ret = regnode(MULTIBYTECODE);
				2258	regmbc(c);
				2259	*flagp \|= HASWIDTH \| SIMPLE;
				2260	break;
				2261	}
				2262	#endif
				2263
				2264	ret = regnode(EXACTLY);
				2265
				2266	/*
				2267	* Append characters as long as:
				2268	* - there is no following multi, we then need the character in
				2269	* front of it as a single character operand
				2270	* - not running into a Magic character
				2271	* - "one_exactly" is not set
				2272	* But always emit at least one character. Might be a Multi,
				2273	* e.g., a "[" without matching "]".
				2274	*/
				2275	for (len = 0; c != NUL && (len == 0
				2276	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2277	&& !one_exactly
				2278	&& !is_Magic(c))); ++len)
				2279	{
				2280	c = no_Magic(c);
				2281	#ifdef FEAT_MBYTE
				2282	if (has_mbyte)
				2283	{
				2284	regmbc(c);
				2285	if (enc_utf8)
				2286	{
				2287	int off;
				2288	int l;
				2289
				2290	/* Need to get composing character too, directly
				2291	* access regparse for that, because skipchr() skips
				2292	* over composing chars. */
				2293	ungetchr();
				2294	if (*regparse == '\\' && regparse[1] != NUL)
				2295	off = 1;
				2296	else
				2297	off = 0;
				2298	for (;;)
				2299	{
				2300	l = utf_ptr2len_check(regparse + off);
				2301	if (!UTF_COMPOSINGLIKE(regparse + off,
				2302	regparse + off + l))
				2303	break;
				2304	off += l;
				2305	regmbc(utf_ptr2char(regparse + off));
				2306	}
				2307	skipchr();
				2308	}
				2309	}
				2310	else
				2311	#endif
				2312	regc(c);
				2313	c = getchr();
				2314	}
				2315	ungetchr();
				2316
				2317	regc(NUL);
				2318	*flagp \|= HASWIDTH;
				2319	if (len == 1)
				2320	*flagp \|= SIMPLE;
				2321	}
				2322	break;
				2323	}
				2324
				2325	return ret;
				2326	}
				2327
				2328	/*
				2329	* emit a node
				2330	* Return pointer to generated code.
				2331	*/
				2332	static char_u *
				2333	regnode(op)
				2334	int op;
				2335	{
				2336	char_u *ret;
				2337
				2338	ret = regcode;
				2339	if (ret == JUST_CALC_SIZE)
				2340	regsize += 3;
				2341	else
				2342	{
				2343	*regcode++ = op;
				2344	regcode++ = NUL; / Null "next" pointer. */
				2345	*regcode++ = NUL;
				2346	}
				2347	return ret;
				2348	}
				2349
				2350	/*
				2351	* Emit (if appropriate) a byte of code
				2352	*/
				2353	static void
				2354	regc(b)
				2355	int b;
				2356	{
				2357	if (regcode == JUST_CALC_SIZE)
				2358	regsize++;
				2359	else
				2360	*regcode++ = b;
				2361	}
				2362
				2363	#ifdef FEAT_MBYTE
				2364	/*
				2365	* Emit (if appropriate) a multi-byte character of code
				2366	*/
				2367	static void
				2368	regmbc(c)
				2369	int c;
				2370	{
				2371	if (regcode == JUST_CALC_SIZE)
				2372	regsize += (*mb_char2len)(c);
				2373	else
				2374	regcode += (*mb_char2bytes)(c, regcode);
				2375	}
				2376	#endif
				2377
				2378	/*
				2379	* reginsert - insert an operator in front of already-emitted operand
				2380	*
				2381	* Means relocating the operand.
				2382	*/
				2383	static void
				2384	reginsert(op, opnd)
				2385	int op;
				2386	char_u *opnd;
				2387	{
				2388	char_u *src;
				2389	char_u *dst;
				2390	char_u *place;
				2391
				2392	if (regcode == JUST_CALC_SIZE)
				2393	{
				2394	regsize += 3;
				2395	return;
				2396	}
				2397	src = regcode;
				2398	regcode += 3;
				2399	dst = regcode;
				2400	while (src > opnd)
				2401	--dst = --src;
				2402
				2403	place = opnd; /* Op node, where operand used to be. */
				2404	*place++ = op;
				2405	*place++ = NUL;
				2406	*place = NUL;
				2407	}
				2408
				2409	/*
				2410	* reginsert_limits - insert an operator in front of already-emitted operand.
				2411	* The operator has the given limit values as operands. Also set next pointer.
				2412	*
				2413	* Means relocating the operand.
				2414	*/
				2415	static void
				2416	reginsert_limits(op, minval, maxval, opnd)
				2417	int op;
				2418	long minval;
				2419	long maxval;
				2420	char_u *opnd;
				2421	{
				2422	char_u *src;
				2423	char_u *dst;
				2424	char_u *place;
				2425
				2426	if (regcode == JUST_CALC_SIZE)
				2427	{
				2428	regsize += 11;
				2429	return;
				2430	}
				2431	src = regcode;
				2432	regcode += 11;
				2433	dst = regcode;
				2434	while (src > opnd)
				2435	--dst = --src;
				2436
				2437	place = opnd; /* Op node, where operand used to be. */
				2438	*place++ = op;
				2439	*place++ = NUL;
				2440	*place++ = NUL;
				2441	place = re_put_long(place, (long_u)minval);
				2442	place = re_put_long(place, (long_u)maxval);
				2443	regtail(opnd, place);
				2444	}
				2445
				2446	/*
				2447	* Write a long as four bytes at "p" and return pointer to the next char.
				2448	*/
				2449	static char_u *
				2450	re_put_long(p, val)
				2451	char_u *p;
				2452	long_u val;
				2453	{
				2454	*p++ = (char_u) ((val >> 24) & 0377);
				2455	*p++ = (char_u) ((val >> 16) & 0377);
				2456	*p++ = (char_u) ((val >> 8) & 0377);
				2457	*p++ = (char_u) (val & 0377);
				2458	return p;
				2459	}
				2460
				2461	/*
				2462	* regtail - set the next-pointer at the end of a node chain
				2463	*/
				2464	static void
				2465	regtail(p, val)
				2466	char_u *p;
				2467	char_u *val;
				2468	{
				2469	char_u *scan;
				2470	char_u *temp;
				2471	int offset;
				2472
				2473	if (p == JUST_CALC_SIZE)
				2474	return;
				2475
				2476	/* Find last node. */
				2477	scan = p;
				2478	for (;;)
				2479	{
				2480	temp = regnext(scan);
				2481	if (temp == NULL)
				2482	break;
				2483	scan = temp;
				2484	}
				2485
				2486	if (OP(scan) == BACK)
				2487	offset = (int)(scan - val);
				2488	else
				2489	offset = (int)(val - scan);
				2490	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2491	*(scan + 2) = (char_u) (offset & 0377);
				2492	}
				2493
				2494	/*
				2495	* regoptail - regtail on item after a BRANCH; nop if none
				2496	*/
				2497	static void
				2498	regoptail(p, val)
				2499	char_u *p;
				2500	char_u *val;
				2501	{
				2502	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2503	if (p == NULL \|\| p == JUST_CALC_SIZE
				2504	\|\| (OP(p) != BRANCH
				2505	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2506	return;
				2507	regtail(OPERAND(p), val);
				2508	}
				2509
/*
 * getchr() - get the next character from the pattern. We know about
 * magic and such, so therefore we need a lexical analyzer.
 *
 * The variables below are the state of that lexer; they are reset by
 * initchr() and shifted along by skipchr() and ungetchr().
 */

/* static int       curchr; */  /* not declared here; presumably declared
                                 * earlier in the file -- TODO confirm */
static int      prevprevchr;    /* value "prevchr" had before the last
                                 * skipchr() */
static int      prevchr;        /* value "curchr" had before the last
                                 * skipchr() */
static int      nextchr;        /* used for ungetchr() */
/*
 * Note: prevchr is sometimes -1 when we are not at the start,
 * eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
 * taken to be magic -- webb
 */
static int      at_start;       /* True when on the first character */
static int      prev_at_start;  /* True when on the second character */
				2526
				2527	static void
				2528	initchr(str)
				2529	char_u *str;
				2530	{
				2531	regparse = str;
				2532	prevchr_len = 0;
				2533	curchr = prevprevchr = prevchr = nextchr = -1;
				2534	at_start = TRUE;
				2535	prev_at_start = FALSE;
				2536	}
				2537
				2538	static int
				2539	peekchr()
				2540	{
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2541	static int after_slash = FALSE;
				2542
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2543	if (curchr == -1)
				2544	{
				2545	switch (curchr = regparse[0])
				2546	{
				2547	case '.':
				2548	case '[':
				2549	case '~':
				2550	/* magic when 'magic' is on */
				2551	if (reg_magic >= MAGIC_ON)
				2552	curchr = Magic(curchr);
				2553	break;
				2554	case '(':
				2555	case ')':
				2556	case '{':
				2557	case '%':
				2558	case '+':
				2559	case '=':
				2560	case '?':
				2561	case '@':
				2562	case '!':
				2563	case '&':
				2564	case '\|':
				2565	case '<':
				2566	case '>':
				2567	case '#': /* future ext. */
				2568	case '"': /* future ext. */
				2569	case '\'': /* future ext. */
				2570	case ',': /* future ext. */
				2571	case '-': /* future ext. */
				2572	case ':': /* future ext. */
				2573	case ';': /* future ext. */
				2574	case '`': /* future ext. */
				2575	case '/': /* Can't be used in / command */
				2576	/* magic only after "\v" */
				2577	if (reg_magic == MAGIC_ALL)
				2578	curchr = Magic(curchr);
				2579	break;
				2580	case '*':
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2581	/* * is not magic as the very first character, eg "?*ptr", when
				2582	* after '^', eg "/^*ptr" and when after "\(", "\\|", "\&". But
				2583	* "\(\" is not magic, thus must be magic if "after_slash" /
				2584	if (reg_magic >= MAGIC_ON
				2585	&& !at_start
				2586	&& !(prev_at_start && prevchr == Magic('^'))
				2587	&& (after_slash
				2588	\|\| (prevchr != Magic('(')
				2589	&& prevchr != Magic('&')
				2590	&& prevchr != Magic('\|'))))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2591	curchr = Magic('*');
				2592	break;
				2593	case '^':
				2594	/* '^' is only magic as the very first character and if it's after
				2595	* "\(", "\\|", "\&' or "\n" */
				2596	if (reg_magic >= MAGIC_OFF
				2597	&& (at_start
				2598	\|\| reg_magic == MAGIC_ALL
				2599	\|\| prevchr == Magic('(')
				2600	\|\| prevchr == Magic('\|')
				2601	\|\| prevchr == Magic('&')
				2602	\|\| prevchr == Magic('n')
				2603	\|\| (no_Magic(prevchr) == '('
				2604	&& prevprevchr == Magic('%'))))
				2605	{
				2606	curchr = Magic('^');
				2607	at_start = TRUE;
				2608	prev_at_start = FALSE;
				2609	}
				2610	break;
				2611	case '$':
				2612	/* '$' is only magic as the very last char and if it's in front of
				2613	* either "\\|", "\)", "\&", or "\n" */
				2614	if (reg_magic >= MAGIC_OFF)
				2615	{
				2616	char_u *p = regparse + 1;
				2617
				2618	/* ignore \c \C \m and \M after '$' */
				2619	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2620	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2621	p += 2;
				2622	if (p[0] == NUL
				2623	\|\| (p[0] == '\\'
				2624	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2625	\|\| p[1] == 'n'))
				2626	\|\| reg_magic == MAGIC_ALL)
				2627	curchr = Magic('$');
				2628	}
				2629	break;
				2630	case '\\':
				2631	{
				2632	int c = regparse[1];
				2633
				2634	if (c == NUL)
				2635	curchr = '\\'; /* trailing '\' */
				2636	else if (
				2637	#ifdef EBCDIC
				2638	vim_strchr(META, c)
				2639	#else
				2640	c <= '~' && META_flags[c]
				2641	#endif
				2642	)
				2643	{
				2644	/*
				2645	* META contains everything that may be magic sometimes,
				2646	* except ^ and $ ("\^" and "\$" are only magic after
				2647	* "\v"). We now fetch the next character and toggle its
				2648	* magicness. Therefore, \ is so meta-magic that it is
				2649	* not in META.
				2650	*/
				2651	curchr = -1;
				2652	prev_at_start = at_start;
				2653	at_start = FALSE; /* be able to say "/\ptr" /
				2654	++regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2655	++after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2656	peekchr();
				2657	--regparse;
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2658	--after_slash;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2659	curchr = toggle_Magic(curchr);
				2660	}
				2661	else if (vim_strchr(REGEXP_ABBR, c))
				2662	{
				2663	/*
				2664	* Handle abbreviations, like "\t" for TAB -- webb
				2665	*/
				2666	curchr = backslash_trans(c);
				2667	}
				2668	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2669	curchr = toggle_Magic(c);
				2670	else
				2671	{
				2672	/*
				2673	* Next character can never be (made) magic?
				2674	* Then backslashing it won't do anything.
				2675	*/
				2676	#ifdef FEAT_MBYTE
				2677	if (has_mbyte)
				2678	curchr = (*mb_ptr2char)(regparse + 1);
				2679	else
				2680	#endif
				2681	curchr = c;
				2682	}
				2683	break;
				2684	}
				2685
				2686	#ifdef FEAT_MBYTE
				2687	default:
				2688	if (has_mbyte)
				2689	curchr = (*mb_ptr2char)(regparse);
				2690	#endif
				2691	}
				2692	}
				2693
				2694	return curchr;
				2695	}
				2696
				2697	/*
				2698	* Eat one lexed character. Do this in a way that we can undo it.
				2699	*/
				2700	static void
				2701	skipchr()
				2702	{
				2703	/* peekchr() eats a backslash, do the same here */
				2704	if (*regparse == '\\')
				2705	prevchr_len = 1;
				2706	else
				2707	prevchr_len = 0;
				2708	if (regparse[prevchr_len] != NUL)
				2709	{
				2710	#ifdef FEAT_MBYTE
				2711	if (has_mbyte)
				2712	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2713	else
				2714	#endif
				2715	++prevchr_len;
				2716	}
				2717	regparse += prevchr_len;
				2718	prev_at_start = at_start;
				2719	at_start = FALSE;
				2720	prevprevchr = prevchr;
				2721	prevchr = curchr;
				2722	curchr = nextchr; /* use previously unget char, or -1 */
				2723	nextchr = -1;
				2724	}
				2725
				2726	/*
				2727	* Skip a character while keeping the value of prev_at_start for at_start.
				2728	* prevchr and prevprevchr are also kept.
				2729	*/
				2730	static void
				2731	skipchr_keepstart()
				2732	{
				2733	int as = prev_at_start;
				2734	int pr = prevchr;
				2735	int prpr = prevprevchr;
				2736
				2737	skipchr();
				2738	at_start = as;
				2739	prevchr = pr;
				2740	prevprevchr = prpr;
				2741	}
				2742
				2743	static int
				2744	getchr()
				2745	{
				2746	int chr = peekchr();
				2747
				2748	skipchr();
				2749	return chr;
				2750	}
				2751
				2752	/*
				2753	* put character back. Works only once!
				2754	*/
				2755	static void
				2756	ungetchr()
				2757	{
				2758	nextchr = curchr;
				2759	curchr = prevchr;
				2760	prevchr = prevprevchr;
				2761	at_start = prev_at_start;
				2762	prev_at_start = FALSE;
				2763
				2764	/* Backup regparse, so that it's at the same position as before the
				2765	* getchr(). */
				2766	regparse -= prevchr_len;
				2767	}
				2768
				2769	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2770	* Get and return the value of the hex string at the current position.
				2771	* Return -1 if there is no valid hex number.
				2772	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2773	* blahblah\%x20asdf
				2774	* before-^ ^-after
				2775	* The parameter controls the maximum number of input characters. This will be
				2776	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2777	*/
				2778	static int
				2779	gethexchrs(maxinputlen)
				2780	int maxinputlen;
				2781	{
				2782	int nr = 0;
				2783	int c;
				2784	int i;
				2785
				2786	for (i = 0; i < maxinputlen; ++i)
				2787	{
				2788	c = regparse[0];
				2789	if (!vim_isxdigit(c))
				2790	break;
				2791	nr <<= 4;
				2792	nr \|= hex2nr(c);
				2793	++regparse;
				2794	}
				2795
				2796	if (i == 0)
				2797	return -1;
				2798	return nr;
				2799	}
				2800
				2801	/*
				2802	* get and return the value of the decimal string immediately after the
				2803	* current position. Return -1 for invalid. Consumes all digits.
				2804	*/
				2805	static int
				2806	getdecchrs()
				2807	{
				2808	int nr = 0;
				2809	int c;
				2810	int i;
				2811
				2812	for (i = 0; ; ++i)
				2813	{
				2814	c = regparse[0];
				2815	if (c < '0' \|\| c > '9')
				2816	break;
				2817	nr *= 10;
				2818	nr += c - '0';
				2819	++regparse;
				2820	}
				2821
				2822	if (i == 0)
				2823	return -1;
				2824	return nr;
				2825	}
				2826
				2827	/*
				2828	* get and return the value of the octal string immediately after the current
				2829	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2830	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2831	* treat 8 or 9 as recognised characters. Position is updated:
				2832	* blahblah\%o210asdf
				2833	* before-^ ^-after
				2834	*/
				2835	static int
				2836	getoctchrs()
				2837	{
				2838	int nr = 0;
				2839	int c;
				2840	int i;
				2841
				2842	for (i = 0; i < 3 && nr < 040; ++i)
				2843	{
				2844	c = regparse[0];
				2845	if (c < '0' \|\| c > '7')
				2846	break;
				2847	nr <<= 3;
				2848	nr \|= hex2nr(c);
				2849	++regparse;
				2850	}
				2851
				2852	if (i == 0)
				2853	return -1;
				2854	return nr;
				2855	}
				2856
				2857	/*
				2858	* Get a number after a backslash that is inside [].
				2859	* When nothing is recognized return a backslash.
				2860	*/
				2861	static int
				2862	coll_get_char()
				2863	{
				2864	int nr = -1;
				2865
				2866	switch (*regparse++)
				2867	{
				2868	case 'd': nr = getdecchrs(); break;
				2869	case 'o': nr = getoctchrs(); break;
				2870	case 'x': nr = gethexchrs(2); break;
				2871	case 'u': nr = gethexchrs(4); break;
				2872	case 'U': nr = gethexchrs(8); break;
				2873	}
				2874	if (nr < 0)
				2875	{
				2876	/* If getting the number fails be backwards compatible: the character
				2877	* is a backslash. */
				2878	--regparse;
				2879	nr = '\\';
				2880	}
				2881	return nr;
				2882	}
				2883
				2884	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2885	* read_limits - Read two integers to be taken as a minimum and maximum.
				2886	* If the first character is '-', then the range is reversed.
				2887	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2888	* missing, a very big number is the default.
				2889	*/
				2890	static int
				2891	read_limits(minval, maxval)
				2892	long *minval;
				2893	long *maxval;
				2894	{
				2895	int reverse = FALSE;
				2896	char_u *first_char;
				2897	long tmp;
				2898
				2899	if (*regparse == '-')
				2900	{
				2901	/* Starts with '-', so reverse the range later */
				2902	regparse++;
				2903	reverse = TRUE;
				2904	}
				2905	first_char = regparse;
				2906	*minval = getdigits(&regparse);
				2907	if (regparse == ',') / There is a comma */
				2908	{
				2909	if (vim_isdigit(*++regparse))
				2910	*maxval = getdigits(&regparse);
				2911	else
				2912	*maxval = MAX_LIMIT;
				2913	}
				2914	else if (VIM_ISDIGIT(*first_char))
				2915	maxval = minval; /* It was \{n} or \{-n} */
				2916	else
				2917	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2918	if (*regparse == '\\')
				2919	regparse++; /* Allow either \{...} or \{...\} */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	2920	if (*regparse != '}')
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2921	{
				2922	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2923	reg_magic == MAGIC_ALL ? "" : "\\");
				2924	EMSG_RET_FAIL(IObuff);
				2925	}
				2926
				2927	/*
				2928	* Reverse the range if there was a '-', or make sure it is in the right
				2929	* order otherwise.
				2930	*/
				2931	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2932	{
				2933	tmp = *minval;
				2934	minval = maxval;
				2935	*maxval = tmp;
				2936	}
				2937	skipchr(); /* let's be friends with the lexer again */
				2938	return OK;
				2939	}
				2940
				2941	/*
				2942	* vim_regexec and friends
				2943	*/
				2944
				2945	/*
				2946	* Global work variables for vim_regexec().
				2947	*/
				2948
				2949	/* The current match-position is remembered with these variables: */
				2950	static linenr_T reglnum; /* line number, relative to first line */
				2951	static char_u regline; / start of current line */
				2952	static char_u reginput; / current input, points into "regline" */
				2953
				2954	static int need_clear_subexpr; /* subexpressions still need to be
				2955	* cleared */
				2956	#ifdef FEAT_SYN_HL
				2957	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2958	* still need to be cleared */
				2959	#endif
				2960
				2961	static int out_of_stack; /* TRUE when ran out of stack space */
				2962
				2963	/*
				2964	* Structure used to save the current input state, when it needs to be
				2965	* restored after trying a match. Used by reg_save() and reg_restore().
				2966	*/
				2967	typedef struct
				2968	{
				2969	union
				2970	{
				2971	char_u ptr; / reginput pointer, for single-line regexp */
				2972	lpos_T pos; /* reginput pos, for multi-line regexp */
				2973	} rs_u;
				2974	} regsave_T;
				2975
				2976	/* struct to save start/end pointer/position in for */
				2977	typedef struct
				2978	{
				2979	union
				2980	{
				2981	char_u *ptr;
				2982	lpos_T pos;
				2983	} se_u;
				2984	} save_se_T;
				2985
				2986	static char_u *reg_getline __ARGS((linenr_T lnum));
				2987	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2988	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2989	static void cleanup_subexpr __ARGS((void));
				2990	#ifdef FEAT_SYN_HL
				2991	static void cleanup_zsubexpr __ARGS((void));
				2992	#endif
				2993	static void reg_nextline __ARGS((void));
				2994	static void reg_save __ARGS((regsave_T *save));
				2995	static void reg_restore __ARGS((regsave_T *save));
				2996	static int reg_save_equal __ARGS((regsave_T *save));
				2997	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2998	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2999
				3000	/* Save the sub-expressions before attempting a match. */
				3001	#define save_se(savep, posp, pp) \
				3002	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				3003
				3004	/* After a failed match restore the sub-expressions. */
				3005	#define restore_se(savep, posp, pp) { \
				3006	if (REG_MULTI) \
				3007	*(posp) = (savep)->se_u.pos; \
				3008	else \
				3009	*(pp) = (savep)->se_u.ptr; }
				3010
				3011	static int re_num_cmp __ARGS((long_u val, char_u *scan));
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3012	static int regmatch __ARGS((char_u prog, regsave_T startp));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3013	static int regrepeat __ARGS((char_u *p, long maxcount));
				3014
				3015	#ifdef DEBUG
				3016	int regnarrate = 0;
				3017	#endif
				3018
				3019	/*
				3020	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				3021	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				3022	* contains '\c' or '\C' the value is overruled.
				3023	*/
				3024	static int ireg_ic;
				3025
				3026	#ifdef FEAT_MBYTE
				3027	/*
				3028	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				3029	* in the regexp. Defaults to false, always.
				3030	*/
				3031	static int ireg_icombine;
				3032	#endif
				3033
				3034	/*
				3035	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				3036	* slow, we keep one allocated piece of memory and only re-allocate it when
				3037	* it's too small. It's freed in vim_regexec_both() when finished.
				3038	*/
				3039	static char_u *reg_tofree;
				3040	static unsigned reg_tofreelen;
				3041
				3042	/*
				3043	* These variables are set when executing a regexp to speed up the execution.
				3044	* Which ones are set depends on whethere a single-line or multi-line match is
				3045	* done:
				3046	* single-line multi-line
				3047	* reg_match &regmatch_T NULL
				3048	* reg_mmatch NULL &regmmatch_T
				3049	* reg_startp reg_match->startp <invalid>
				3050	* reg_endp reg_match->endp <invalid>
				3051	* reg_startpos <invalid> reg_mmatch->startpos
				3052	* reg_endpos <invalid> reg_mmatch->endpos
				3053	* reg_win NULL window in which to search
				3054	* reg_buf <invalid> buffer in which to search
				3055	* reg_firstlnum <invalid> first line in which to search
				3056	* reg_maxline 0 last line nr
				3057	* reg_line_lbr FALSE or TRUE FALSE
				3058	*/
				3059	static regmatch_T *reg_match;
				3060	static regmmatch_T *reg_mmatch;
				3061	static char_u **reg_startp = NULL;
				3062	static char_u **reg_endp = NULL;
				3063	static lpos_T *reg_startpos = NULL;
				3064	static lpos_T *reg_endpos = NULL;
				3065	static win_T *reg_win;
				3066	static buf_T *reg_buf;
				3067	static linenr_T reg_firstlnum;
				3068	static linenr_T reg_maxline;
				3069	static int reg_line_lbr; /* "\n" in string is line break */
				3070
				3071	/*
				3072	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				3073	*/
				3074	static char_u *
				3075	reg_getline(lnum)
				3076	linenr_T lnum;
				3077	{
				3078	/* when looking behind for a match/no-match lnum is negative. But we
				3079	* can't go before line 1 */
				3080	if (reg_firstlnum + lnum < 1)
				3081	return NULL;
				3082	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				3083	}
				3084
				3085	static regsave_T behind_pos;
				3086
				3087	#ifdef FEAT_SYN_HL
				3088	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				3089	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				3090	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				3091	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				3092	#endif
				3093
				3094	/* TRUE if using multi-line regexp. */
				3095	#define REG_MULTI (reg_match == NULL)
				3096
				3097	/*
				3098	* Match a regexp against a string.
				3099	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3100	* Uses curbuf for line count and 'iskeyword'.
				3101	*
				3102	* Return TRUE if there is a match, FALSE if not.
				3103	*/
				3104	int
				3105	vim_regexec(rmp, line, col)
				3106	regmatch_T *rmp;
				3107	char_u line; / string to match against */
				3108	colnr_T col; /* column to start looking for match */
				3109	{
				3110	reg_match = rmp;
				3111	reg_mmatch = NULL;
				3112	reg_maxline = 0;
				3113	reg_line_lbr = FALSE;
				3114	reg_win = NULL;
				3115	ireg_ic = rmp->rm_ic;
				3116	#ifdef FEAT_MBYTE
				3117	ireg_icombine = FALSE;
				3118	#endif
				3119	return (vim_regexec_both(line, col) != 0);
				3120	}
				3121
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 * The only difference in the set-up is that reg_line_lbr is TRUE.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T  *rmp;
    char_u      *line;  /* string to match against */
    colnr_T     col;    /* column to start looking for match */
{
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    reg_win = NULL;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    return (vim_regexec_both(line, col) != 0);
}
#endif
				3145
				3146	/*
				3147	* Match a regexp against multiple lines.
				3148	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3149	* Uses curbuf for line count and 'iskeyword'.
				3150	*
				3151	* Return zero if there is no match. Return number of lines contained in the
				3152	* match otherwise.
				3153	*/
				3154	long
				3155	vim_regexec_multi(rmp, win, buf, lnum, col)
				3156	regmmatch_T *rmp;
				3157	win_T win; / window in which to search or NULL */
				3158	buf_T buf; / buffer in which to search */
				3159	linenr_T lnum; /* nr of line to start looking for match */
				3160	colnr_T col; /* column to start looking for match */
				3161	{
				3162	long r;
				3163	buf_T *save_curbuf = curbuf;
				3164
				3165	reg_match = NULL;
				3166	reg_mmatch = rmp;
				3167	reg_buf = buf;
				3168	reg_win = win;
				3169	reg_firstlnum = lnum;
				3170	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				3171	reg_line_lbr = FALSE;
				3172	ireg_ic = rmp->rmm_ic;
				3173	#ifdef FEAT_MBYTE
				3174	ireg_icombine = FALSE;
				3175	#endif
				3176
				3177	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				3178	curbuf = buf;
				3179	r = vim_regexec_both(NULL, col);
				3180	curbuf = save_curbuf;
				3181
				3182	return r;
				3183	}
				3184
#if 0 /* this does not appear to work... */
# ifdef __MINGW32__
#  define MINGW_TRY
# endif
#endif

#ifdef MINGW_TRY
/*
 * Special assembly code for MingW to simulate __try / __except.
 * Does not work with the optimizer!
 */
# include <excpt.h>

/* Referenced as _ESP_save and _EBP_save from the inline assembly in
 * vim_regexec_both(). */
static void *ESP_save;  /* saved stack pointer */
static void *EBP_save;  /* saved frame pointer */

/*
 * Exception handler pushed by the inline assembly in vim_regexec_both().
 * It does not use its arguments; it jumps to the "regexec_reentry" label.
 */
__attribute__ ((cdecl))
    EXCEPTION_DISPOSITION
_except_regexec_handler(
    struct _EXCEPTION_RECORD *ExceptionRecord,
    void *EstablisherFrame,
    struct _CONTEXT *ContextRecord,
    void *DispatcherContext)
{
    __asm__ __volatile__ (
            "jmp regexec_reentry");
    return 0;   /* Function does not return */
}
#endif
				3214
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3215	/*
				3216	* Match a regexp against a string ("line" points to the string) or multiple
				3217	* lines ("line" is NULL, use reg_getline()).
				3218	*/
				3219	#ifdef HAVE_SETJMP_H
				3220	static long
				3221	vim_regexec_both(line_arg, col_arg)
				3222	char_u *line_arg;
				3223	colnr_T col_arg; /* column to start looking for match */
				3224	#else
				3225	static long
				3226	vim_regexec_both(line, col)
				3227	char_u *line;
				3228	colnr_T col; /* column to start looking for match */
				3229	#endif
				3230	{
				3231	regprog_T *prog;
				3232	char_u *s;
				3233	long retval;
				3234	#ifdef HAVE_SETJMP_H
				3235	char_u *line;
				3236	colnr_T col;
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3237	int did_mch_startjmp = FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3238	#endif
				3239
				3240	reg_tofree = NULL;
				3241
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3242	#ifdef HAVE_SETJMP_H
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3243	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3244	line = line_arg;
				3245	col = col_arg;
				3246	#endif
				3247	retval = 0L;
				3248
				3249	if (REG_MULTI)
				3250	{
				3251	prog = reg_mmatch->regprog;
				3252	line = reg_getline((linenr_T)0);
				3253	reg_startpos = reg_mmatch->startpos;
				3254	reg_endpos = reg_mmatch->endpos;
				3255	}
				3256	else
				3257	{
				3258	prog = reg_match->regprog;
				3259	reg_startp = reg_match->startp;
				3260	reg_endp = reg_match->endp;
				3261	}
				3262
				3263	/* Be paranoid... */
				3264	if (prog == NULL \|\| line == NULL)
				3265	{
				3266	EMSG(_(e_null));
				3267	goto theend;
				3268	}
				3269
				3270	/* Check validity of program. */
				3271	if (prog_magic_wrong())
				3272	goto theend;
				3273
				3274	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3275	if (prog->regflags & RF_ICASE)
				3276	ireg_ic = TRUE;
				3277	else if (prog->regflags & RF_NOICASE)
				3278	ireg_ic = FALSE;
				3279
				3280	#ifdef FEAT_MBYTE
				3281	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3282	if (prog->regflags & RF_ICOMBINE)
				3283	ireg_icombine = TRUE;
				3284	#endif
				3285
				3286	/* If there is a "must appear" string, look for it. */
				3287	if (prog->regmust != NULL)
				3288	{
				3289	int c;
				3290
				3291	#ifdef FEAT_MBYTE
				3292	if (has_mbyte)
				3293	c = (*mb_ptr2char)(prog->regmust);
				3294	else
				3295	#endif
				3296	c = *prog->regmust;
				3297	s = line + col;
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3298
				3299	/*
				3300	* This is used very often, esp. for ":global". Use three versions of
				3301	* the loop to avoid overhead of conditions.
				3302	*/
				3303	if (!ireg_ic
				3304	#ifdef FEAT_MBYTE
				3305	&& !has_mbyte
				3306	#endif
				3307	)
				3308	while ((s = vim_strbyte(s, c)) != NULL)
				3309	{
				3310	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3311	break; /* Found it. */
				3312	++s;
				3313	}
				3314	#ifdef FEAT_MBYTE
				3315	else if (!ireg_ic \|\| (!enc_utf8 && mb_char2len(c) > 1))
				3316	while ((s = vim_strchr(s, c)) != NULL)
				3317	{
				3318	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3319	break; /* Found it. */
				3320	mb_ptr_adv(s);
				3321	}
				3322	#endif
				3323	else
				3324	while ((s = cstrchr(s, c)) != NULL)
				3325	{
				3326	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3327	break; /* Found it. */
				3328	mb_ptr_adv(s);
				3329	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3330	if (s == NULL) /* Not present. */
				3331	goto theend;
				3332	}
				3333
Bram Moolenaar	8cd06ca	2005-02-28 22:44:58 +0000	[diff] [blame]	3334	#ifdef MINGW_TRY
				3335	/* Ugly assembly code that is necessary to simulate "__try". */
				3336	__asm__ __volatile__ (
				3337	"movl %esp, _ESP_save" "\n\t"
				3338	"movl %ebp, _EBP_save");
				3339
				3340	__asm__ __volatile__ (
				3341	"pushl $__except_regexec_handler" "\n\t"
				3342	"pushl %fs:0" "\n\t"
				3343	"mov %esp, %fs:0");
				3344	#endif
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3345	#ifdef HAVE_TRY_EXCEPT
				3346	__try
				3347	{
				3348	#endif
				3349
				3350	#ifdef HAVE_SETJMP_H
				3351	/*
				3352	* Matching with a regexp may cause a very deep recursive call of
				3353	* regmatch(). Vim will crash when running out of stack space. Catch
				3354	* this here if the system supports it.
				3355	* It's a bit slow, do it after the check for "regmust".
				3356	* Don't do it if the caller already set it up.
				3357	*/
				3358	if (!lc_active)
				3359	{
				3360	did_mch_startjmp = TRUE;
				3361	mch_startjmp();
				3362	if (SETJMP(lc_jump_env) != 0)
				3363	{
				3364	mch_didjmp();
				3365	# ifdef SIGHASARG
				3366	if (lc_signal != SIGINT)
				3367	# endif
				3368	EMSG(_(e_complex));
				3369	retval = 0L;
				3370	goto inner_end;
				3371	}
				3372	}
				3373	#endif
				3374
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3375	regline = line;
				3376	reglnum = 0;
				3377	out_of_stack = FALSE;
				3378
				3379	/* Simplest case: Anchored match need be tried only once. */
				3380	if (prog->reganch)
				3381	{
				3382	int c;
				3383
				3384	#ifdef FEAT_MBYTE
				3385	if (has_mbyte)
				3386	c = (*mb_ptr2char)(regline + col);
				3387	else
				3388	#endif
				3389	c = regline[col];
				3390	if (prog->regstart == NUL
				3391	\|\| prog->regstart == c
				3392	\|\| (ireg_ic && ((
				3393	#ifdef FEAT_MBYTE
				3394	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3395	\|\| (c < 255 && prog->regstart < 255 &&
				3396	#endif
				3397	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3398	retval = regtry(prog, col);
				3399	else
				3400	retval = 0;
				3401	}
				3402	else
				3403	{
				3404	/* Messy cases: unanchored match. */
				3405	while (!got_int && !out_of_stack)
				3406	{
				3407	if (prog->regstart != NUL)
				3408	{
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3409	/* Skip until the char we know it must start with.
				3410	* Used often, do some work to avoid call overhead. */
				3411	if (!ireg_ic
				3412	#ifdef FEAT_MBYTE
				3413	&& !has_mbyte
				3414	#endif
				3415	)
				3416	s = vim_strbyte(regline + col, prog->regstart);
				3417	else
				3418	s = cstrchr(regline + col, prog->regstart);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3419	if (s == NULL)
				3420	{
				3421	retval = 0;
				3422	break;
				3423	}
				3424	col = (int)(s - regline);
				3425	}
				3426
				3427	retval = regtry(prog, col);
				3428	if (retval > 0)
				3429	break;
				3430
				3431	/* if not currently on the first line, get it again */
				3432	if (reglnum != 0)
				3433	{
				3434	regline = reg_getline((linenr_T)0);
				3435	reglnum = 0;
				3436	}
				3437	if (regline[col] == NUL)
				3438	break;
				3439	#ifdef FEAT_MBYTE
				3440	if (has_mbyte)
				3441	col += (*mb_ptr2len_check)(regline + col);
				3442	else
				3443	#endif
				3444	++col;
				3445	}
				3446	}
				3447
				3448	if (out_of_stack)
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3449	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3450
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3451	#ifdef HAVE_SETJMP_H
				3452	inner_end:
Bram Moolenaar	05159a0	2005-02-26 23:04:13 +0000	[diff] [blame]	3453	if (did_mch_startjmp)
				3454	mch_endjmp();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3455	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3456	#ifdef HAVE_TRY_EXCEPT
				3457	}
				3458	__except(EXCEPTION_EXECUTE_HANDLER)
				3459	{
				3460	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3461	{
				3462	RESETSTKOFLW();
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3463	EMSG(_(e_outofstack));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3464	}
				3465	else
Bram Moolenaar	748bf03	2005-02-02 23:04:36 +0000	[diff] [blame]	3466	EMSG(_(e_complex));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3467	retval = 0L;
				3468	}
				3469	#endif
Bram Moolenaar	8cd06ca	2005-02-28 22:44:58 +0000	[diff] [blame]	3470	#ifdef MINGW_TRY
				3471	__asm__ __volatile__ (
				3472	"jmp regexec_pop" "\n"
				3473	"regexec_reentry:" "\n\t"
				3474	"movl _ESP_save, %esp" "\n\t"
				3475	"movl _EBP_save, %ebp");
				3476
				3477	EMSG(_(e_complex));
				3478	retval = 0L;
				3479
				3480	__asm__ __volatile__ (
				3481	"regexec_pop:" "\n\t"
				3482	"mov (%esp), %eax" "\n\t"
				3483	"mov %eax, %fs:0" "\n\t"
				3484	"add $8, %esp");
				3485	#endif
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3486
				3487	theend:
				3488	/* Didn't find a match. */
				3489	vim_free(reg_tofree);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3490	return retval;
				3491	}
				3492
				3493	#ifdef FEAT_SYN_HL
				3494	static reg_extmatch_T *make_extmatch __ARGS((void));
				3495
				3496	/*
				3497	* Create a new extmatch and mark it as referenced once.
				3498	*/
				3499	static reg_extmatch_T *
				3500	make_extmatch()
				3501	{
				3502	reg_extmatch_T *em;
				3503
				3504	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3505	if (em != NULL)
				3506	em->refcnt = 1;
				3507	return em;
				3508	}
				3509
				3510	/*
				3511	* Add a reference to an extmatch.
				3512	*/
				3513	reg_extmatch_T *
				3514	ref_extmatch(em)
				3515	reg_extmatch_T *em;
				3516	{
				3517	if (em != NULL)
				3518	em->refcnt++;
				3519	return em;
				3520	}
				3521
				3522	/*
				3523	* Remove a reference to an extmatch. If there are no references left, free
				3524	* the info.
				3525	*/
				3526	void
				3527	unref_extmatch(em)
				3528	reg_extmatch_T *em;
				3529	{
				3530	int i;
				3531
				3532	if (em != NULL && --em->refcnt <= 0)
				3533	{
				3534	for (i = 0; i < NSUBEXP; ++i)
				3535	vim_free(em->matches[i]);
				3536	vim_free(em);
				3537	}
				3538	}
				3539	#endif
				3540
				3541	/*
				3542	* regtry - try match of "prog" with at regline["col"].
				3543	* Returns 0 for failure, number of lines contained in the match otherwise.
				3544	*/
				3545	static long
				3546	regtry(prog, col)
				3547	regprog_T *prog;
				3548	colnr_T col;
				3549	{
				3550	reginput = regline + col;
				3551	need_clear_subexpr = TRUE;
				3552	#ifdef FEAT_SYN_HL
				3553	/* Clear the external match subpointers if necessary. */
				3554	if (prog->reghasz == REX_SET)
				3555	need_clear_zsubexpr = TRUE;
				3556	#endif
				3557
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3558	if (regmatch(prog->program + 1, NULL))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3559	{
				3560	cleanup_subexpr();
				3561	if (REG_MULTI)
				3562	{
				3563	if (reg_startpos[0].lnum < 0)
				3564	{
				3565	reg_startpos[0].lnum = 0;
				3566	reg_startpos[0].col = col;
				3567	}
				3568	if (reg_endpos[0].lnum < 0)
				3569	{
				3570	reg_endpos[0].lnum = reglnum;
				3571	reg_endpos[0].col = (int)(reginput - regline);
				3572	}
				3573	else
				3574	/* Use line number of "\ze". */
				3575	reglnum = reg_endpos[0].lnum;
				3576	}
				3577	else
				3578	{
				3579	if (reg_startp[0] == NULL)
				3580	reg_startp[0] = regline + col;
				3581	if (reg_endp[0] == NULL)
				3582	reg_endp[0] = reginput;
				3583	}
				3584	#ifdef FEAT_SYN_HL
				3585	/* Package any found \z(...\) matches for export. Default is none. */
				3586	unref_extmatch(re_extmatch_out);
				3587	re_extmatch_out = NULL;
				3588
				3589	if (prog->reghasz == REX_SET)
				3590	{
				3591	int i;
				3592
				3593	cleanup_zsubexpr();
				3594	re_extmatch_out = make_extmatch();
				3595	for (i = 0; i < NSUBEXP; i++)
				3596	{
				3597	if (REG_MULTI)
				3598	{
				3599	/* Only accept single line matches. */
				3600	if (reg_startzpos[i].lnum >= 0
				3601	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3602	re_extmatch_out->matches[i] =
				3603	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3604	+ reg_startzpos[i].col,
				3605	reg_endzpos[i].col - reg_startzpos[i].col);
				3606	}
				3607	else
				3608	{
				3609	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3610	re_extmatch_out->matches[i] =
				3611	vim_strnsave(reg_startzp[i],
				3612	(int)(reg_endzp[i] - reg_startzp[i]));
				3613	}
				3614	}
				3615	}
				3616	#endif
				3617	return 1 + reglnum;
				3618	}
				3619	return 0;
				3620	}
				3621
				3622	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3623	static int reg_prev_class __ARGS((void));
				3624
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3625	/*
				3626	* Get class of previous character.
				3627	*/
				3628	static int
				3629	reg_prev_class()
				3630	{
				3631	if (reginput > regline)
				3632	return mb_get_class(reginput - 1
				3633	- (*mb_head_off)(regline, reginput - 1));
				3634	return -1;
				3635	}
				3636
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3637	#endif
/* Advance "reginput" with mb_ptr_adv(). */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * The arguments from BRACE_LIMITS are stored here.  They are actually local
 * to regmatch(), but they are here to reduce the amount of stack space used
 * (it can be called recursively many times).
 */
static long     bl_minval;      /* minimum from BRACE_LIMITS */
static long     bl_maxval;      /* maximum from BRACE_LIMITS */
				3647
				3648	/*
				3649	* regmatch - main matching routine
				3650	*
				3651	* Conceptually the strategy is simple: Check to see whether the current
				3652	* node matches, call self recursively to see whether the rest matches,
				3653	* and then act accordingly. In practice we make some effort to avoid
				3654	* recursion, in particular by going through "ordinary" nodes (that don't
				3655	* need to know whether the rest of the match failed) by a loop instead of
				3656	* by recursion.
				3657	*
				3658	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3659	* the last matched character.
				3660	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3661	* undefined state!
				3662	*/
				3663	static int
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3664	regmatch(scan, startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3665	char_u scan; / Current node. */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	3666	regsave_T startp; / start position for BACK */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3667	{
				3668	char_u next; / Next node. */
				3669	int op;
				3670	int c;
				3671
				3672	#ifdef HAVE_GETRLIMIT
				3673	/* Check if we are running out of stack space. Could be caused by
				3674	* recursively calling ourselves. */
				3675	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3676	{
				3677	out_of_stack = TRUE;
				3678	return FALSE;
				3679	}
				3680	#endif
				3681
				3682	/* Some patterns may cause a long time to match, even though they are not
				3683	* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
				3684	fast_breakcheck();
				3685
				3686	#ifdef DEBUG
				3687	if (scan != NULL && regnarrate)
				3688	{
				3689	mch_errmsg(regprop(scan));
				3690	mch_errmsg("(\n");
				3691	}
				3692	#endif
				3693	while (scan != NULL)
				3694	{
				3695	if (got_int \|\| out_of_stack)
				3696	return FALSE;
				3697	#ifdef DEBUG
				3698	if (regnarrate)
				3699	{
				3700	mch_errmsg(regprop(scan));
				3701	mch_errmsg("...\n");
				3702	# ifdef FEAT_SYN_HL
				3703	if (re_extmatch_in != NULL)
				3704	{
				3705	int i;
				3706
				3707	mch_errmsg(_("External submatches:\n"));
				3708	for (i = 0; i < NSUBEXP; i++)
				3709	{
				3710	mch_errmsg(" \"");
				3711	if (re_extmatch_in->matches[i] != NULL)
				3712	mch_errmsg(re_extmatch_in->matches[i]);
				3713	mch_errmsg("\"\n");
				3714	}
				3715	}
				3716	# endif
				3717	}
				3718	#endif
				3719	next = regnext(scan);
				3720
				3721	op = OP(scan);
				3722	/* Check for character class with NL added. */
				3723	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3724	{
				3725	reg_nextline();
				3726	}
				3727	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3728	{
				3729	ADVANCE_REGINPUT();
				3730	}
				3731	else
				3732	{
				3733	if (WITH_NL(op))
				3734	op -= ADD_NL;
				3735	#ifdef FEAT_MBYTE
				3736	if (has_mbyte)
				3737	c = (*mb_ptr2char)(reginput);
				3738	else
				3739	#endif
				3740	c = *reginput;
				3741	switch (op)
				3742	{
				3743	case BOL:
				3744	if (reginput != regline)
				3745	return FALSE;
				3746	break;
				3747
				3748	case EOL:
				3749	if (c != NUL)
				3750	return FALSE;
				3751	break;
				3752
				3753	case RE_BOF:
				3754	/* Passing -1 to the getline() function provided for the search
				3755	* should always return NULL if the current line is the first
				3756	* line of the file. */
				3757	if (reglnum != 0 \|\| reginput != regline
				3758	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3759	return FALSE;
				3760	break;
				3761
				3762	case RE_EOF:
				3763	if (reglnum != reg_maxline \|\| c != NUL)
				3764	return FALSE;
				3765	break;
				3766
				3767	case CURSOR:
				3768	/* Check if the buffer is in a window and compare the
				3769	* reg_win->w_cursor position to the match position. */
				3770	if (reg_win == NULL
				3771	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3772	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3773	return FALSE;
				3774	break;
				3775
				3776	case RE_LNUM:
				3777	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3778	scan))
				3779	return FALSE;
				3780	break;
				3781
				3782	case RE_COL:
				3783	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3784	return FALSE;
				3785	break;
				3786
				3787	case RE_VCOL:
				3788	if (!re_num_cmp((long_u)win_linetabsize(
				3789	reg_win == NULL ? curwin : reg_win,
				3790	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3791	return FALSE;
				3792	break;
				3793
				3794	case BOW: /* \<word; reginput points to w */
				3795	if (c == NUL) /* Can't match at end of line */
				3796	return FALSE;
				3797	#ifdef FEAT_MBYTE
				3798	if (has_mbyte)
				3799	{
				3800	int this_class;
				3801
				3802	/* Get class of current and previous char (if it exists). */
				3803	this_class = mb_get_class(reginput);
				3804	if (this_class <= 1)
				3805	return FALSE; /* not on a word at all */
				3806	if (reg_prev_class() == this_class)
				3807	return FALSE; /* previous char is in same word */
				3808	}
				3809	#endif
				3810	else
				3811	{
				3812	if (!vim_iswordc(c)
				3813	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3814	return FALSE;
				3815	}
				3816	break;
				3817
				3818	case EOW: /* word\>; reginput points after d */
				3819	if (reginput == regline) /* Can't match at start of line */
				3820	return FALSE;
				3821	#ifdef FEAT_MBYTE
				3822	if (has_mbyte)
				3823	{
				3824	int this_class, prev_class;
				3825
				3826	/* Get class of current and previous char (if it exists). */
				3827	this_class = mb_get_class(reginput);
				3828	prev_class = reg_prev_class();
				3829	if (this_class == prev_class)
				3830	return FALSE;
				3831	if (prev_class == 0 \|\| prev_class == 1)
				3832	return FALSE;
				3833	}
				3834	else
				3835	#endif
				3836	{
				3837	if (!vim_iswordc(reginput[-1]))
				3838	return FALSE;
				3839	if (reginput[0] != NUL && vim_iswordc(c))
				3840	return FALSE;
				3841	}
				3842	break; /* Matched with EOW */
				3843
				3844	case ANY:
				3845	if (c == NUL)
				3846	return FALSE;
				3847	ADVANCE_REGINPUT();
				3848	break;
				3849
				3850	case IDENT:
				3851	if (!vim_isIDc(c))
				3852	return FALSE;
				3853	ADVANCE_REGINPUT();
				3854	break;
				3855
				3856	case SIDENT:
				3857	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3858	return FALSE;
				3859	ADVANCE_REGINPUT();
				3860	break;
				3861
				3862	case KWORD:
				3863	if (!vim_iswordp(reginput))
				3864	return FALSE;
				3865	ADVANCE_REGINPUT();
				3866	break;
				3867
				3868	case SKWORD:
				3869	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3870	return FALSE;
				3871	ADVANCE_REGINPUT();
				3872	break;
				3873
				3874	case FNAME:
				3875	if (!vim_isfilec(c))
				3876	return FALSE;
				3877	ADVANCE_REGINPUT();
				3878	break;
				3879
				3880	case SFNAME:
				3881	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3882	return FALSE;
				3883	ADVANCE_REGINPUT();
				3884	break;
				3885
				3886	case PRINT:
				3887	if (ptr2cells(reginput) != 1)
				3888	return FALSE;
				3889	ADVANCE_REGINPUT();
				3890	break;
				3891
				3892	case SPRINT:
				3893	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3894	return FALSE;
				3895	ADVANCE_REGINPUT();
				3896	break;
				3897
				3898	case WHITE:
				3899	if (!vim_iswhite(c))
				3900	return FALSE;
				3901	ADVANCE_REGINPUT();
				3902	break;
				3903
				3904	case NWHITE:
				3905	if (c == NUL \|\| vim_iswhite(c))
				3906	return FALSE;
				3907	ADVANCE_REGINPUT();
				3908	break;
				3909
				3910	case DIGIT:
				3911	if (!ri_digit(c))
				3912	return FALSE;
				3913	ADVANCE_REGINPUT();
				3914	break;
				3915
				3916	case NDIGIT:
				3917	if (c == NUL \|\| ri_digit(c))
				3918	return FALSE;
				3919	ADVANCE_REGINPUT();
				3920	break;
				3921
				3922	case HEX:
				3923	if (!ri_hex(c))
				3924	return FALSE;
				3925	ADVANCE_REGINPUT();
				3926	break;
				3927
				3928	case NHEX:
				3929	if (c == NUL \|\| ri_hex(c))
				3930	return FALSE;
				3931	ADVANCE_REGINPUT();
				3932	break;
				3933
				3934	case OCTAL:
				3935	if (!ri_octal(c))
				3936	return FALSE;
				3937	ADVANCE_REGINPUT();
				3938	break;
				3939
				3940	case NOCTAL:
				3941	if (c == NUL \|\| ri_octal(c))
				3942	return FALSE;
				3943	ADVANCE_REGINPUT();
				3944	break;
				3945
				3946	case WORD:
				3947	if (!ri_word(c))
				3948	return FALSE;
				3949	ADVANCE_REGINPUT();
				3950	break;
				3951
				3952	case NWORD:
				3953	if (c == NUL \|\| ri_word(c))
				3954	return FALSE;
				3955	ADVANCE_REGINPUT();
				3956	break;
				3957
				3958	case HEAD:
				3959	if (!ri_head(c))
				3960	return FALSE;
				3961	ADVANCE_REGINPUT();
				3962	break;
				3963
				3964	case NHEAD:
				3965	if (c == NUL \|\| ri_head(c))
				3966	return FALSE;
				3967	ADVANCE_REGINPUT();
				3968	break;
				3969
				3970	case ALPHA:
				3971	if (!ri_alpha(c))
				3972	return FALSE;
				3973	ADVANCE_REGINPUT();
				3974	break;
				3975
				3976	case NALPHA:
				3977	if (c == NUL \|\| ri_alpha(c))
				3978	return FALSE;
				3979	ADVANCE_REGINPUT();
				3980	break;
				3981
				3982	case LOWER:
				3983	if (!ri_lower(c))
				3984	return FALSE;
				3985	ADVANCE_REGINPUT();
				3986	break;
				3987
				3988	case NLOWER:
				3989	if (c == NUL \|\| ri_lower(c))
				3990	return FALSE;
				3991	ADVANCE_REGINPUT();
				3992	break;
				3993
				3994	case UPPER:
				3995	if (!ri_upper(c))
				3996	return FALSE;
				3997	ADVANCE_REGINPUT();
				3998	break;
				3999
				4000	case NUPPER:
				4001	if (c == NUL \|\| ri_upper(c))
				4002	return FALSE;
				4003	ADVANCE_REGINPUT();
				4004	break;
				4005
				4006	case EXACTLY:
				4007	{
				4008	int len;
				4009	char_u *opnd;
				4010
				4011	opnd = OPERAND(scan);
				4012	/* Inline the first byte, for speed. */
				4013	if (opnd != reginput
				4014	&& (!ireg_ic \|\| (
				4015	#ifdef FEAT_MBYTE
				4016	!enc_utf8 &&
				4017	#endif
				4018	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				4019	return FALSE;
				4020	if (*opnd == NUL)
				4021	{
				4022	/* match empty string always works; happens when "~" is
				4023	* empty. */
				4024	}
				4025	else if (opnd[1] == NUL
				4026	#ifdef FEAT_MBYTE
				4027	&& !(enc_utf8 && ireg_ic)
				4028	#endif
				4029	)
				4030	++reginput; /* matched a single char */
				4031	else
				4032	{
				4033	len = (int)STRLEN(opnd);
				4034	/* Need to match first byte again for multi-byte. */
				4035	if (cstrncmp(opnd, reginput, &len) != 0)
				4036	return FALSE;
				4037	#ifdef FEAT_MBYTE
				4038	/* Check for following composing character. */
				4039	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				4040	{
				4041	/* raaron: This code makes a composing character get
				4042	* ignored, which is the correct behavior (sometimes)
				4043	* for voweled Hebrew texts. */
				4044	if (!ireg_icombine)
				4045	return FALSE;
				4046	}
				4047	else
				4048	#endif
				4049	reginput += len;
				4050	}
				4051	}
				4052	break;
				4053
				4054	case ANYOF:
				4055	case ANYBUT:
				4056	if (c == NUL)
				4057	return FALSE;
				4058	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				4059	return FALSE;
				4060	ADVANCE_REGINPUT();
				4061	break;
				4062
				4063	#ifdef FEAT_MBYTE
				4064	case MULTIBYTECODE:
				4065	if (has_mbyte)
				4066	{
				4067	int i, len;
				4068	char_u *opnd;
				4069
				4070	opnd = OPERAND(scan);
				4071	/* Safety check (just in case 'encoding' was changed since
				4072	* compiling the program). */
				4073	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				4074	return FALSE;
				4075	for (i = 0; i < len; ++i)
				4076	if (opnd[i] != reginput[i])
				4077	return FALSE;
				4078	reginput += len;
				4079	}
				4080	else
				4081	return FALSE;
				4082	break;
				4083	#endif
				4084
				4085	case NOTHING:
				4086	break;
				4087
				4088	case BACK:
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4089	/* When we run into BACK without matching something non-empty, we
				4090	* fail. */
				4091	if (startp != NULL && reg_save_equal(startp))
				4092	return FALSE;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4093	break;
				4094
				4095	case MOPEN + 0: /* Match start: \zs */
				4096	case MOPEN + 1: /* \( */
				4097	case MOPEN + 2:
				4098	case MOPEN + 3:
				4099	case MOPEN + 4:
				4100	case MOPEN + 5:
				4101	case MOPEN + 6:
				4102	case MOPEN + 7:
				4103	case MOPEN + 8:
				4104	case MOPEN + 9:
				4105	{
				4106	int no;
				4107	save_se_T save;
				4108
				4109	no = op - MOPEN;
				4110	cleanup_subexpr();
				4111	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				4112
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4113	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4114	return TRUE;
				4115
				4116	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				4117	return FALSE;
				4118	}
				4119	/* break; Not Reached */
				4120
				4121	case NOPEN: /* \%( */
				4122	case NCLOSE: /* \) after \%( */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4123	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4124	return TRUE;
				4125	return FALSE;
				4126	/* break; Not Reached */
				4127
				4128	#ifdef FEAT_SYN_HL
				4129	case ZOPEN + 1:
				4130	case ZOPEN + 2:
				4131	case ZOPEN + 3:
				4132	case ZOPEN + 4:
				4133	case ZOPEN + 5:
				4134	case ZOPEN + 6:
				4135	case ZOPEN + 7:
				4136	case ZOPEN + 8:
				4137	case ZOPEN + 9:
				4138	{
				4139	int no;
				4140	save_se_T save;
				4141
				4142	no = op - ZOPEN;
				4143	cleanup_zsubexpr();
				4144	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4145
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4146	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4147	return TRUE;
				4148
				4149	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				4150	return FALSE;
				4151	}
				4152	/* break; Not Reached */
				4153	#endif
				4154
				4155	case MCLOSE + 0: /* Match end: \ze */
				4156	case MCLOSE + 1: /* \) */
				4157	case MCLOSE + 2:
				4158	case MCLOSE + 3:
				4159	case MCLOSE + 4:
				4160	case MCLOSE + 5:
				4161	case MCLOSE + 6:
				4162	case MCLOSE + 7:
				4163	case MCLOSE + 8:
				4164	case MCLOSE + 9:
				4165	{
				4166	int no;
				4167	save_se_T save;
				4168
				4169	no = op - MCLOSE;
				4170	cleanup_subexpr();
				4171	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				4172
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4173	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4174	return TRUE;
				4175
				4176	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				4177	return FALSE;
				4178	}
				4179	/* break; Not Reached */
				4180
				4181	#ifdef FEAT_SYN_HL
				4182	case ZCLOSE + 1: /* \) after \z( */
				4183	case ZCLOSE + 2:
				4184	case ZCLOSE + 3:
				4185	case ZCLOSE + 4:
				4186	case ZCLOSE + 5:
				4187	case ZCLOSE + 6:
				4188	case ZCLOSE + 7:
				4189	case ZCLOSE + 8:
				4190	case ZCLOSE + 9:
				4191	{
				4192	int no;
				4193	save_se_T save;
				4194
				4195	no = op - ZCLOSE;
				4196	cleanup_zsubexpr();
				4197	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4198
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4199	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4200	return TRUE;
				4201
				4202	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				4203	return FALSE;
				4204	}
				4205	/* break; Not Reached */
				4206	#endif
				4207
				4208	case BACKREF + 1:
				4209	case BACKREF + 2:
				4210	case BACKREF + 3:
				4211	case BACKREF + 4:
				4212	case BACKREF + 5:
				4213	case BACKREF + 6:
				4214	case BACKREF + 7:
				4215	case BACKREF + 8:
				4216	case BACKREF + 9:
				4217	{
				4218	int no;
				4219	int len;
				4220	linenr_T clnum;
				4221	colnr_T ccol;
				4222	char_u *p;
				4223
				4224	no = op - BACKREF;
				4225	cleanup_subexpr();
				4226	if (!REG_MULTI) /* Single-line regexp */
				4227	{
				4228	if (reg_endp[no] == NULL)
				4229	{
				4230	/* Backref was not set: Match an empty string. */
				4231	len = 0;
				4232	}
				4233	else
				4234	{
				4235	/* Compare current input with back-ref in the same
				4236	* line. */
				4237	len = (int)(reg_endp[no] - reg_startp[no]);
				4238	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				4239	return FALSE;
				4240	}
				4241	}
				4242	else /* Multi-line regexp */
				4243	{
				4244	if (reg_endpos[no].lnum < 0)
				4245	{
				4246	/* Backref was not set: Match an empty string. */
				4247	len = 0;
				4248	}
				4249	else
				4250	{
				4251	if (reg_startpos[no].lnum == reglnum
				4252	&& reg_endpos[no].lnum == reglnum)
				4253	{
				4254	/* Compare back-ref within the current line. */
				4255	len = reg_endpos[no].col - reg_startpos[no].col;
				4256	if (cstrncmp(regline + reg_startpos[no].col,
				4257	reginput, &len) != 0)
				4258	return FALSE;
				4259	}
				4260	else
				4261	{
				4262	/* Messy situation: Need to compare between two
				4263	* lines. */
				4264	ccol = reg_startpos[no].col;
				4265	clnum = reg_startpos[no].lnum;
				4266	for (;;)
				4267	{
				4268	/* Since getting one line may invalidate
				4269	* the other, need to make copy. Slow! */
				4270	if (regline != reg_tofree)
				4271	{
				4272	len = (int)STRLEN(regline);
				4273	if (reg_tofree == NULL
				4274	\|\| len >= (int)reg_tofreelen)
				4275	{
				4276	len += 50; /* get some extra */
				4277	vim_free(reg_tofree);
				4278	reg_tofree = alloc(len);
				4279	if (reg_tofree == NULL)
				4280	return FALSE; /* out of memory! */
				4281	reg_tofreelen = len;
				4282	}
				4283	STRCPY(reg_tofree, regline);
				4284	reginput = reg_tofree
				4285	+ (reginput - regline);
				4286	regline = reg_tofree;
				4287	}
				4288
				4289	/* Get the line to compare with. */
				4290	p = reg_getline(clnum);
				4291	if (clnum == reg_endpos[no].lnum)
				4292	len = reg_endpos[no].col - ccol;
				4293	else
				4294	len = (int)STRLEN(p + ccol);
				4295
				4296	if (cstrncmp(p + ccol, reginput, &len) != 0)
				4297	return FALSE; /* doesn't match */
				4298	if (clnum == reg_endpos[no].lnum)
				4299	break; /* match and at end! */
				4300	if (reglnum == reg_maxline)
				4301	return FALSE; /* text too short */
				4302
				4303	/* Advance to next line. */
				4304	reg_nextline();
				4305	++clnum;
				4306	ccol = 0;
				4307	if (got_int \|\| out_of_stack)
				4308	return FALSE;
				4309	}
				4310
				4311	/* found a match! Note that regline may now point
				4312	* to a copy of the line, that should not matter. */
				4313	}
				4314	}
				4315	}
				4316
				4317	/* Matched the backref, skip over it. */
				4318	reginput += len;
				4319	}
				4320	break;
				4321
				4322	#ifdef FEAT_SYN_HL
				4323	case ZREF + 1:
				4324	case ZREF + 2:
				4325	case ZREF + 3:
				4326	case ZREF + 4:
				4327	case ZREF + 5:
				4328	case ZREF + 6:
				4329	case ZREF + 7:
				4330	case ZREF + 8:
				4331	case ZREF + 9:
				4332	{
				4333	int no;
				4334	int len;
				4335
				4336	cleanup_zsubexpr();
				4337	no = op - ZREF;
				4338	if (re_extmatch_in != NULL
				4339	&& re_extmatch_in->matches[no] != NULL)
				4340	{
				4341	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4342	if (cstrncmp(re_extmatch_in->matches[no],
				4343	reginput, &len) != 0)
				4344	return FALSE;
				4345	reginput += len;
				4346	}
				4347	else
				4348	{
				4349	/* Backref was not set: Match an empty string. */
				4350	}
				4351	}
				4352	break;
				4353	#endif
				4354
				4355	case BRANCH:
				4356	{
				4357	if (OP(next) != BRANCH) /* No choice. */
				4358	next = OPERAND(scan); /* Avoid recursion. */
				4359	else
				4360	{
				4361	regsave_T save;
				4362
				4363	do
				4364	{
				4365	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4366	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4367	return TRUE;
				4368	reg_restore(&save);
				4369	scan = regnext(scan);
				4370	} while (scan != NULL && OP(scan) == BRANCH);
				4371	return FALSE;
				4372	/* NOTREACHED */
				4373	}
				4374	}
				4375	break;
				4376
				4377	case BRACE_LIMITS:
				4378	{
				4379	int no;
				4380
				4381	if (OP(next) == BRACE_SIMPLE)
				4382	{
				4383	bl_minval = OPERAND_MIN(scan);
				4384	bl_maxval = OPERAND_MAX(scan);
				4385	}
				4386	else if (OP(next) >= BRACE_COMPLEX
				4387	&& OP(next) < BRACE_COMPLEX + 10)
				4388	{
				4389	no = OP(next) - BRACE_COMPLEX;
				4390	brace_min[no] = OPERAND_MIN(scan);
				4391	brace_max[no] = OPERAND_MAX(scan);
				4392	brace_count[no] = 0;
				4393	}
				4394	else
				4395	{
				4396	EMSG(_(e_internal)); /* Shouldn't happen */
				4397	return FALSE;
				4398	}
				4399	}
				4400	break;
				4401
				4402	case BRACE_COMPLEX + 0:
				4403	case BRACE_COMPLEX + 1:
				4404	case BRACE_COMPLEX + 2:
				4405	case BRACE_COMPLEX + 3:
				4406	case BRACE_COMPLEX + 4:
				4407	case BRACE_COMPLEX + 5:
				4408	case BRACE_COMPLEX + 6:
				4409	case BRACE_COMPLEX + 7:
				4410	case BRACE_COMPLEX + 8:
				4411	case BRACE_COMPLEX + 9:
				4412	{
				4413	int no;
				4414	regsave_T save;
				4415
				4416	no = op - BRACE_COMPLEX;
				4417	++brace_count[no];
				4418
				4419	/* If not matched enough times yet, try one more */
				4420	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4421	? brace_min[no] : brace_max[no]))
				4422	{
				4423	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4424	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4425	return TRUE;
				4426	reg_restore(&save);
				4427	--brace_count[no]; /* failed, decrement match count */
				4428	return FALSE;
				4429	}
				4430
				4431	/* If matched enough times, may try matching some more */
				4432	if (brace_min[no] <= brace_max[no])
				4433	{
				4434	/* Range is the normal way around, use longest match */
				4435	if (brace_count[no] <= brace_max[no])
				4436	{
				4437	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4438	if (regmatch(OPERAND(scan), &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4439	return TRUE; /* matched some more times */
				4440	reg_restore(&save);
				4441	--brace_count[no]; /* matched just enough times */
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4442	/* { continue with the items after \{} */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4443	}
				4444	}
				4445	else
				4446	{
				4447	/* Range is backwards, use shortest match first */
				4448	if (brace_count[no] <= brace_min[no])
				4449	{
				4450	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4451	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4452	return TRUE;
				4453	reg_restore(&save);
				4454	next = OPERAND(scan);
				4455	/* must try to match one more item */
				4456	}
				4457	}
				4458	}
				4459	break;
				4460
				4461	case BRACE_SIMPLE:
				4462	case STAR:
				4463	case PLUS:
				4464	{
				4465	int nextb; /* next byte */
				4466	int nextb_ic; /* next byte reverse case */
				4467	long count;
				4468	regsave_T save;
				4469	long minval;
				4470	long maxval;
				4471
				4472	/*
				4473	* Lookahead to avoid useless match attempts when we know
				4474	* what character comes next.
				4475	*/
				4476	if (OP(next) == EXACTLY)
				4477	{
				4478	nextb = *OPERAND(next);
				4479	if (ireg_ic)
				4480	{
				4481	if (isupper(nextb))
				4482	nextb_ic = TOLOWER_LOC(nextb);
				4483	else
				4484	nextb_ic = TOUPPER_LOC(nextb);
				4485	}
				4486	else
				4487	nextb_ic = nextb;
				4488	}
				4489	else
				4490	{
				4491	nextb = NUL;
				4492	nextb_ic = NUL;
				4493	}
				4494	if (op != BRACE_SIMPLE)
				4495	{
				4496	minval = (op == STAR) ? 0 : 1;
				4497	maxval = MAX_LIMIT;
				4498	}
				4499	else
				4500	{
				4501	minval = bl_minval;
				4502	maxval = bl_maxval;
				4503	}
				4504
				4505	/*
				4506	* When maxval > minval, try matching as much as possible, up
				4507	* to maxval. When maxval < minval, try matching at least the
				4508	* minimal number (since the range is backwards, that's also
				4509	* maxval!).
				4510	*/
				4511	count = regrepeat(OPERAND(scan), maxval);
				4512	if (got_int)
				4513	return FALSE;
				4514	if (minval <= maxval)
				4515	{
				4516	/* Range is the normal way around, use longest match */
				4517	while (count >= minval)
				4518	{
				4519	/* If it could match, try it. */
				4520	if (nextb == NUL \|\| *reginput == nextb
				4521	\|\| *reginput == nextb_ic)
				4522	{
				4523	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4524	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4525	return TRUE;
				4526	reg_restore(&save);
				4527	}
				4528	/* Couldn't or didn't match -- back up one char. */
				4529	if (--count < minval)
				4530	break;
				4531	if (reginput == regline)
				4532	{
				4533	/* backup to last char of previous line */
				4534	--reglnum;
				4535	regline = reg_getline(reglnum);
				4536	/* Just in case regrepeat() didn't count right. */
				4537	if (regline == NULL)
				4538	return FALSE;
				4539	reginput = regline + STRLEN(regline);
				4540	fast_breakcheck();
				4541	if (got_int \|\| out_of_stack)
				4542	return FALSE;
				4543	}
				4544	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4545	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4546	}
				4547	}
				4548	else
				4549	{
				4550	/* Range is backwards, use shortest match first.
				4551	* Careful: maxval and minval are exchanged! */
				4552	if (count < maxval)
				4553	return FALSE;
				4554	for (;;)
				4555	{
				4556	/* If it could work, try it. */
				4557	if (nextb == NUL \|\| *reginput == nextb
				4558	\|\| *reginput == nextb_ic)
				4559	{
				4560	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4561	if (regmatch(next, &save))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4562	return TRUE;
				4563	reg_restore(&save);
				4564	}
				4565	/* Couldn't or didn't match: try advancing one char. */
				4566	if (count == minval
				4567	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4568	break;
				4569	++count;
				4570	if (got_int \|\| out_of_stack)
				4571	return FALSE;
				4572	}
				4573	}
				4574	return FALSE;
				4575	}
				4576	/* break; Not Reached */
				4577
				4578	case NOMATCH:
				4579	{
				4580	regsave_T save;
				4581
				4582	/* If the operand matches, we fail. Otherwise backup and
				4583	* continue with the next item. */
				4584	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4585	if (regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4586	return FALSE;
				4587	reg_restore(&save);
				4588	}
				4589	break;
				4590
				4591	case MATCH:
				4592	case SUBPAT:
				4593	{
				4594	regsave_T save;
				4595
				4596	/* If the operand doesn't match, we fail. Otherwise backup
				4597	* and continue with the next item. */
				4598	reg_save(&save);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4599	if (!regmatch(OPERAND(scan), startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4600	return FALSE;
				4601	if (op == MATCH) /* zero-width */
				4602	reg_restore(&save);
				4603	}
				4604	break;
				4605
				4606	case BEHIND:
				4607	case NOBEHIND:
				4608	{
				4609	regsave_T save_after, save_start;
				4610	regsave_T save_behind_pos;
				4611	int needmatch = (op == BEHIND);
				4612
				4613	/*
				4614	* Look back in the input of the operand matches or not. This
				4615	* must be done at every position in the input and checking if
				4616	* the match ends at the current position.
				4617	* First check if the next item matches, that's probably
				4618	* faster.
				4619	*/
				4620	reg_save(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4621	if (regmatch(next, startp))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4622	{
				4623	/* save the position after the found match for next */
				4624	reg_save(&save_after);
				4625
				4626	/* start looking for a match with operand at the current
				4627	* postion. Go back one character until we find the
				4628	* result, hitting the start of the line or the previous
				4629	* line (for multi-line matching).
				4630	* Set behind_pos to where the match should end, BHPOS
				4631	* will match it. */
				4632	save_behind_pos = behind_pos;
				4633	behind_pos = save_start;
				4634	for (;;)
				4635	{
				4636	reg_restore(&save_start);
Bram Moolenaar	df177f6	2005-02-22 08:39:57 +0000	[diff] [blame]	4637	if (regmatch(OPERAND(scan), startp)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4638	&& reg_save_equal(&behind_pos))
				4639	{
				4640	behind_pos = save_behind_pos;
				4641	/* found a match that ends where "next" started */
				4642	if (needmatch)
				4643	{
				4644	reg_restore(&save_after);
				4645	return TRUE;
				4646	}
				4647	return FALSE;
				4648	}
				4649	/*
				4650	* No match: Go back one character. May go to
				4651	* previous line once.
				4652	*/
				4653	if (REG_MULTI)
				4654	{
				4655	if (save_start.rs_u.pos.col == 0)
				4656	{
				4657	if (save_start.rs_u.pos.lnum
				4658	< behind_pos.rs_u.pos.lnum
				4659	\|\| reg_getline(
				4660	--save_start.rs_u.pos.lnum) == NULL)
				4661	break;
				4662	reg_restore(&save_start);
				4663	save_start.rs_u.pos.col =
				4664	(colnr_T)STRLEN(regline);
				4665	}
				4666	else
				4667	--save_start.rs_u.pos.col;
				4668	}
				4669	else
				4670	{
				4671	if (save_start.rs_u.ptr == regline)
				4672	break;
				4673	--save_start.rs_u.ptr;
				4674	}
				4675	}
				4676
				4677	/* NOBEHIND succeeds when no match was found */
				4678	behind_pos = save_behind_pos;
				4679	if (!needmatch)
				4680	{
				4681	reg_restore(&save_after);
				4682	return TRUE;
				4683	}
				4684	}
				4685	return FALSE;
				4686	}
				4687
				4688	case BHPOS:
				4689	if (REG_MULTI)
				4690	{
				4691	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4692	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4693	return FALSE;
				4694	}
				4695	else if (behind_pos.rs_u.ptr != reginput)
				4696	return FALSE;
				4697	break;
				4698
				4699	case NEWL:
				4700	if ((c != NUL \|\| reglnum == reg_maxline)
				4701	&& (c != '\n' \|\| !reg_line_lbr))
				4702	return FALSE;
				4703	if (reg_line_lbr)
				4704	ADVANCE_REGINPUT();
				4705	else
				4706	reg_nextline();
				4707	break;
				4708
				4709	case END:
				4710	return TRUE; /* Success! */
				4711
				4712	default:
				4713	EMSG(_(e_re_corr));
				4714	#ifdef DEBUG
				4715	printf("Illegal op code %d\n", op);
				4716	#endif
				4717	return FALSE;
				4718	}
				4719	}
				4720
				4721	scan = next;
				4722	}
				4723
				4724	/*
				4725	* We get here only if there's trouble -- normally "case END" is the
				4726	* terminating point.
				4727	*/
				4728	EMSG(_(e_re_corr));
				4729	#ifdef DEBUG
				4730	printf("Premature EOL\n");
				4731	#endif
				4732	return FALSE;
				4733	}
				4734
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4735	/*
				4736	* regrepeat - repeatedly match something simple, return how many.
				4737	* Advances reginput (and reglnum) to just after the matched chars.
				4738	*/
				4739	static int
				4740	regrepeat(p, maxcount)
				4741	char_u *p;
				4742	long maxcount; /* maximum number of matches allowed */
				4743	{
				4744	long count = 0;
				4745	char_u *scan;
				4746	char_u *opnd;
				4747	int mask;
				4748	int testval = 0;
				4749
				4750	scan = reginput; /* Make local copy of reginput for speed. */
				4751	opnd = OPERAND(p);
				4752	switch (OP(p))
				4753	{
				4754	case ANY:
				4755	case ANY + ADD_NL:
				4756	while (count < maxcount)
				4757	{
				4758	/* Matching anything means we continue until end-of-line (or
				4759	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4760	while (*scan != NUL && count < maxcount)
				4761	{
				4762	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4763	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4764	}
				4765	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4766	break;
				4767	++count; /* count the line-break */
				4768	reg_nextline();
				4769	scan = reginput;
				4770	if (got_int)
				4771	break;
				4772	}
				4773	break;
				4774
				4775	case IDENT:
				4776	case IDENT + ADD_NL:
				4777	testval = TRUE;
				4778	/FALLTHROUGH/
				4779	case SIDENT:
				4780	case SIDENT + ADD_NL:
				4781	while (count < maxcount)
				4782	{
				4783	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4784	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4785	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4786	}
				4787	else if (*scan == NUL)
				4788	{
				4789	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4790	break;
				4791	reg_nextline();
				4792	scan = reginput;
				4793	if (got_int)
				4794	break;
				4795	}
				4796	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4797	++scan;
				4798	else
				4799	break;
				4800	++count;
				4801	}
				4802	break;
				4803
				4804	case KWORD:
				4805	case KWORD + ADD_NL:
				4806	testval = TRUE;
				4807	/FALLTHROUGH/
				4808	case SKWORD:
				4809	case SKWORD + ADD_NL:
				4810	while (count < maxcount)
				4811	{
				4812	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4813	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4814	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4815	}
				4816	else if (*scan == NUL)
				4817	{
				4818	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4819	break;
				4820	reg_nextline();
				4821	scan = reginput;
				4822	if (got_int)
				4823	break;
				4824	}
				4825	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4826	++scan;
				4827	else
				4828	break;
				4829	++count;
				4830	}
				4831	break;
				4832
				4833	case FNAME:
				4834	case FNAME + ADD_NL:
				4835	testval = TRUE;
				4836	/FALLTHROUGH/
				4837	case SFNAME:
				4838	case SFNAME + ADD_NL:
				4839	while (count < maxcount)
				4840	{
				4841	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4842	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4843	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4844	}
				4845	else if (*scan == NUL)
				4846	{
				4847	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4848	break;
				4849	reg_nextline();
				4850	scan = reginput;
				4851	if (got_int)
				4852	break;
				4853	}
				4854	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4855	++scan;
				4856	else
				4857	break;
				4858	++count;
				4859	}
				4860	break;
				4861
				4862	case PRINT:
				4863	case PRINT + ADD_NL:
				4864	testval = TRUE;
				4865	/FALLTHROUGH/
				4866	case SPRINT:
				4867	case SPRINT + ADD_NL:
				4868	while (count < maxcount)
				4869	{
				4870	if (*scan == NUL)
				4871	{
				4872	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4873	break;
				4874	reg_nextline();
				4875	scan = reginput;
				4876	if (got_int)
				4877	break;
				4878	}
				4879	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4880	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4881	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4882	}
				4883	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4884	++scan;
				4885	else
				4886	break;
				4887	++count;
				4888	}
				4889	break;
				4890
				4891	case WHITE:
				4892	case WHITE + ADD_NL:
				4893	testval = mask = RI_WHITE;
				4894	do_class:
				4895	while (count < maxcount)
				4896	{
				4897	#ifdef FEAT_MBYTE
				4898	int l;
				4899	#endif
				4900	if (*scan == NUL)
				4901	{
				4902	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4903	break;
				4904	reg_nextline();
				4905	scan = reginput;
				4906	if (got_int)
				4907	break;
				4908	}
				4909	#ifdef FEAT_MBYTE
				4910	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4911	{
				4912	if (testval != 0)
				4913	break;
				4914	scan += l;
				4915	}
				4916	#endif
				4917	else if ((class_tab[*scan] & mask) == testval)
				4918	++scan;
				4919	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4920	++scan;
				4921	else
				4922	break;
				4923	++count;
				4924	}
				4925	break;
				4926
				4927	case NWHITE:
				4928	case NWHITE + ADD_NL:
				4929	mask = RI_WHITE;
				4930	goto do_class;
				4931	case DIGIT:
				4932	case DIGIT + ADD_NL:
				4933	testval = mask = RI_DIGIT;
				4934	goto do_class;
				4935	case NDIGIT:
				4936	case NDIGIT + ADD_NL:
				4937	mask = RI_DIGIT;
				4938	goto do_class;
				4939	case HEX:
				4940	case HEX + ADD_NL:
				4941	testval = mask = RI_HEX;
				4942	goto do_class;
				4943	case NHEX:
				4944	case NHEX + ADD_NL:
				4945	mask = RI_HEX;
				4946	goto do_class;
				4947	case OCTAL:
				4948	case OCTAL + ADD_NL:
				4949	testval = mask = RI_OCTAL;
				4950	goto do_class;
				4951	case NOCTAL:
				4952	case NOCTAL + ADD_NL:
				4953	mask = RI_OCTAL;
				4954	goto do_class;
				4955	case WORD:
				4956	case WORD + ADD_NL:
				4957	testval = mask = RI_WORD;
				4958	goto do_class;
				4959	case NWORD:
				4960	case NWORD + ADD_NL:
				4961	mask = RI_WORD;
				4962	goto do_class;
				4963	case HEAD:
				4964	case HEAD + ADD_NL:
				4965	testval = mask = RI_HEAD;
				4966	goto do_class;
				4967	case NHEAD:
				4968	case NHEAD + ADD_NL:
				4969	mask = RI_HEAD;
				4970	goto do_class;
				4971	case ALPHA:
				4972	case ALPHA + ADD_NL:
				4973	testval = mask = RI_ALPHA;
				4974	goto do_class;
				4975	case NALPHA:
				4976	case NALPHA + ADD_NL:
				4977	mask = RI_ALPHA;
				4978	goto do_class;
				4979	case LOWER:
				4980	case LOWER + ADD_NL:
				4981	testval = mask = RI_LOWER;
				4982	goto do_class;
				4983	case NLOWER:
				4984	case NLOWER + ADD_NL:
				4985	mask = RI_LOWER;
				4986	goto do_class;
				4987	case UPPER:
				4988	case UPPER + ADD_NL:
				4989	testval = mask = RI_UPPER;
				4990	goto do_class;
				4991	case NUPPER:
				4992	case NUPPER + ADD_NL:
				4993	mask = RI_UPPER;
				4994	goto do_class;
				4995
				4996	case EXACTLY:
				4997	{
				4998	int cu, cl;
				4999
				5000	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				5001	* would have been used for it. */
				5002	if (ireg_ic)
				5003	{
				5004	cu = TOUPPER_LOC(*opnd);
				5005	cl = TOLOWER_LOC(*opnd);
				5006	while (count < maxcount && (scan == cu \|\| scan == cl))
				5007	{
				5008	count++;
				5009	scan++;
				5010	}
				5011	}
				5012	else
				5013	{
				5014	cu = *opnd;
				5015	while (count < maxcount && *scan == cu)
				5016	{
				5017	count++;
				5018	scan++;
				5019	}
				5020	}
				5021	break;
				5022	}
				5023
				5024	#ifdef FEAT_MBYTE
				5025	case MULTIBYTECODE:
				5026	{
				5027	int i, len, cf = 0;
				5028
				5029	/* Safety check (just in case 'encoding' was changed since
				5030	* compiling the program). */
				5031	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				5032	{
				5033	if (ireg_ic && enc_utf8)
				5034	cf = utf_fold(utf_ptr2char(opnd));
				5035	while (count < maxcount)
				5036	{
				5037	for (i = 0; i < len; ++i)
				5038	if (opnd[i] != scan[i])
				5039	break;
				5040	if (i < len && (!ireg_ic \|\| !enc_utf8
				5041	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				5042	break;
				5043	scan += len;
				5044	++count;
				5045	}
				5046	}
				5047	}
				5048	break;
				5049	#endif
				5050
				5051	case ANYOF:
				5052	case ANYOF + ADD_NL:
				5053	testval = TRUE;
				5054	/FALLTHROUGH/
				5055
				5056	case ANYBUT:
				5057	case ANYBUT + ADD_NL:
				5058	while (count < maxcount)
				5059	{
				5060	#ifdef FEAT_MBYTE
				5061	int len;
				5062	#endif
				5063	if (*scan == NUL)
				5064	{
				5065	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				5066	break;
				5067	reg_nextline();
				5068	scan = reginput;
				5069	if (got_int)
				5070	break;
				5071	}
				5072	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				5073	++scan;
				5074	#ifdef FEAT_MBYTE
				5075	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				5076	{
				5077	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				5078	break;
				5079	scan += len;
				5080	}
				5081	#endif
				5082	else
				5083	{
				5084	if ((cstrchr(opnd, *scan) == NULL) == testval)
				5085	break;
				5086	++scan;
				5087	}
				5088	++count;
				5089	}
				5090	break;
				5091
				5092	case NEWL:
				5093	while (count < maxcount
				5094	&& ((*scan == NUL && reglnum < reg_maxline)
				5095	\|\| (*scan == '\n' && reg_line_lbr)))
				5096	{
				5097	count++;
				5098	if (reg_line_lbr)
				5099	ADVANCE_REGINPUT();
				5100	else
				5101	reg_nextline();
				5102	scan = reginput;
				5103	if (got_int)
				5104	break;
				5105	}
				5106	break;
				5107
				5108	default: /* Oh dear. Called inappropriately. */
				5109	EMSG(_(e_re_corr));
				5110	#ifdef DEBUG
				5111	printf("Called regrepeat with op code %d\n", OP(p));
				5112	#endif
				5113	break;
				5114	}
				5115
				5116	reginput = scan;
				5117
				5118	return (int)count;
				5119	}
				5120
				5121	/*
				5122	* regnext - dig the "next" pointer out of a node
				5123	*/
				5124	static char_u *
				5125	regnext(p)
				5126	char_u *p;
				5127	{
				5128	int offset;
				5129
				5130	if (p == JUST_CALC_SIZE)
				5131	return NULL;
				5132
				5133	offset = NEXT(p);
				5134	if (offset == 0)
				5135	return NULL;
				5136
				5137	if (OP(p) == BACK)
				5138	return p - offset;
				5139	else
				5140	return p + offset;
				5141	}
				5142
				5143	/*
				5144	* Check the regexp program for its magic number.
				5145	* Return TRUE if it's wrong.
				5146	*/
				5147	static int
				5148	prog_magic_wrong()
				5149	{
				5150	if (UCHARAT(REG_MULTI
				5151	? reg_mmatch->regprog->program
				5152	: reg_match->regprog->program) != REGMAGIC)
				5153	{
				5154	EMSG(_(e_re_corr));
				5155	return TRUE;
				5156	}
				5157	return FALSE;
				5158	}
				5159
				5160	/*
				5161	* Cleanup the subexpressions, if this wasn't done yet.
				5162	* This construction is used to clear the subexpressions only when they are
				5163	* used (to increase speed).
				5164	*/
				5165	static void
				5166	cleanup_subexpr()
				5167	{
				5168	if (need_clear_subexpr)
				5169	{
				5170	if (REG_MULTI)
				5171	{
				5172	/* Use 0xff to set lnum to -1 */
				5173	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5174	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5175	}
				5176	else
				5177	{
				5178	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				5179	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				5180	}
				5181	need_clear_subexpr = FALSE;
				5182	}
				5183	}
				5184
				5185	#ifdef FEAT_SYN_HL
				5186	static void
				5187	cleanup_zsubexpr()
				5188	{
				5189	if (need_clear_zsubexpr)
				5190	{
				5191	if (REG_MULTI)
				5192	{
				5193	/* Use 0xff to set lnum to -1 */
				5194	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5195	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				5196	}
				5197	else
				5198	{
				5199	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				5200	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				5201	}
				5202	need_clear_zsubexpr = FALSE;
				5203	}
				5204	}
				5205	#endif
				5206
				5207	/*
				5208	* Advance reglnum, regline and reginput to the next line.
				5209	*/
				5210	static void
				5211	reg_nextline()
				5212	{
				5213	regline = reg_getline(++reglnum);
				5214	reginput = regline;
				5215	fast_breakcheck();
				5216	}
				5217
				5218	/*
				5219	* Save the input line and position in a regsave_T.
				5220	*/
				5221	static void
				5222	reg_save(save)
				5223	regsave_T *save;
				5224	{
				5225	if (REG_MULTI)
				5226	{
				5227	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				5228	save->rs_u.pos.lnum = reglnum;
				5229	}
				5230	else
				5231	save->rs_u.ptr = reginput;
				5232	}
				5233
				5234	/*
				5235	* Restore the input line and position from a regsave_T.
				5236	*/
				5237	static void
				5238	reg_restore(save)
				5239	regsave_T *save;
				5240	{
				5241	if (REG_MULTI)
				5242	{
				5243	if (reglnum != save->rs_u.pos.lnum)
				5244	{
				5245	/* only call reg_getline() when the line number changed to save
				5246	* a bit of time */
				5247	reglnum = save->rs_u.pos.lnum;
				5248	regline = reg_getline(reglnum);
				5249	}
				5250	reginput = regline + save->rs_u.pos.col;
				5251	}
				5252	else
				5253	reginput = save->rs_u.ptr;
				5254	}
				5255
				5256	/*
				5257	* Return TRUE if current position is equal to saved position.
				5258	*/
				5259	static int
				5260	reg_save_equal(save)
				5261	regsave_T *save;
				5262	{
				5263	if (REG_MULTI)
				5264	return reglnum == save->rs_u.pos.lnum
				5265	&& reginput == regline + save->rs_u.pos.col;
				5266	return reginput == save->rs_u.ptr;
				5267	}
				5268
				5269	/*
				5270	* Tentatively set the sub-expression start to the current position (after
				5271	* calling regmatch() they will have changed). Need to save the existing
				5272	* values for when there is no match.
				5273	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				5274	* depending on REG_MULTI.
				5275	*/
				5276	static void
				5277	save_se_multi(savep, posp)
				5278	save_se_T *savep;
				5279	lpos_T *posp;
				5280	{
				5281	savep->se_u.pos = *posp;
				5282	posp->lnum = reglnum;
				5283	posp->col = (colnr_T)(reginput - regline);
				5284	}
				5285
				5286	static void
				5287	save_se_one(savep, pp)
				5288	save_se_T *savep;
				5289	char_u **pp;
				5290	{
				5291	savep->se_u.ptr = *pp;
				5292	*pp = reginput;
				5293	}
				5294
				5295	/*
				5296	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				5297	*/
				5298	static int
				5299	re_num_cmp(val, scan)
				5300	long_u val;
				5301	char_u *scan;
				5302	{
				5303	long_u n = OPERAND_MIN(scan);
				5304
				5305	if (OPERAND_CMP(scan) == '>')
				5306	return val > n;
				5307	if (OPERAND_CMP(scan) == '<')
				5308	return val < n;
				5309	return val == n;
				5310	}
				5311
				5312
				5313	#ifdef DEBUG
				5314
				5315	/*
				5316	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5317	*/
				5318	static void
				5319	regdump(pattern, r)
				5320	char_u *pattern;
				5321	regprog_T *r;
				5322	{
				5323	char_u *s;
				5324	int op = EXACTLY; /* Arbitrary non-END op. */
				5325	char_u *next;
				5326	char_u *end = NULL;
				5327
				5328	printf("\r\nregcomp(%s):\r\n", pattern);
				5329
				5330	s = r->program + 1;
				5331	/*
				5332	* Loop until we find the END that isn't before a referred next (an END
				5333	* can also appear in a NOMATCH operand).
				5334	*/
				5335	while (op != END \|\| s <= end)
				5336	{
				5337	op = OP(s);
				5338	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5339	next = regnext(s);
				5340	if (next == NULL) /* Next ptr. */
				5341	printf("(0)");
				5342	else
				5343	printf("(%d)", (int)((s - r->program) + (next - s)));
				5344	if (end < next)
				5345	end = next;
				5346	if (op == BRACE_LIMITS)
				5347	{
				5348	/* Two short ints */
				5349	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5350	s += 8;
				5351	}
				5352	s += 3;
				5353	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5354	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5355	\|\| op == EXACTLY)
				5356	{
				5357	/* Literal string, where present. */
				5358	while (*s != NUL)
				5359	printf("%c", *s++);
				5360	s++;
				5361	}
				5362	printf("\r\n");
				5363	}
				5364
				5365	/* Header fields of interest. */
				5366	if (r->regstart != NUL)
				5367	printf("start `%s' 0x%x; ", r->regstart < 256
				5368	? (char *)transchar(r->regstart)
				5369	: "multibyte", r->regstart);
				5370	if (r->reganch)
				5371	printf("anchored; ");
				5372	if (r->regmust != NULL)
				5373	printf("must have \"%s\"", r->regmust);
				5374	printf("\r\n");
				5375	}
				5376
				5377	/*
				5378	* regprop - printable representation of opcode
				5379	*/
				5380	static char_u *
				5381	regprop(op)
				5382	char_u *op;
				5383	{
				5384	char_u *p;
				5385	static char_u buf[50];
				5386
				5387	(void) strcpy(buf, ":");
				5388
				5389	switch (OP(op))
				5390	{
				5391	case BOL:
				5392	p = "BOL";
				5393	break;
				5394	case EOL:
				5395	p = "EOL";
				5396	break;
				5397	case RE_BOF:
				5398	p = "BOF";
				5399	break;
				5400	case RE_EOF:
				5401	p = "EOF";
				5402	break;
				5403	case CURSOR:
				5404	p = "CURSOR";
				5405	break;
				5406	case RE_LNUM:
				5407	p = "RE_LNUM";
				5408	break;
				5409	case RE_COL:
				5410	p = "RE_COL";
				5411	break;
				5412	case RE_VCOL:
				5413	p = "RE_VCOL";
				5414	break;
				5415	case BOW:
				5416	p = "BOW";
				5417	break;
				5418	case EOW:
				5419	p = "EOW";
				5420	break;
				5421	case ANY:
				5422	p = "ANY";
				5423	break;
				5424	case ANY + ADD_NL:
				5425	p = "ANY+NL";
				5426	break;
				5427	case ANYOF:
				5428	p = "ANYOF";
				5429	break;
				5430	case ANYOF + ADD_NL:
				5431	p = "ANYOF+NL";
				5432	break;
				5433	case ANYBUT:
				5434	p = "ANYBUT";
				5435	break;
				5436	case ANYBUT + ADD_NL:
				5437	p = "ANYBUT+NL";
				5438	break;
				5439	case IDENT:
				5440	p = "IDENT";
				5441	break;
				5442	case IDENT + ADD_NL:
				5443	p = "IDENT+NL";
				5444	break;
				5445	case SIDENT:
				5446	p = "SIDENT";
				5447	break;
				5448	case SIDENT + ADD_NL:
				5449	p = "SIDENT+NL";
				5450	break;
				5451	case KWORD:
				5452	p = "KWORD";
				5453	break;
				5454	case KWORD + ADD_NL:
				5455	p = "KWORD+NL";
				5456	break;
				5457	case SKWORD:
				5458	p = "SKWORD";
				5459	break;
				5460	case SKWORD + ADD_NL:
				5461	p = "SKWORD+NL";
				5462	break;
				5463	case FNAME:
				5464	p = "FNAME";
				5465	break;
				5466	case FNAME + ADD_NL:
				5467	p = "FNAME+NL";
				5468	break;
				5469	case SFNAME:
				5470	p = "SFNAME";
				5471	break;
				5472	case SFNAME + ADD_NL:
				5473	p = "SFNAME+NL";
				5474	break;
				5475	case PRINT:
				5476	p = "PRINT";
				5477	break;
				5478	case PRINT + ADD_NL:
				5479	p = "PRINT+NL";
				5480	break;
				5481	case SPRINT:
				5482	p = "SPRINT";
				5483	break;
				5484	case SPRINT + ADD_NL:
				5485	p = "SPRINT+NL";
				5486	break;
				5487	case WHITE:
				5488	p = "WHITE";
				5489	break;
				5490	case WHITE + ADD_NL:
				5491	p = "WHITE+NL";
				5492	break;
				5493	case NWHITE:
				5494	p = "NWHITE";
				5495	break;
				5496	case NWHITE + ADD_NL:
				5497	p = "NWHITE+NL";
				5498	break;
				5499	case DIGIT:
				5500	p = "DIGIT";
				5501	break;
				5502	case DIGIT + ADD_NL:
				5503	p = "DIGIT+NL";
				5504	break;
				5505	case NDIGIT:
				5506	p = "NDIGIT";
				5507	break;
				5508	case NDIGIT + ADD_NL:
				5509	p = "NDIGIT+NL";
				5510	break;
				5511	case HEX:
				5512	p = "HEX";
				5513	break;
				5514	case HEX + ADD_NL:
				5515	p = "HEX+NL";
				5516	break;
				5517	case NHEX:
				5518	p = "NHEX";
				5519	break;
				5520	case NHEX + ADD_NL:
				5521	p = "NHEX+NL";
				5522	break;
				5523	case OCTAL:
				5524	p = "OCTAL";
				5525	break;
				5526	case OCTAL + ADD_NL:
				5527	p = "OCTAL+NL";
				5528	break;
				5529	case NOCTAL:
				5530	p = "NOCTAL";
				5531	break;
				5532	case NOCTAL + ADD_NL:
				5533	p = "NOCTAL+NL";
				5534	break;
				5535	case WORD:
				5536	p = "WORD";
				5537	break;
				5538	case WORD + ADD_NL:
				5539	p = "WORD+NL";
				5540	break;
				5541	case NWORD:
				5542	p = "NWORD";
				5543	break;
				5544	case NWORD + ADD_NL:
				5545	p = "NWORD+NL";
				5546	break;
				5547	case HEAD:
				5548	p = "HEAD";
				5549	break;
				5550	case HEAD + ADD_NL:
				5551	p = "HEAD+NL";
				5552	break;
				5553	case NHEAD:
				5554	p = "NHEAD";
				5555	break;
				5556	case NHEAD + ADD_NL:
				5557	p = "NHEAD+NL";
				5558	break;
				5559	case ALPHA:
				5560	p = "ALPHA";
				5561	break;
				5562	case ALPHA + ADD_NL:
				5563	p = "ALPHA+NL";
				5564	break;
				5565	case NALPHA:
				5566	p = "NALPHA";
				5567	break;
				5568	case NALPHA + ADD_NL:
				5569	p = "NALPHA+NL";
				5570	break;
				5571	case LOWER:
				5572	p = "LOWER";
				5573	break;
				5574	case LOWER + ADD_NL:
				5575	p = "LOWER+NL";
				5576	break;
				5577	case NLOWER:
				5578	p = "NLOWER";
				5579	break;
				5580	case NLOWER + ADD_NL:
				5581	p = "NLOWER+NL";
				5582	break;
				5583	case UPPER:
				5584	p = "UPPER";
				5585	break;
				5586	case UPPER + ADD_NL:
				5587	p = "UPPER+NL";
				5588	break;
				5589	case NUPPER:
				5590	p = "NUPPER";
				5591	break;
				5592	case NUPPER + ADD_NL:
				5593	p = "NUPPER+NL";
				5594	break;
				5595	case BRANCH:
				5596	p = "BRANCH";
				5597	break;
				5598	case EXACTLY:
				5599	p = "EXACTLY";
				5600	break;
				5601	case NOTHING:
				5602	p = "NOTHING";
				5603	break;
				5604	case BACK:
				5605	p = "BACK";
				5606	break;
				5607	case END:
				5608	p = "END";
				5609	break;
				5610	case MOPEN + 0:
				5611	p = "MATCH START";
				5612	break;
				5613	case MOPEN + 1:
				5614	case MOPEN + 2:
				5615	case MOPEN + 3:
				5616	case MOPEN + 4:
				5617	case MOPEN + 5:
				5618	case MOPEN + 6:
				5619	case MOPEN + 7:
				5620	case MOPEN + 8:
				5621	case MOPEN + 9:
				5622	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5623	p = NULL;
				5624	break;
				5625	case MCLOSE + 0:
				5626	p = "MATCH END";
				5627	break;
				5628	case MCLOSE + 1:
				5629	case MCLOSE + 2:
				5630	case MCLOSE + 3:
				5631	case MCLOSE + 4:
				5632	case MCLOSE + 5:
				5633	case MCLOSE + 6:
				5634	case MCLOSE + 7:
				5635	case MCLOSE + 8:
				5636	case MCLOSE + 9:
				5637	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5638	p = NULL;
				5639	break;
				5640	case BACKREF + 1:
				5641	case BACKREF + 2:
				5642	case BACKREF + 3:
				5643	case BACKREF + 4:
				5644	case BACKREF + 5:
				5645	case BACKREF + 6:
				5646	case BACKREF + 7:
				5647	case BACKREF + 8:
				5648	case BACKREF + 9:
				5649	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5650	p = NULL;
				5651	break;
				5652	case NOPEN:
				5653	p = "NOPEN";
				5654	break;
				5655	case NCLOSE:
				5656	p = "NCLOSE";
				5657	break;
				5658	#ifdef FEAT_SYN_HL
				5659	case ZOPEN + 1:
				5660	case ZOPEN + 2:
				5661	case ZOPEN + 3:
				5662	case ZOPEN + 4:
				5663	case ZOPEN + 5:
				5664	case ZOPEN + 6:
				5665	case ZOPEN + 7:
				5666	case ZOPEN + 8:
				5667	case ZOPEN + 9:
				5668	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5669	p = NULL;
				5670	break;
				5671	case ZCLOSE + 1:
				5672	case ZCLOSE + 2:
				5673	case ZCLOSE + 3:
				5674	case ZCLOSE + 4:
				5675	case ZCLOSE + 5:
				5676	case ZCLOSE + 6:
				5677	case ZCLOSE + 7:
				5678	case ZCLOSE + 8:
				5679	case ZCLOSE + 9:
				5680	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5681	p = NULL;
				5682	break;
				5683	case ZREF + 1:
				5684	case ZREF + 2:
				5685	case ZREF + 3:
				5686	case ZREF + 4:
				5687	case ZREF + 5:
				5688	case ZREF + 6:
				5689	case ZREF + 7:
				5690	case ZREF + 8:
				5691	case ZREF + 9:
				5692	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5693	p = NULL;
				5694	break;
				5695	#endif
				5696	case STAR:
				5697	p = "STAR";
				5698	break;
				5699	case PLUS:
				5700	p = "PLUS";
				5701	break;
				5702	case NOMATCH:
				5703	p = "NOMATCH";
				5704	break;
				5705	case MATCH:
				5706	p = "MATCH";
				5707	break;
				5708	case BEHIND:
				5709	p = "BEHIND";
				5710	break;
				5711	case NOBEHIND:
				5712	p = "NOBEHIND";
				5713	break;
				5714	case SUBPAT:
				5715	p = "SUBPAT";
				5716	break;
				5717	case BRACE_LIMITS:
				5718	p = "BRACE_LIMITS";
				5719	break;
				5720	case BRACE_SIMPLE:
				5721	p = "BRACE_SIMPLE";
				5722	break;
				5723	case BRACE_COMPLEX + 0:
				5724	case BRACE_COMPLEX + 1:
				5725	case BRACE_COMPLEX + 2:
				5726	case BRACE_COMPLEX + 3:
				5727	case BRACE_COMPLEX + 4:
				5728	case BRACE_COMPLEX + 5:
				5729	case BRACE_COMPLEX + 6:
				5730	case BRACE_COMPLEX + 7:
				5731	case BRACE_COMPLEX + 8:
				5732	case BRACE_COMPLEX + 9:
				5733	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5734	p = NULL;
				5735	break;
				5736	#ifdef FEAT_MBYTE
				5737	case MULTIBYTECODE:
				5738	p = "MULTIBYTECODE";
				5739	break;
				5740	#endif
				5741	case NEWL:
				5742	p = "NEWL";
				5743	break;
				5744	default:
				5745	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5746	p = NULL;
				5747	break;
				5748	}
				5749	if (p != NULL)
				5750	(void) strcat(buf, p);
				5751	return buf;
				5752	}
				5753	#endif
				5754
				5755	#ifdef FEAT_MBYTE
				5756	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5757
				5758	typedef struct
				5759	{
				5760	int a, b, c;
				5761	} decomp_T;
				5762
				5763
				5764	/* 0xfb20 - 0xfb4f */
				5765	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5766	{
				5767	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5768	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5769	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5770	{0x5d4,0,0}, /* 0xfb23 alt he */
				5771	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5772	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5773	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5774	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5775	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5776	{'+', 0, 0}, /* 0xfb29 alt plus */
				5777	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5778	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5779	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5780	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5781	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5782	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5783	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5784	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5785	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5786	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5787	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5788	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5789	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5790	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5791	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5792	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5793	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5794	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5795	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5796	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5797	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5798	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5799	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5800	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5801	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5802	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5803	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5804	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5805	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5806	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5807	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5808	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5809	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5810	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5811	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5812	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5813	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5814	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5815	};
				5816
				5817	static void
				5818	mb_decompose(c, c1, c2, c3)
				5819	int c, c1, c2, *c3;
				5820	{
				5821	decomp_T d;
				5822
				5823	if (c >= 0x4b20 && c <= 0xfb4f)
				5824	{
				5825	d = decomp_table[c - 0xfb20];
				5826	*c1 = d.a;
				5827	*c2 = d.b;
				5828	*c3 = d.c;
				5829	}
				5830	else
				5831	{
				5832	*c1 = c;
				5833	c2 = c3 = 0;
				5834	}
				5835	}
				5836	#endif
				5837
				5838	/*
				5839	* Compare two strings, ignore case if ireg_ic set.
				5840	* Return 0 if strings match, non-zero otherwise.
				5841	* Correct the length "*n" when composing characters are ignored.
				5842	*/
				5843	static int
				5844	cstrncmp(s1, s2, n)
				5845	char_u s1, s2;
				5846	int *n;
				5847	{
				5848	int result;
				5849
				5850	if (!ireg_ic)
				5851	result = STRNCMP(s1, s2, *n);
				5852	else
				5853	result = MB_STRNICMP(s1, s2, *n);
				5854
				5855	#ifdef FEAT_MBYTE
				5856	/* if it failed and it's utf8 and we want to combineignore: */
				5857	if (result != 0 && enc_utf8 && ireg_icombine)
				5858	{
				5859	char_u str1, str2;
				5860	int c1, c2, c11, c12;
				5861	int ix;
				5862	int junk;
				5863
				5864	/* we have to handle the strcmp ourselves, since it is necessary to
				5865	* deal with the composing characters by ignoring them: */
				5866	str1 = s1;
				5867	str2 = s2;
				5868	c1 = c2 = 0;
				5869	for (ix = 0; ix < *n; )
				5870	{
				5871	c1 = mb_ptr2char_adv(&str1);
				5872	c2 = mb_ptr2char_adv(&str2);
				5873	ix += utf_char2len(c1);
				5874
				5875	/* decompose the character if necessary, into 'base' characters
				5876	* because I don't care about Arabic, I will hard-code the Hebrew
				5877	* which I do care about! So sue me... */
				5878	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5879	{
				5880	/* decomposition necessary? */
				5881	mb_decompose(c1, &c11, &junk, &junk);
				5882	mb_decompose(c2, &c12, &junk, &junk);
				5883	c1 = c11;
				5884	c2 = c12;
				5885	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5886	break;
				5887	}
				5888	}
				5889	result = c2 - c1;
				5890	if (result == 0)
				5891	*n = (int)(str2 - s2);
				5892	}
				5893	#endif
				5894
				5895	return result;
				5896	}
				5897
				5898	/*
				5899	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5900	*/
				5901	static char_u *
				5902	cstrchr(s, c)
				5903	char_u *s;
				5904	int c;
				5905	{
				5906	char_u *p;
				5907	int cc;
				5908
				5909	if (!ireg_ic
				5910	#ifdef FEAT_MBYTE
				5911	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5912	#endif
				5913	)
				5914	return vim_strchr(s, c);
				5915
				5916	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5917	* faster (esp. when using MS Visual C++!).
				5918	* For UTF-8 need to use folded case. */
				5919	#ifdef FEAT_MBYTE
				5920	if (enc_utf8 && c > 0x80)
				5921	cc = utf_fold(c);
				5922	else
				5923	#endif
				5924	if (isupper(c))
				5925	cc = TOLOWER_LOC(c);
				5926	else if (islower(c))
				5927	cc = TOUPPER_LOC(c);
				5928	else
				5929	return vim_strchr(s, c);
				5930
				5931	#ifdef FEAT_MBYTE
				5932	if (has_mbyte)
				5933	{
				5934	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5935	{
				5936	if (enc_utf8 && c > 0x80)
				5937	{
				5938	if (utf_fold(utf_ptr2char(p)) == cc)
				5939	return p;
				5940	}
				5941	else if (p == c \|\| p == cc)
				5942	return p;
				5943	}
				5944	}
				5945	else
				5946	#endif
				5947	/* Faster version for when there are no multi-byte characters. */
				5948	for (p = s; *p != NUL; ++p)
				5949	if (p == c \|\| p == cc)
				5950	return p;
				5951
				5952	return NULL;
				5953	}
				5954
				5955	/***************************************************************
				5956	* regsub stuff *
				5957	***************************************************************/
				5958
				5959	/* This stuff below really confuses cc on an SGI -- webb */
				5960	#ifdef __sgi
				5961	# undef __ARGS
				5962	# define __ARGS(x) ()
				5963	#endif
				5964
				5965	/*
				5966	* We should define ftpr as a pointer to a function returning a pointer to
				5967	* a function returning a pointer to a function ...
				5968	* This is impossible, so we declare a pointer to a function returning a
				5969	* pointer to a function returning void. This should work for all compilers.
				5970	*/
				5971	typedef void ((fptr) __ARGS((char_u *, int)))();
				5972
				5973	static fptr do_upper __ARGS((char_u *, int));
				5974	static fptr do_Upper __ARGS((char_u *, int));
				5975	static fptr do_lower __ARGS((char_u *, int));
				5976	static fptr do_Lower __ARGS((char_u *, int));
				5977
				5978	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5979
				5980	static fptr
				5981	do_upper(d, c)
				5982	char_u *d;
				5983	int c;
				5984	{
				5985	*d = TOUPPER_LOC(c);
				5986
				5987	return (fptr)NULL;
				5988	}
				5989
				5990	static fptr
				5991	do_Upper(d, c)
				5992	char_u *d;
				5993	int c;
				5994	{
				5995	*d = TOUPPER_LOC(c);
				5996
				5997	return (fptr)do_Upper;
				5998	}
				5999
				6000	static fptr
				6001	do_lower(d, c)
				6002	char_u *d;
				6003	int c;
				6004	{
				6005	*d = TOLOWER_LOC(c);
				6006
				6007	return (fptr)NULL;
				6008	}
				6009
				6010	static fptr
				6011	do_Lower(d, c)
				6012	char_u *d;
				6013	int c;
				6014	{
				6015	*d = TOLOWER_LOC(c);
				6016
				6017	return (fptr)do_Lower;
				6018	}
				6019
				6020	/*
				6021	* regtilde(): Replace tildes in the pattern by the old pattern.
				6022	*
				6023	* Short explanation of the tilde: It stands for the previous replacement
				6024	* pattern. If that previous pattern also contains a ~ we should go back a
				6025	* step further... But we insert the previous pattern into the current one
				6026	* and remember that.
				6027	* This still does not handle the case where "magic" changes. TODO?
				6028	*
				6029	* The tildes are parsed once before the first call to vim_regsub().
				6030	*/
				6031	char_u *
				6032	regtilde(source, magic)
				6033	char_u *source;
				6034	int magic;
				6035	{
				6036	char_u *newsub = source;
				6037	char_u *tmpsub;
				6038	char_u *p;
				6039	int len;
				6040	int prevlen;
				6041
				6042	for (p = newsub; *p; ++p)
				6043	{
				6044	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				6045	{
				6046	if (reg_prev_sub != NULL)
				6047	{
				6048	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				6049	prevlen = (int)STRLEN(reg_prev_sub);
				6050	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				6051	if (tmpsub != NULL)
				6052	{
				6053	/* copy prefix */
				6054	len = (int)(p - newsub); /* not including ~ */
				6055	mch_memmove(tmpsub, newsub, (size_t)len);
				6056	/* interpretate tilde */
				6057	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				6058	/* copy postfix */
				6059	if (!magic)
				6060	++p; /* back off \ */
				6061	STRCPY(tmpsub + len + prevlen, p + 1);
				6062
				6063	if (newsub != source) /* already allocated newsub */
				6064	vim_free(newsub);
				6065	newsub = tmpsub;
				6066	p = newsub + len + prevlen;
				6067	}
				6068	}
				6069	else if (magic)
				6070	STRCPY(p, p + 1); /* remove '~' */
				6071	else
				6072	STRCPY(p, p + 2); /* remove '\~' */
				6073	--p;
				6074	}
				6075	else
				6076	{
				6077	if (p == '\\' && p[1]) / skip escaped characters */
				6078	++p;
				6079	#ifdef FEAT_MBYTE
				6080	if (has_mbyte)
				6081	p += (*mb_ptr2len_check)(p) - 1;
				6082	#endif
				6083	}
				6084	}
				6085
				6086	vim_free(reg_prev_sub);
				6087	if (newsub != source) /* newsub was allocated, just keep it */
				6088	reg_prev_sub = newsub;
				6089	else /* no ~ found, need to save newsub */
				6090	reg_prev_sub = vim_strsave(newsub);
				6091	return newsub;
				6092	}
				6093
				6094	#ifdef FEAT_EVAL
				6095	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				6096
				6097	/* These pointers are used instead of reg_match and reg_mmatch for
				6098	* reg_submatch(). Needed for when the substitution string is an expression
				6099	* that contains a call to substitute() and submatch(). */
				6100	static regmatch_T *submatch_match;
				6101	static regmmatch_T *submatch_mmatch;
				6102	#endif
				6103
				6104	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				6105	/*
				6106	* vim_regsub() - perform substitutions after a vim_regexec() or
				6107	* vim_regexec_multi() match.
				6108	*
				6109	* If "copy" is TRUE really copy into "dest".
				6110	* If "copy" is FALSE nothing is copied, this is just to find out the length
				6111	* of the result.
				6112	*
				6113	* If "backslash" is TRUE, a backslash will be removed later, need to double
				6114	* them to keep them, and insert a backslash before a CR to avoid it being
				6115	* replaced with a line break later.
				6116	*
				6117	* Note: The matched text must not change between the call of
				6118	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				6119	* references invalid!
				6120	*
				6121	* Returns the size of the replacement, including terminating NUL.
				6122	*/
				6123	int
				6124	vim_regsub(rmp, source, dest, copy, magic, backslash)
				6125	regmatch_T *rmp;
				6126	char_u *source;
				6127	char_u *dest;
				6128	int copy;
				6129	int magic;
				6130	int backslash;
				6131	{
				6132	reg_match = rmp;
				6133	reg_mmatch = NULL;
				6134	reg_maxline = 0;
				6135	return vim_regsub_both(source, dest, copy, magic, backslash);
				6136	}
				6137	#endif
				6138
				6139	int
				6140	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				6141	regmmatch_T *rmp;
				6142	linenr_T lnum;
				6143	char_u *source;
				6144	char_u *dest;
				6145	int copy;
				6146	int magic;
				6147	int backslash;
				6148	{
				6149	reg_match = NULL;
				6150	reg_mmatch = rmp;
				6151	reg_buf = curbuf; /* always works on the current buffer! */
				6152	reg_firstlnum = lnum;
				6153	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				6154	return vim_regsub_both(source, dest, copy, magic, backslash);
				6155	}
				6156
				6157	static int
				6158	vim_regsub_both(source, dest, copy, magic, backslash)
				6159	char_u *source;
				6160	char_u *dest;
				6161	int copy;
				6162	int magic;
				6163	int backslash;
				6164	{
				6165	char_u *src;
				6166	char_u *dst;
				6167	char_u *s;
				6168	int c;
				6169	int no = -1;
				6170	fptr func = (fptr)NULL;
				6171	linenr_T clnum = 0; /* init for GCC */
				6172	int len = 0; /* init for GCC */
				6173	#ifdef FEAT_EVAL
				6174	static char_u *eval_result = NULL;
				6175	#endif
				6176	#ifdef FEAT_MBYTE
				6177	int l;
				6178	#endif
				6179
				6180
				6181	/* Be paranoid... */
				6182	if (source == NULL \|\| dest == NULL)
				6183	{
				6184	EMSG(_(e_null));
				6185	return 0;
				6186	}
				6187	if (prog_magic_wrong())
				6188	return 0;
				6189	src = source;
				6190	dst = dest;
				6191
				6192	/*
				6193	* When the substitute part starts with "\=" evaluate it as an expression.
				6194	*/
				6195	if (source[0] == '\\' && source[1] == '='
				6196	#ifdef FEAT_EVAL
				6197	&& !can_f_submatch /* can't do this recursively */
				6198	#endif
				6199	)
				6200	{
				6201	#ifdef FEAT_EVAL
				6202	/* To make sure that the length doesn't change between checking the
				6203	* length and copying the string, and to speed up things, the
				6204	* resulting string is saved from the call with "copy" == FALSE to the
				6205	* call with "copy" == TRUE. */
				6206	if (copy)
				6207	{
				6208	if (eval_result != NULL)
				6209	{
				6210	STRCPY(dest, eval_result);
				6211	dst += STRLEN(eval_result);
				6212	vim_free(eval_result);
				6213	eval_result = NULL;
				6214	}
				6215	}
				6216	else
				6217	{
				6218	linenr_T save_reg_maxline;
				6219	win_T *save_reg_win;
				6220	int save_ireg_ic;
				6221
				6222	vim_free(eval_result);
				6223
				6224	/* The expression may contain substitute(), which calls us
				6225	* recursively. Make sure submatch() gets the text from the first
				6226	* level. Don't need to save "reg_buf", because
				6227	* vim_regexec_multi() can't be called recursively. */
				6228	submatch_match = reg_match;
				6229	submatch_mmatch = reg_mmatch;
				6230	save_reg_maxline = reg_maxline;
				6231	save_reg_win = reg_win;
				6232	save_ireg_ic = ireg_ic;
				6233	can_f_submatch = TRUE;
				6234
				6235	eval_result = eval_to_string(source + 2, NULL);
				6236	if (eval_result != NULL)
				6237	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	6238	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6239	{
				6240	/* Change NL to CR, so that it becomes a line break.
				6241	* Skip over a backslashed character. */
				6242	if (*s == NL)
				6243	*s = CAR;
				6244	else if (*s == '\\' && s[1] != NUL)
				6245	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	6246	}
				6247
				6248	dst += STRLEN(eval_result);
				6249	}
				6250
				6251	reg_match = submatch_match;
				6252	reg_mmatch = submatch_mmatch;
				6253	reg_maxline = save_reg_maxline;
				6254	reg_win = save_reg_win;
				6255	ireg_ic = save_ireg_ic;
				6256	can_f_submatch = FALSE;
				6257	}
				6258	#endif
				6259	}
				6260	else
				6261	while ((c = *src++) != NUL)
				6262	{
				6263	if (c == '&' && magic)
				6264	no = 0;
				6265	else if (c == '\\' && *src != NUL)
				6266	{
				6267	if (*src == '&' && !magic)
				6268	{
				6269	++src;
				6270	no = 0;
				6271	}
				6272	else if ('0' <= src && src <= '9')
				6273	{
				6274	no = *src++ - '0';
				6275	}
				6276	else if (vim_strchr((char_u )"uUlLeE", src))
				6277	{
				6278	switch (*src++)
				6279	{
				6280	case 'u': func = (fptr)do_upper;
				6281	continue;
				6282	case 'U': func = (fptr)do_Upper;
				6283	continue;
				6284	case 'l': func = (fptr)do_lower;
				6285	continue;
				6286	case 'L': func = (fptr)do_Lower;
				6287	continue;
				6288	case 'e':
				6289	case 'E': func = (fptr)NULL;
				6290	continue;
				6291	}
				6292	}
				6293	}
				6294	if (no < 0) /* Ordinary character. */
				6295	{
				6296	if (c == '\\' && *src != NUL)
				6297	{
				6298	/* Check for abbreviations -- webb */
				6299	switch (*src)
				6300	{
				6301	case 'r': c = CAR; ++src; break;
				6302	case 'n': c = NL; ++src; break;
				6303	case 't': c = TAB; ++src; break;
				6304	/* Oh no! \e already has meaning in subst pat :-( */
				6305	/* case 'e': c = ESC; ++src; break; */
				6306	case 'b': c = Ctrl_H; ++src; break;
				6307
				6308	/* If "backslash" is TRUE the backslash will be removed
				6309	* later. Used to insert a literal CR. */
				6310	default: if (backslash)
				6311	{
				6312	if (copy)
				6313	*dst = '\\';
				6314	++dst;
				6315	}
				6316	c = *src++;
				6317	}
				6318	}
				6319
				6320	/* Write to buffer, if copy is set. */
				6321	#ifdef FEAT_MBYTE
				6322	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6323	{
				6324	/* TODO: should use "func" here. */
				6325	if (copy)
				6326	mch_memmove(dst, src - 1, l);
				6327	dst += l - 1;
				6328	src += l - 1;
				6329	}
				6330	else
				6331	{
				6332	#endif
				6333	if (copy)
				6334	{
				6335	if (func == (fptr)NULL) /* just copy */
				6336	*dst = c;
				6337	else /* change case */
				6338	func = (fptr)(func(dst, c));
				6339	/* Turbo C complains without the typecast */
				6340	}
				6341	#ifdef FEAT_MBYTE
				6342	}
				6343	#endif
				6344	dst++;
				6345	}
				6346	else
				6347	{
				6348	if (REG_MULTI)
				6349	{
				6350	clnum = reg_mmatch->startpos[no].lnum;
				6351	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6352	s = NULL;
				6353	else
				6354	{
				6355	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6356	if (reg_mmatch->endpos[no].lnum == clnum)
				6357	len = reg_mmatch->endpos[no].col
				6358	- reg_mmatch->startpos[no].col;
				6359	else
				6360	len = (int)STRLEN(s);
				6361	}
				6362	}
				6363	else
				6364	{
				6365	s = reg_match->startp[no];
				6366	if (reg_match->endp[no] == NULL)
				6367	s = NULL;
				6368	else
				6369	len = (int)(reg_match->endp[no] - s);
				6370	}
				6371	if (s != NULL)
				6372	{
				6373	for (;;)
				6374	{
				6375	if (len == 0)
				6376	{
				6377	if (REG_MULTI)
				6378	{
				6379	if (reg_mmatch->endpos[no].lnum == clnum)
				6380	break;
				6381	if (copy)
				6382	*dst = CAR;
				6383	++dst;
				6384	s = reg_getline(++clnum);
				6385	if (reg_mmatch->endpos[no].lnum == clnum)
				6386	len = reg_mmatch->endpos[no].col;
				6387	else
				6388	len = (int)STRLEN(s);
				6389	}
				6390	else
				6391	break;
				6392	}
				6393	else if (s == NUL) / we hit NUL. */
				6394	{
				6395	if (copy)
				6396	EMSG(_(e_re_damg));
				6397	goto exit;
				6398	}
				6399	else
				6400	{
				6401	if (backslash && (s == CAR \|\| s == '\\'))
				6402	{
				6403	/*
				6404	* Insert a backslash in front of a CR, otherwise
				6405	* it will be replaced by a line break.
				6406	* Number of backslashes will be halved later,
				6407	* double them here.
				6408	*/
				6409	if (copy)
				6410	{
				6411	dst[0] = '\\';
				6412	dst[1] = *s;
				6413	}
				6414	dst += 2;
				6415	}
				6416	#ifdef FEAT_MBYTE
				6417	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6418	{
				6419	/* TODO: should use "func" here. */
				6420	if (copy)
				6421	mch_memmove(dst, s, l);
				6422	dst += l;
				6423	s += l - 1;
				6424	len -= l - 1;
				6425	}
				6426	#endif
				6427	else
				6428	{
				6429	if (copy)
				6430	{
				6431	if (func == (fptr)NULL) /* just copy */
				6432	dst = s;
				6433	else /* change case */
				6434	func = (fptr)(func(dst, *s));
				6435	/* Turbo C complains without the typecast */
				6436	}
				6437	++dst;
				6438	}
				6439	++s;
				6440	--len;
				6441	}
				6442	}
				6443	}
				6444	no = -1;
				6445	}
				6446	}
				6447	if (copy)
				6448	*dst = NUL;
				6449
				6450	exit:
				6451	return (int)((dst - dest) + 1);
				6452	}
				6453
				6454	#ifdef FEAT_EVAL
				6455	/*
				6456	* Used for the submatch() function: get the string from tne n'th submatch in
				6457	* allocated memory.
				6458	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6459	*/
				6460	char_u *
				6461	reg_submatch(no)
				6462	int no;
				6463	{
				6464	char_u *retval = NULL;
				6465	char_u *s;
				6466	int len;
				6467	int round;
				6468	linenr_T lnum;
				6469
				6470	if (!can_f_submatch)
				6471	return NULL;
				6472
				6473	if (submatch_match == NULL)
				6474	{
				6475	/*
				6476	* First round: compute the length and allocate memory.
				6477	* Second round: copy the text.
				6478	*/
				6479	for (round = 1; round <= 2; ++round)
				6480	{
				6481	lnum = submatch_mmatch->startpos[no].lnum;
				6482	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6483	return NULL;
				6484
				6485	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6486	if (s == NULL) /* anti-crash check, cannot happen? */
				6487	break;
				6488	if (submatch_mmatch->endpos[no].lnum == lnum)
				6489	{
				6490	/* Within one line: take form start to end col. */
				6491	len = submatch_mmatch->endpos[no].col
				6492	- submatch_mmatch->startpos[no].col;
				6493	if (round == 2)
				6494	{
				6495	STRNCPY(retval, s, len);
				6496	retval[len] = NUL;
				6497	}
				6498	++len;
				6499	}
				6500	else
				6501	{
				6502	/* Multiple lines: take start line from start col, middle
				6503	* lines completely and end line up to end col. */
				6504	len = (int)STRLEN(s);
				6505	if (round == 2)
				6506	{
				6507	STRCPY(retval, s);
				6508	retval[len] = '\n';
				6509	}
				6510	++len;
				6511	++lnum;
				6512	while (lnum < submatch_mmatch->endpos[no].lnum)
				6513	{
				6514	s = reg_getline(lnum++);
				6515	if (round == 2)
				6516	STRCPY(retval + len, s);
				6517	len += (int)STRLEN(s);
				6518	if (round == 2)
				6519	retval[len] = '\n';
				6520	++len;
				6521	}
				6522	if (round == 2)
				6523	STRNCPY(retval + len, reg_getline(lnum),
				6524	submatch_mmatch->endpos[no].col);
				6525	len += submatch_mmatch->endpos[no].col;
				6526	if (round == 2)
				6527	retval[len] = NUL;
				6528	++len;
				6529	}
				6530
				6531	if (round == 1)
				6532	{
				6533	retval = lalloc((long_u)len, TRUE);
				6534	if (s == NULL)
				6535	return NULL;
				6536	}
				6537	}
				6538	}
				6539	else
				6540	{
				6541	if (submatch_match->endp[no] == NULL)
				6542	retval = NULL;
				6543	else
				6544	{
				6545	s = submatch_match->startp[no];
				6546	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6547	}
				6548	}
				6549
				6550	return retval;
				6551	}
				6552	#endif