Blame - src/regexp.c - android_external_vim

blob: 2e828541abb71d5dff9be3efd8866958518a2bf8 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACE_COMPLEX
				77	* node points to the node after the stuff to be repeated. The operand of some
				78	* types of node is a literal string; for others, it is a node leading into a
				79	* sub-FSM. In particular, the operand of a BRANCH node is the first node of
				80	* the branch. (NB this is not a tree structure: the tail of the branch
				81	* connects to the thing following the set of BRANCHes.)
				82	*
				83	* pattern is coded like:
				84	*
				85	* +-----------------+
				86	* \| V
				87	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				88	* \| ^ \| ^
				89	* +------+ +----------+
				90	*
				91	*
				92	* +------------------+
				93	* V \|
				94	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				95	* \| \| ^ ^
				96	* \| +---------------+ \|
				97	* +---------------------------------------------+
				98	*
				99	*
				100	* +-------------------------+
				101	* V \|
				102	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				103	* \| \| ^
				104	* \| +----------------+
				105	* +-----------------------------------------------+
				106	*
				107	*
				108	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				109	* \| \| ^ ^
				110	* \| +----------------+ \|
				111	* +--------------------------------+
				112	*
				113	* +---------+
				114	* \| V
				115	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				116	* \| \| \| \| ^ ^
				117	* \| \| \| +-----+ \|
				118	* \| \| +----------------+ \|
				119	* \| +---------------------------+ \|
				120	* +------------------------------------------------------+
				121	*
				122	* They all start with a BRANCH for "\\|" alternatives, even when there is only
				123	* one alternative.
				124	*/
				125
				126	/*
				127	* The opcodes are:
				128	*/
				129
				130	/* definition number opnd? meaning */
				131	#define END 0 /* End of program or NOMATCH operand. */
				132	#define BOL 1 /* Match "" at beginning of line. */
				133	#define EOL 2 /* Match "" at end of line. */
				134	#define BRANCH 3 /* node Match this alternative, or the
				135	* next... */
				136	#define BACK 4 /* Match "", "next" ptr points backward. */
				137	#define EXACTLY 5 /* str Match this string. */
				138	#define NOTHING 6 /* Match empty string. */
				139	#define STAR 7 /* node Match this (simple) thing 0 or more
				140	* times. */
				141	#define PLUS 8 /* node Match this (simple) thing 1 or more
				142	* times. */
				143	#define MATCH 9 /* node match the operand zero-width */
				144	#define NOMATCH 10 /* node check for no match with operand */
				145	#define BEHIND 11 /* node look behind for a match with operand */
				146	#define NOBEHIND 12 /* node look behind for no match with operand */
				147	#define SUBPAT 13 /* node match the operand here */
				148	#define BRACE_SIMPLE 14 /* node Match this (simple) thing between m and
				149	* n times (\{m,n\}). */
				150	#define BOW 15 /* Match "" after [^a-zA-Z0-9_] */
				151	#define EOW 16 /* Match "" at [^a-zA-Z0-9_] */
				152	#define BRACE_LIMITS 17 /* nr nr define the min & max for BRACE_SIMPLE
				153	* and BRACE_COMPLEX. */
				154	#define NEWL 18 /* Match line-break */
				155	#define BHPOS 19 /* End position for BEHIND or NOBEHIND */
				156
				157
				158	/* character classes: 20-48 normal, 50-78 include a line-break */
				159	#define ADD_NL 30
				160	#define FIRST_NL ANY + ADD_NL
				161	#define ANY 20 /* Match any one character. */
				162	#define ANYOF 21 /* str Match any character in this string. */
				163	#define ANYBUT 22 /* str Match any character not in this
				164	* string. */
				165	#define IDENT 23 /* Match identifier char */
				166	#define SIDENT 24 /* Match identifier char but no digit */
				167	#define KWORD 25 /* Match keyword char */
				168	#define SKWORD 26 /* Match word char but no digit */
				169	#define FNAME 27 /* Match file name char */
				170	#define SFNAME 28 /* Match file name char but no digit */
				171	#define PRINT 29 /* Match printable char */
				172	#define SPRINT 30 /* Match printable char but no digit */
				173	#define WHITE 31 /* Match whitespace char */
				174	#define NWHITE 32 /* Match non-whitespace char */
				175	#define DIGIT 33 /* Match digit char */
				176	#define NDIGIT 34 /* Match non-digit char */
				177	#define HEX 35 /* Match hex char */
				178	#define NHEX 36 /* Match non-hex char */
				179	#define OCTAL 37 /* Match octal char */
				180	#define NOCTAL 38 /* Match non-octal char */
				181	#define WORD 39 /* Match word char */
				182	#define NWORD 40 /* Match non-word char */
				183	#define HEAD 41 /* Match head char */
				184	#define NHEAD 42 /* Match non-head char */
				185	#define ALPHA 43 /* Match alpha char */
				186	#define NALPHA 44 /* Match non-alpha char */
				187	#define LOWER 45 /* Match lowercase char */
				188	#define NLOWER 46 /* Match non-lowercase char */
				189	#define UPPER 47 /* Match uppercase char */
				190	#define NUPPER 48 /* Match non-uppercase char */
				191	#define LAST_NL NUPPER + ADD_NL
				192	#define WITH_NL(op) ((op) >= FIRST_NL && (op) <= LAST_NL)
				193
				194	#define MOPEN 80 /* -89 Mark this point in input as start of
				195	* \( subexpr. MOPEN + 0 marks start of
				196	* match. */
				197	#define MCLOSE 90 /* -99 Analogous to MOPEN. MCLOSE + 0 marks
				198	* end of match. */
				199	#define BACKREF 100 /* -109 node Match same string again \1-\9 */
				200
				201	#ifdef FEAT_SYN_HL
				202	# define ZOPEN 110 /* -119 Mark this point in input as start of
				203	* \z( subexpr. */
				204	# define ZCLOSE 120 /* -129 Analogous to ZOPEN. */
				205	# define ZREF 130 /* -139 node Match external submatch \z1-\z9 */
				206	#endif
				207
				208	#define BRACE_COMPLEX 140 /* -149 node Match nodes between m & n times */
				209
				210	#define NOPEN 150 /* Mark this point in input as start of
				211	\%( subexpr. */
				212	#define NCLOSE 151 /* Analogous to NOPEN. */
				213
				214	#define MULTIBYTECODE 200 /* mbc Match one multi-byte character */
				215	#define RE_BOF 201 /* Match "" at beginning of file. */
				216	#define RE_EOF 202 /* Match "" at end of file. */
				217	#define CURSOR 203 /* Match location of cursor. */
				218
				219	#define RE_LNUM 204 /* nr cmp Match line number */
				220	#define RE_COL 205 /* nr cmp Match column number */
				221	#define RE_VCOL 206 /* nr cmp Match virtual column number */
				222
				223	/*
				224	* Magic characters have a special meaning, they don't match literally.
				225	* Magic characters are negative. This separates them from literal characters
				226	* (possibly multi-byte). Only ASCII characters can be Magic.
				227	*/
				228	#define Magic(x) ((int)(x) - 256)
				229	#define un_Magic(x) ((x) + 256)
				230	#define is_Magic(x) ((x) < 0)
				231
				232	static int no_Magic __ARGS((int x));
				233	static int toggle_Magic __ARGS((int x));
				234
				235	static int
				236	no_Magic(x)
				237	int x;
				238	{
				239	if (is_Magic(x))
				240	return un_Magic(x);
				241	return x;
				242	}
				243
				244	static int
				245	toggle_Magic(x)
				246	int x;
				247	{
				248	if (is_Magic(x))
				249	return un_Magic(x);
				250	return Magic(x);
				251	}
				252
				253	/*
				254	* The first byte of the regexp internal "program" is actually this magic
				255	* number; the start node begins in the second byte. It's used to catch the
				256	* most severe mutilation of the program by the caller.
				257	*/
				258
				259	#define REGMAGIC 0234
				260
				261	/*
				262	* Opcode notes:
				263	*
				264	* BRANCH The set of branches constituting a single choice are hooked
				265	* together with their "next" pointers, since precedence prevents
				266	* anything being concatenated to any individual branch. The
				267	* "next" pointer of the last BRANCH in a choice points to the
				268	* thing following the whole choice. This is also where the
				269	* final "next" pointer of each individual branch points; each
				270	* branch starts with the operand node of a BRANCH node.
				271	*
				272	* BACK Normal "next" pointers all implicitly point forward; BACK
				273	* exists to make loop structures possible.
				274	*
				275	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				276	* BRANCH structures using BACK. Simple cases (one character
				277	* per match) are implemented with STAR and PLUS for speed
				278	* and to minimize recursive plunges.
				279	*
				280	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				281	* node, and defines the min and max limits to be used for that
				282	* node.
				283	*
				284	* MOPEN,MCLOSE ...are numbered at compile time.
				285	* ZOPEN,ZCLOSE ...ditto
				286	*/
				287
				288	/*
				289	* A node is one char of opcode followed by two chars of "next" pointer.
				290	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				291	* value is a positive offset from the opcode of the node containing it.
				292	* An operand, if any, simply follows the node. (Note that much of the
				293	* code generation knows about this implicit relationship.)
				294	*
				295	* Using two bytes for the "next" pointer is vast overkill for most things,
				296	* but allows patterns to get big without disasters.
				297	*/
				298	#define OP(p) ((int)*(p))
				299	#define NEXT(p) (((((p) + 1) & 0377) << 8) + (((p) + 2) & 0377))
				300	#define OPERAND(p) ((p) + 3)
				301	/* Obtain an operand that was stored as four bytes, MSB first. */
				302	#define OPERAND_MIN(p) (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				303	+ ((long)(p)[5] << 8) + (long)(p)[6])
				304	/* Obtain a second operand stored as four bytes. */
				305	#define OPERAND_MAX(p) OPERAND_MIN((p) + 4)
				306	/* Obtain a second single-byte operand stored after a four bytes operand. */
				307	#define OPERAND_CMP(p) (p)[7]
				308
				309	/*
				310	* Utility definitions.
				311	*/
				312	#define UCHARAT(p) ((int)(char_u )(p))
				313
				314	/* Used for an error (down from) vim_regcomp(): give the error message, set
				315	* rc_did_emsg and return NULL */
				316	#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				317	#define EMSG_M_RET_NULL(m, c) { EMSG2(m, c ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				318	#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				319	#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
				320
				321	#define MAX_LIMIT (32767L << 16L)
				322
				323	static int re_multi_type __ARGS((int));
				324	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				325	static char_u cstrchr __ARGS((char_u , int));
				326
				327	#ifdef DEBUG
				328	static void regdump __ARGS((char_u , regprog_T ));
				329	static char_u regprop __ARGS((char_u ));
				330	#endif
				331
				332	#define NOT_MULTI 0
				333	#define MULTI_ONE 1
				334	#define MULTI_MULT 2
				335	/*
				336	* Return NOT_MULTI if c is not a "multi" operator.
				337	* Return MULTI_ONE if c is a single "multi" operator.
				338	* Return MULTI_MULT if c is a multi "multi" operator.
				339	*/
				340	static int
				341	re_multi_type(c)
				342	int c;
				343	{
				344	if (c == Magic('@') \|\| c == Magic('=') \|\| c == Magic('?'))
				345	return MULTI_ONE;
				346	if (c == Magic('*') \|\| c == Magic('+') \|\| c == Magic('{'))
				347	return MULTI_MULT;
				348	return NOT_MULTI;
				349	}
				350
				351	/*
				352	* Flags to be passed up and down.
				353	*/
				354	#define HASWIDTH 0x1 /* Known never to match null string. */
				355	#define SIMPLE 0x2 /* Simple enough to be STAR/PLUS operand. */
				356	#define SPSTART 0x4 /* Starts with * or +. */
				357	#define HASNL 0x8 /* Contains some \n. */
				358	#define HASLOOKBH 0x10 /* Contains "\@<=" or "\@<!". */
				359	#define WORST 0 /* Worst case. */
				360
				361	/*
				362	* When regcode is set to this value, code is not emitted and size is computed
				363	* instead.
				364	*/
				365	#define JUST_CALC_SIZE ((char_u *) -1)
				366
				367	static char_u *reg_prev_sub;
				368
				369	/*
				370	* REGEXP_INRANGE contains all characters which are always special in a []
				371	* range after '\'.
				372	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				373	* These are:
				374	* \n - New line (NL).
				375	* \r - Carriage Return (CR).
				376	* \t - Tab (TAB).
				377	* \e - Escape (ESC).
				378	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	379	* \d - Character code in decimal, eg \d123
				380	* \o - Character code in octal, eg \o101
				381	* \x - Character code in hex, eg \x4a
				382	* \u - Multibyte character code, eg \u20ac
				383	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	384	*/
				385	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	386	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	387
				388	static int backslash_trans __ARGS((int c));
				389	static int skip_class_name __ARGS((char_u **pp));
				390	static char_u skip_anyof __ARGS((char_u p));
				391	static void init_class_tab __ARGS((void));
				392
				393	/*
				394	* Translate '\x' to its control character, except "\n", which is Magic.
				395	*/
				396	static int
				397	backslash_trans(c)
				398	int c;
				399	{
				400	switch (c)
				401	{
				402	case 'r': return CAR;
				403	case 't': return TAB;
				404	case 'e': return ESC;
				405	case 'b': return BS;
				406	}
				407	return c;
				408	}
				409
				410	/*
				411	* Check for a character class name. "pp" points to the '['.
				412	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				413	* recognized. Otherwise "pp" is advanced to after the item.
				414	*/
				415	static int
				416	skip_class_name(pp)
				417	char_u **pp;
				418	{
				419	static const char *(class_names[]) =
				420	{
				421	"alnum:]",
				422	#define CLASS_ALNUM 0
				423	"alpha:]",
				424	#define CLASS_ALPHA 1
				425	"blank:]",
				426	#define CLASS_BLANK 2
				427	"cntrl:]",
				428	#define CLASS_CNTRL 3
				429	"digit:]",
				430	#define CLASS_DIGIT 4
				431	"graph:]",
				432	#define CLASS_GRAPH 5
				433	"lower:]",
				434	#define CLASS_LOWER 6
				435	"print:]",
				436	#define CLASS_PRINT 7
				437	"punct:]",
				438	#define CLASS_PUNCT 8
				439	"space:]",
				440	#define CLASS_SPACE 9
				441	"upper:]",
				442	#define CLASS_UPPER 10
				443	"xdigit:]",
				444	#define CLASS_XDIGIT 11
				445	"tab:]",
				446	#define CLASS_TAB 12
				447	"return:]",
				448	#define CLASS_RETURN 13
				449	"backspace:]",
				450	#define CLASS_BACKSPACE 14
				451	"escape:]",
				452	#define CLASS_ESCAPE 15
				453	};
				454	#define CLASS_NONE 99
				455	int i;
				456
				457	if ((*pp)[1] == ':')
				458	{
				459	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				460	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				461	{
				462	*pp += STRLEN(class_names[i]) + 2;
				463	return i;
				464	}
				465	}
				466	return CLASS_NONE;
				467	}
				468
				469	/*
				470	* Skip over a "[]" range.
				471	* "p" must point to the character after the '['.
				472	* The returned pointer is on the matching ']', or the terminating NUL.
				473	*/
				474	static char_u *
				475	skip_anyof(p)
				476	char_u *p;
				477	{
				478	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				479	#ifdef FEAT_MBYTE
				480	int l;
				481	#endif
				482
				483	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				484
				485	if (p == '^') / Complement of range. */
				486	++p;
				487	if (p == ']' \|\| p == '-')
				488	++p;
				489	while (p != NUL && p != ']')
				490	{
				491	#ifdef FEAT_MBYTE
				492	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				493	p += l;
				494	else
				495	#endif
				496	if (*p == '-')
				497	{
				498	++p;
				499	if (p != ']' && p != NUL)
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	500	mb_ptr_adv(p);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	501	}
				502	else if (*p == '\\'
				503	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				504	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				505	p += 2;
				506	else if (*p == '[')
				507	{
				508	if (skip_class_name(&p) == CLASS_NONE)
				509	++p; /* It was not a class name */
				510	}
				511	else
				512	++p;
				513	}
				514
				515	return p;
				516	}
				517
				518	/*
				519	* Specific version of character class functions.
				520	* Using a table to keep this fast.
				521	*/
				522	static short class_tab[256];
				523
				524	#define RI_DIGIT 0x01
				525	#define RI_HEX 0x02
				526	#define RI_OCTAL 0x04
				527	#define RI_WORD 0x08
				528	#define RI_HEAD 0x10
				529	#define RI_ALPHA 0x20
				530	#define RI_LOWER 0x40
				531	#define RI_UPPER 0x80
				532	#define RI_WHITE 0x100
				533
				534	static void
				535	init_class_tab()
				536	{
				537	int i;
				538	static int done = FALSE;
				539
				540	if (done)
				541	return;
				542
				543	for (i = 0; i < 256; ++i)
				544	{
				545	if (i >= '0' && i <= '7')
				546	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				547	else if (i >= '8' && i <= '9')
				548	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				549	else if (i >= 'a' && i <= 'f')
				550	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				551	#ifdef EBCDIC
				552	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				553	\|\| (i >= 's' && i <= 'z'))
				554	#else
				555	else if (i >= 'g' && i <= 'z')
				556	#endif
				557	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				558	else if (i >= 'A' && i <= 'F')
				559	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				560	#ifdef EBCDIC
				561	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				562	\|\| (i >= 'S' && i <= 'Z'))
				563	#else
				564	else if (i >= 'G' && i <= 'Z')
				565	#endif
				566	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				567	else if (i == '_')
				568	class_tab[i] = RI_WORD + RI_HEAD;
				569	else
				570	class_tab[i] = 0;
				571	}
				572	class_tab[' '] \|= RI_WHITE;
				573	class_tab['\t'] \|= RI_WHITE;
				574	done = TRUE;
				575	}
				576
				577	#ifdef FEAT_MBYTE
				578	# define ri_digit(c) (c < 0x100 && (class_tab[c] & RI_DIGIT))
				579	# define ri_hex(c) (c < 0x100 && (class_tab[c] & RI_HEX))
				580	# define ri_octal(c) (c < 0x100 && (class_tab[c] & RI_OCTAL))
				581	# define ri_word(c) (c < 0x100 && (class_tab[c] & RI_WORD))
				582	# define ri_head(c) (c < 0x100 && (class_tab[c] & RI_HEAD))
				583	# define ri_alpha(c) (c < 0x100 && (class_tab[c] & RI_ALPHA))
				584	# define ri_lower(c) (c < 0x100 && (class_tab[c] & RI_LOWER))
				585	# define ri_upper(c) (c < 0x100 && (class_tab[c] & RI_UPPER))
				586	# define ri_white(c) (c < 0x100 && (class_tab[c] & RI_WHITE))
				587	#else
				588	# define ri_digit(c) (class_tab[c] & RI_DIGIT)
				589	# define ri_hex(c) (class_tab[c] & RI_HEX)
				590	# define ri_octal(c) (class_tab[c] & RI_OCTAL)
				591	# define ri_word(c) (class_tab[c] & RI_WORD)
				592	# define ri_head(c) (class_tab[c] & RI_HEAD)
				593	# define ri_alpha(c) (class_tab[c] & RI_ALPHA)
				594	# define ri_lower(c) (class_tab[c] & RI_LOWER)
				595	# define ri_upper(c) (class_tab[c] & RI_UPPER)
				596	# define ri_white(c) (class_tab[c] & RI_WHITE)
				597	#endif
				598
				599	/* flags for regflags */
				600	#define RF_ICASE 1 /* ignore case */
				601	#define RF_NOICASE 2 /* don't ignore case */
				602	#define RF_HASNL 4 /* can match a NL */
				603	#define RF_ICOMBINE 8 /* ignore combining characters */
				604	#define RF_LOOKBH 16 /* uses "\@<=" or "\@<!" */
				605
				606	/*
				607	* Global work variables for vim_regcomp().
				608	*/
				609
				610	static char_u regparse; / Input-scan pointer. */
				611	static int prevchr_len; /* byte length of previous char */
				612	static int num_complex_braces; /* Complex \{...} count */
				613	static int regnpar; /* () count. */
				614	#ifdef FEAT_SYN_HL
				615	static int regnzpar; /* \z() count. */
				616	static int re_has_z; /* \z item detected */
				617	#endif
				618	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				619	static long regsize; /* Code size. */
				620	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				621	static unsigned regflags; /* RF_ flags for prog */
				622	static long brace_min[10]; /* Minimums for complex brace repeats */
				623	static long brace_max[10]; /* Maximums for complex brace repeats */
				624	static int brace_count[10]; /* Current counts for complex brace repeats */
				625	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				626	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				627	#endif
				628	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				629
				630	static int reg_magic; /* magicness of the pattern: */
				631	#define MAGIC_NONE 1 /* "\V" very unmagic */
				632	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				633	#define MAGIC_ON 3 /* "\m" or 'magic' */
				634	#define MAGIC_ALL 4 /* "\v" very magic */
				635
				636	static int reg_string; /* matching with a string instead of a buffer
				637	line */
				638
				639	/*
				640	* META contains all characters that may be magic, except '^' and '$'.
				641	*/
				642
				643	#ifdef EBCDIC
				644	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				645	#else
				646	/* META[] is used often enough to justify turning it into a table. */
				647	static char_u META_flags[] = {
				648	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				649	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				650	/* % & ( ) * + . */
				651	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				652	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				653	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				654	/* @ A C D F H I K L M O */
				655	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				656	/* P S U V W X Z [ _ */
				657	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				658	/* a c d f h i k l m n o */
				659	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				660	/* p s u v w x z { \| ~ */
				661	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				662	};
				663	#endif
				664
				665	static int curchr;
				666
				667	/* arguments for reg() */
				668	#define REG_NOPAREN 0 /* toplevel reg() */
				669	#define REG_PAREN 1 /* */
				670	#define REG_ZPAREN 2 /* \z(\) */
				671	#define REG_NPAREN 3 /* \%(\) */
				672
				673	/*
				674	* Forward declarations for vim_regcomp()'s friends.
				675	*/
				676	static void initchr __ARGS((char_u *));
				677	static int getchr __ARGS((void));
				678	static void skipchr_keepstart __ARGS((void));
				679	static int peekchr __ARGS((void));
				680	static void skipchr __ARGS((void));
				681	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	682	static int gethexchrs __ARGS((int maxinputlen));
				683	static int getoctchrs __ARGS((void));
				684	static int getdecchrs __ARGS((void));
				685	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	686	static void regcomp_start __ARGS((char_u *expr, int flags));
				687	static char_u reg __ARGS((int, int ));
				688	static char_u regbranch __ARGS((int flagp));
				689	static char_u regconcat __ARGS((int flagp));
				690	static char_u regpiece __ARGS((int ));
				691	static char_u regatom __ARGS((int ));
				692	static char_u *regnode __ARGS((int));
				693	static int prog_magic_wrong __ARGS((void));
				694	static char_u regnext __ARGS((char_u ));
				695	static void regc __ARGS((int b));
				696	#ifdef FEAT_MBYTE
				697	static void regmbc __ARGS((int c));
				698	#endif
				699	static void reginsert __ARGS((int, char_u *));
				700	static void reginsert_limits __ARGS((int, long, long, char_u *));
				701	static char_u re_put_long __ARGS((char_u pr, long_u val));
				702	static int read_limits __ARGS((long , long ));
				703	static void regtail __ARGS((char_u , char_u ));
				704	static void regoptail __ARGS((char_u , char_u ));
				705
				706	/*
				707	* Return TRUE if compiled regular expression "prog" can match a line break.
				708	*/
				709	int
				710	re_multiline(prog)
				711	regprog_T *prog;
				712	{
				713	return (prog->regflags & RF_HASNL);
				714	}
				715
				716	/*
				717	* Return TRUE if compiled regular expression "prog" looks before the start
				718	* position (pattern contains "\@<=" or "\@<!").
				719	*/
				720	int
				721	re_lookbehind(prog)
				722	regprog_T *prog;
				723	{
				724	return (prog->regflags & RF_LOOKBH);
				725	}
				726
				727	/*
				728	* Skip past regular expression.
				729	* Stop at end of 'p' of where 'dirc' is found ('/', '?', etc).
				730	* Take care of characters with a backslash in front of it.
				731	* Skip strings inside [ and ].
				732	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				733	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				734	* is changed in-place.
				735	*/
				736	char_u *
				737	skip_regexp(startp, dirc, magic, newp)
				738	char_u *startp;
				739	int dirc;
				740	int magic;
				741	char_u **newp;
				742	{
				743	int mymagic;
				744	char_u *p = startp;
				745
				746	if (magic)
				747	mymagic = MAGIC_ON;
				748	else
				749	mymagic = MAGIC_OFF;
				750
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	751	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	752	{
				753	if (p[0] == dirc) /* found end of regexp */
				754	break;
				755	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				756	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				757	{
				758	p = skip_anyof(p + 1);
				759	if (p[0] == NUL)
				760	break;
				761	}
				762	else if (p[0] == '\\' && p[1] != NUL)
				763	{
				764	if (dirc == '?' && newp != NULL && p[1] == '?')
				765	{
				766	/* change "\?" to "?", make a copy first. */
				767	if (*newp == NULL)
				768	{
				769	*newp = vim_strsave(startp);
				770	if (*newp != NULL)
				771	p = *newp + (p - startp);
				772	}
				773	if (*newp != NULL)
				774	mch_memmove(p, p + 1, STRLEN(p));
				775	else
				776	++p;
				777	}
				778	else
				779	++p; /* skip next character */
				780	if (*p == 'v')
				781	mymagic = MAGIC_ALL;
				782	else if (*p == 'V')
				783	mymagic = MAGIC_NONE;
				784	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	785	}
				786	return p;
				787	}
				788
				789	/*
				790	* vim_regcomp - compile a regular expression into internal code
				791	*
				792	* We can't allocate space until we know how big the compiled form will be,
				793	* but we can't compile it (and thus know how big it is) until we've got a
				794	* place to put the code. So we cheat: we compile it twice, once with code
				795	* generation turned off and size counting turned on, and once "for real".
				796	* This also means that we don't allocate space until we are sure that the
				797	* thing really will compile successfully, and we never have to move the
				798	* code and thus invalidate pointers into it. (Note that it has to be in
				799	* one piece because vim_free() must be able to free it all.)
				800	*
				801	* Whether upper/lower case is to be ignored is decided when executing the
				802	* program, it does not matter here.
				803	*
				804	* Beware that the optimization-preparation code in here knows about some
				805	* of the structure of the compiled regexp.
				806	* "re_flags": RE_MAGIC and/or RE_STRING.
				807	*/
				808	regprog_T *
				809	vim_regcomp(expr, re_flags)
				810	char_u *expr;
				811	int re_flags;
				812	{
				813	regprog_T *r;
				814	char_u *scan;
				815	char_u *longest;
				816	int len;
				817	int flags;
				818
				819	if (expr == NULL)
				820	EMSG_RET_NULL(_(e_null));
				821
				822	init_class_tab();
				823
				824	/*
				825	* First pass: determine size, legality.
				826	*/
				827	regcomp_start(expr, re_flags);
				828	regcode = JUST_CALC_SIZE;
				829	regc(REGMAGIC);
				830	if (reg(REG_NOPAREN, &flags) == NULL)
				831	return NULL;
				832
				833	/* Small enough for pointer-storage convention? */
				834	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				835	if (regsize >= 65536L - 256L)
				836	EMSG_RET_NULL(_("E339: Pattern too long"));
				837	#endif
				838
				839	/* Allocate space. */
				840	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				841	if (r == NULL)
				842	return NULL;
				843
				844	/*
				845	* Second pass: emit code.
				846	*/
				847	regcomp_start(expr, re_flags);
				848	regcode = r->program;
				849	regc(REGMAGIC);
				850	if (reg(REG_NOPAREN, &flags) == NULL)
				851	{
				852	vim_free(r);
				853	return NULL;
				854	}
				855
				856	/* Dig out information for optimizations. */
				857	r->regstart = NUL; /* Worst-case defaults. */
				858	r->reganch = 0;
				859	r->regmust = NULL;
				860	r->regmlen = 0;
				861	r->regflags = regflags;
				862	if (flags & HASNL)
				863	r->regflags \|= RF_HASNL;
				864	if (flags & HASLOOKBH)
				865	r->regflags \|= RF_LOOKBH;
				866	#ifdef FEAT_SYN_HL
				867	/* Remember whether this pattern has any \z specials in it. */
				868	r->reghasz = re_has_z;
				869	#endif
				870	scan = r->program + 1; /* First BRANCH. */
				871	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				872	{
				873	scan = OPERAND(scan);
				874
				875	/* Starting-point info. */
				876	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				877	{
				878	r->reganch++;
				879	scan = regnext(scan);
				880	}
				881
				882	if (OP(scan) == EXACTLY)
				883	{
				884	#ifdef FEAT_MBYTE
				885	if (has_mbyte)
				886	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				887	else
				888	#endif
				889	r->regstart = *OPERAND(scan);
				890	}
				891	else if ((OP(scan) == BOW
				892	\|\| OP(scan) == EOW
				893	\|\| OP(scan) == NOTHING
				894	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				895	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				896	&& OP(regnext(scan)) == EXACTLY)
				897	{
				898	#ifdef FEAT_MBYTE
				899	if (has_mbyte)
				900	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				901	else
				902	#endif
				903	r->regstart = *OPERAND(regnext(scan));
				904	}
				905
				906	/*
				907	* If there's something expensive in the r.e., find the longest
				908	* literal string that must appear and make it the regmust. Resolve
				909	* ties in favor of later strings, since the regstart check works
				910	* with the beginning of the r.e. and avoiding duplication
				911	* strengthens checking. Not a strong reason, but sufficient in the
				912	* absence of others.
				913	*/
				914	/*
				915	* When the r.e. starts with BOW, it is faster to look for a regmust
				916	* first. Used a lot for "#" and "*" commands. (Added by mool).
				917	*/
				918	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				919	&& !(flags & HASNL))
				920	{
				921	longest = NULL;
				922	len = 0;
				923	for (; scan != NULL; scan = regnext(scan))
				924	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				925	{
				926	longest = OPERAND(scan);
				927	len = (int)STRLEN(OPERAND(scan));
				928	}
				929	r->regmust = longest;
				930	r->regmlen = len;
				931	}
				932	}
				933	#ifdef DEBUG
				934	regdump(expr, r);
				935	#endif
				936	return r;
				937	}
				938
				939	/*
				940	* Setup to parse the regexp. Used once to get the length and once to do it.
				941	*/
				942	static void
				943	regcomp_start(expr, re_flags)
				944	char_u *expr;
				945	int re_flags; /* see vim_regcomp() */
				946	{
				947	initchr(expr);
				948	if (re_flags & RE_MAGIC)
				949	reg_magic = MAGIC_ON;
				950	else
				951	reg_magic = MAGIC_OFF;
				952	reg_string = (re_flags & RE_STRING);
				953
				954	num_complex_braces = 0;
				955	regnpar = 1;
				956	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				957	#ifdef FEAT_SYN_HL
				958	regnzpar = 1;
				959	re_has_z = 0;
				960	#endif
				961	regsize = 0L;
				962	regflags = 0;
				963	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				964	had_eol = FALSE;
				965	#endif
				966	}
				967
				968	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				969	/*
				970	* Check if during the previous call to vim_regcomp the EOL item "$" has been
				971	* found. This is messy, but it works fine.
				972	*/
				973	int
				974	vim_regcomp_had_eol()
				975	{
				976	return had_eol;
				977	}
				978	#endif
				979
				980	/*
				981	* reg - regular expression, i.e. main body or parenthesized thing
				982	*
				983	* Caller must absorb opening parenthesis.
				984	*
				985	* Combining parenthesis handling with the base level of regular expression
				986	* is a trifle forced, but the need to tie the tails of the branches to what
				987	* follows makes it hard to avoid.
				988	*/
				989	static char_u *
				990	reg(paren, flagp)
				991	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				992	int *flagp;
				993	{
				994	char_u *ret;
				995	char_u *br;
				996	char_u *ender;
				997	int parno = 0;
				998	int flags;
				999
				1000	flagp = HASWIDTH; / Tentatively. */
				1001
				1002	#ifdef FEAT_SYN_HL
				1003	if (paren == REG_ZPAREN)
				1004	{
				1005	/* Make a ZOPEN node. */
				1006	if (regnzpar >= NSUBEXP)
				1007	EMSG_RET_NULL(_("E50: Too many \\z("));
				1008	parno = regnzpar;
				1009	regnzpar++;
				1010	ret = regnode(ZOPEN + parno);
				1011	}
				1012	else
				1013	#endif
				1014	if (paren == REG_PAREN)
				1015	{
				1016	/* Make a MOPEN node. */
				1017	if (regnpar >= NSUBEXP)
				1018	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1019	parno = regnpar;
				1020	++regnpar;
				1021	ret = regnode(MOPEN + parno);
				1022	}
				1023	else if (paren == REG_NPAREN)
				1024	{
				1025	/* Make a NOPEN node. */
				1026	ret = regnode(NOPEN);
				1027	}
				1028	else
				1029	ret = NULL;
				1030
				1031	/* Pick up the branches, linking them together. */
				1032	br = regbranch(&flags);
				1033	if (br == NULL)
				1034	return NULL;
				1035	if (ret != NULL)
				1036	regtail(ret, br); /* [MZ]OPEN -> first. */
				1037	else
				1038	ret = br;
				1039	/* If one of the branches can be zero-width, the whole thing can.
				1040	* If one of the branches has * at start or matches a line-break, the
				1041	* whole thing can. */
				1042	if (!(flags & HASWIDTH))
				1043	*flagp &= ~HASWIDTH;
				1044	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1045	while (peekchr() == Magic('\|'))
				1046	{
				1047	skipchr();
				1048	br = regbranch(&flags);
				1049	if (br == NULL)
				1050	return NULL;
				1051	regtail(ret, br); /* BRANCH -> BRANCH. */
				1052	if (!(flags & HASWIDTH))
				1053	*flagp &= ~HASWIDTH;
				1054	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1055	}
				1056
				1057	/* Make a closing node, and hook it on the end. */
				1058	ender = regnode(
				1059	#ifdef FEAT_SYN_HL
				1060	paren == REG_ZPAREN ? ZCLOSE + parno :
				1061	#endif
				1062	paren == REG_PAREN ? MCLOSE + parno :
				1063	paren == REG_NPAREN ? NCLOSE : END);
				1064	regtail(ret, ender);
				1065
				1066	/* Hook the tails of the branches to the closing node. */
				1067	for (br = ret; br != NULL; br = regnext(br))
				1068	regoptail(br, ender);
				1069
				1070	/* Check for proper termination. */
				1071	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1072	{
				1073	#ifdef FEAT_SYN_HL
				1074	if (paren == REG_ZPAREN)
				1075	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1076	else
				1077	#endif
				1078	if (paren == REG_NPAREN)
				1079	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1080	else
				1081	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1082	}
				1083	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1084	{
				1085	if (curchr == Magic(')'))
				1086	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1087	else
				1088	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1089	/* NOTREACHED */
				1090	}
				1091	/*
				1092	* Here we set the flag allowing back references to this set of
				1093	* parentheses.
				1094	*/
				1095	if (paren == REG_PAREN)
				1096	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1097	return ret;
				1098	}
				1099
				1100	/*
				1101	* regbranch - one alternative of an \| operator
				1102	*
				1103	* Implements the & operator.
				1104	*/
				1105	static char_u *
				1106	regbranch(flagp)
				1107	int *flagp;
				1108	{
				1109	char_u *ret;
				1110	char_u *chain = NULL;
				1111	char_u *latest;
				1112	int flags;
				1113
				1114	flagp = WORST \| HASNL; / Tentatively. */
				1115
				1116	ret = regnode(BRANCH);
				1117	for (;;)
				1118	{
				1119	latest = regconcat(&flags);
				1120	if (latest == NULL)
				1121	return NULL;
				1122	/* If one of the branches has width, the whole thing has. If one of
				1123	* the branches anchors at start-of-line, the whole thing does.
				1124	* If one of the branches uses look-behind, the whole thing does. */
				1125	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1126	/* If one of the branches doesn't match a line-break, the whole thing
				1127	* doesn't. */
				1128	*flagp &= ~HASNL \| (flags & HASNL);
				1129	if (chain != NULL)
				1130	regtail(chain, latest);
				1131	if (peekchr() != Magic('&'))
				1132	break;
				1133	skipchr();
				1134	regtail(latest, regnode(END)); /* operand ends */
				1135	reginsert(MATCH, latest);
				1136	chain = latest;
				1137	}
				1138
				1139	return ret;
				1140	}
				1141
				1142	/*
				1143	* regbranch - one alternative of an \| or & operator
				1144	*
				1145	* Implements the concatenation operator.
				1146	*/
				1147	static char_u *
				1148	regconcat(flagp)
				1149	int *flagp;
				1150	{
				1151	char_u *first = NULL;
				1152	char_u *chain = NULL;
				1153	char_u *latest;
				1154	int flags;
				1155	int cont = TRUE;
				1156
				1157	flagp = WORST; / Tentatively. */
				1158
				1159	while (cont)
				1160	{
				1161	switch (peekchr())
				1162	{
				1163	case NUL:
				1164	case Magic('\|'):
				1165	case Magic('&'):
				1166	case Magic(')'):
				1167	cont = FALSE;
				1168	break;
				1169	case Magic('Z'):
				1170	#ifdef FEAT_MBYTE
				1171	regflags \|= RF_ICOMBINE;
				1172	#endif
				1173	skipchr_keepstart();
				1174	break;
				1175	case Magic('c'):
				1176	regflags \|= RF_ICASE;
				1177	skipchr_keepstart();
				1178	break;
				1179	case Magic('C'):
				1180	regflags \|= RF_NOICASE;
				1181	skipchr_keepstart();
				1182	break;
				1183	case Magic('v'):
				1184	reg_magic = MAGIC_ALL;
				1185	skipchr_keepstart();
				1186	curchr = -1;
				1187	break;
				1188	case Magic('m'):
				1189	reg_magic = MAGIC_ON;
				1190	skipchr_keepstart();
				1191	curchr = -1;
				1192	break;
				1193	case Magic('M'):
				1194	reg_magic = MAGIC_OFF;
				1195	skipchr_keepstart();
				1196	curchr = -1;
				1197	break;
				1198	case Magic('V'):
				1199	reg_magic = MAGIC_NONE;
				1200	skipchr_keepstart();
				1201	curchr = -1;
				1202	break;
				1203	default:
				1204	latest = regpiece(&flags);
				1205	if (latest == NULL)
				1206	return NULL;
				1207	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1208	if (chain == NULL) /* First piece. */
				1209	*flagp \|= flags & SPSTART;
				1210	else
				1211	regtail(chain, latest);
				1212	chain = latest;
				1213	if (first == NULL)
				1214	first = latest;
				1215	break;
				1216	}
				1217	}
				1218	if (first == NULL) /* Loop ran zero times. */
				1219	first = regnode(NOTHING);
				1220	return first;
				1221	}
				1222
				1223	/*
				1224	* regpiece - something followed by possible [*+=]
				1225	*
				1226	* Note that the branching code sequences used for = and the general cases
				1227	* of * and + are somewhat optimized: they use the same NOTHING node as
				1228	* both the endmarker for their branch list and the body of the last branch.
				1229	* It might seem that this node could be dispensed with entirely, but the
				1230	* endmarker role is not redundant.
				1231	*/
				1232	static char_u *
				1233	regpiece(flagp)
				1234	int *flagp;
				1235	{
				1236	char_u *ret;
				1237	int op;
				1238	char_u *next;
				1239	int flags;
				1240	long minval;
				1241	long maxval;
				1242
				1243	ret = regatom(&flags);
				1244	if (ret == NULL)
				1245	return NULL;
				1246
				1247	op = peekchr();
				1248	if (re_multi_type(op) == NOT_MULTI)
				1249	{
				1250	*flagp = flags;
				1251	return ret;
				1252	}
				1253	if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
				1254	{
				1255	if (op == Magic('*'))
				1256	EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
				1257	reg_magic >= MAGIC_ON);
				1258	if (op == Magic('+'))
				1259	EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
				1260	reg_magic == MAGIC_ALL);
				1261	/* "\{}" is checked below, it's allowed when there is an upper limit */
				1262	}
				1263	/* default flags */
				1264	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1265
				1266	skipchr();
				1267	switch (op)
				1268	{
				1269	case Magic('*'):
				1270	if (flags & SIMPLE)
				1271	reginsert(STAR, ret);
				1272	else
				1273	{
				1274	/* Emit x* as (x&\|), where & means "self". */
				1275	reginsert(BRANCH, ret); /* Either x */
				1276	regoptail(ret, regnode(BACK)); /* and loop */
				1277	regoptail(ret, ret); /* back */
				1278	regtail(ret, regnode(BRANCH)); /* or */
				1279	regtail(ret, regnode(NOTHING)); /* null. */
				1280	}
				1281	break;
				1282
				1283	case Magic('+'):
				1284	if (flags & SIMPLE)
				1285	reginsert(PLUS, ret);
				1286	else
				1287	{
				1288	/* Emit x+ as x(&\|), where & means "self". */
				1289	next = regnode(BRANCH); /* Either */
				1290	regtail(ret, next);
				1291	regtail(regnode(BACK), ret); /* loop back */
				1292	regtail(next, regnode(BRANCH)); /* or */
				1293	regtail(ret, regnode(NOTHING)); /* null. */
				1294	}
				1295	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1296	break;
				1297
				1298	case Magic('@'):
				1299	{
				1300	int lop = END;
				1301
				1302	switch (no_Magic(getchr()))
				1303	{
				1304	case '=': lop = MATCH; break; /* \@= */
				1305	case '!': lop = NOMATCH; break; /* \@! */
				1306	case '>': lop = SUBPAT; break; /* \@> */
				1307	case '<': switch (no_Magic(getchr()))
				1308	{
				1309	case '=': lop = BEHIND; break; /* \@<= */
				1310	case '!': lop = NOBEHIND; break; /* \@<! */
				1311	}
				1312	}
				1313	if (lop == END)
				1314	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1315	reg_magic == MAGIC_ALL);
				1316	/* Look behind must match with behind_pos. */
				1317	if (lop == BEHIND \|\| lop == NOBEHIND)
				1318	{
				1319	regtail(ret, regnode(BHPOS));
				1320	*flagp \|= HASLOOKBH;
				1321	}
				1322	regtail(ret, regnode(END)); /* operand ends */
				1323	reginsert(lop, ret);
				1324	break;
				1325	}
				1326
				1327	case Magic('?'):
				1328	case Magic('='):
				1329	/* Emit x= as (x\|) */
				1330	reginsert(BRANCH, ret); /* Either x */
				1331	regtail(ret, regnode(BRANCH)); /* or */
				1332	next = regnode(NOTHING); /* null. */
				1333	regtail(ret, next);
				1334	regoptail(ret, next);
				1335	break;
				1336
				1337	case Magic('{'):
				1338	if (!read_limits(&minval, &maxval))
				1339	return NULL;
				1340	if (!(flags & HASWIDTH) && (maxval > minval
				1341	? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
				1342	EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
				1343	reg_magic == MAGIC_ALL);
				1344	if (flags & SIMPLE)
				1345	{
				1346	reginsert(BRACE_SIMPLE, ret);
				1347	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1348	}
				1349	else
				1350	{
				1351	if (num_complex_braces >= 10)
				1352	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1353	reg_magic == MAGIC_ALL);
				1354	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1355	regoptail(ret, regnode(BACK));
				1356	regoptail(ret, ret);
				1357	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1358	++num_complex_braces;
				1359	}
				1360	if (minval > 0 && maxval > 0)
				1361	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1362	break;
				1363	}
				1364	if (re_multi_type(peekchr()) != NOT_MULTI)
				1365	{
				1366	/* Can't have a multi follow a multi. */
				1367	if (peekchr() == Magic('*'))
				1368	sprintf((char )IObuff, _("E61: Nested %s"),
				1369	reg_magic >= MAGIC_ON ? "" : "\\");
				1370	else
				1371	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1372	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1373	EMSG_RET_NULL(IObuff);
				1374	}
				1375
				1376	return ret;
				1377	}
				1378
				1379	/*
				1380	* regatom - the lowest level
				1381	*
				1382	* Optimization: gobbles an entire sequence of ordinary characters so that
				1383	* it can turn them into a single node, which is smaller to store and
				1384	* faster to run. Don't do this when one_exactly is set.
				1385	*/
				1386	static char_u *
				1387	regatom(flagp)
				1388	int *flagp;
				1389	{
				1390	char_u *ret;
				1391	int flags;
				1392	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				1393	int c;
				1394	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1395	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1396	FNAME, SFNAME, PRINT, SPRINT,
				1397	WHITE, NWHITE, DIGIT, NDIGIT,
				1398	HEX, NHEX, OCTAL, NOCTAL,
				1399	WORD, NWORD, HEAD, NHEAD,
				1400	ALPHA, NALPHA, LOWER, NLOWER,
				1401	UPPER, NUPPER
				1402	};
				1403	char_u *p;
				1404	int extra = 0;
				1405
				1406	flagp = WORST; / Tentatively. */
				1407	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				1408
				1409	c = getchr();
				1410	switch (c)
				1411	{
				1412	case Magic('^'):
				1413	ret = regnode(BOL);
				1414	break;
				1415
				1416	case Magic('$'):
				1417	ret = regnode(EOL);
				1418	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1419	had_eol = TRUE;
				1420	#endif
				1421	break;
				1422
				1423	case Magic('<'):
				1424	ret = regnode(BOW);
				1425	break;
				1426
				1427	case Magic('>'):
				1428	ret = regnode(EOW);
				1429	break;
				1430
				1431	case Magic('_'):
				1432	c = no_Magic(getchr());
				1433	if (c == '^') /* "\_^" is start-of-line */
				1434	{
				1435	ret = regnode(BOL);
				1436	break;
				1437	}
				1438	if (c == '$') /* "\_$" is end-of-line */
				1439	{
				1440	ret = regnode(EOL);
				1441	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1442	had_eol = TRUE;
				1443	#endif
				1444	break;
				1445	}
				1446
				1447	extra = ADD_NL;
				1448	*flagp \|= HASNL;
				1449
				1450	/* "\_[" is character range plus newline */
				1451	if (c == '[')
				1452	goto collection;
				1453
				1454	/* "\_x" is character class plus newline */
				1455	/FALLTHROUGH/
				1456
				1457	/*
				1458	* Character classes.
				1459	*/
				1460	case Magic('.'):
				1461	case Magic('i'):
				1462	case Magic('I'):
				1463	case Magic('k'):
				1464	case Magic('K'):
				1465	case Magic('f'):
				1466	case Magic('F'):
				1467	case Magic('p'):
				1468	case Magic('P'):
				1469	case Magic('s'):
				1470	case Magic('S'):
				1471	case Magic('d'):
				1472	case Magic('D'):
				1473	case Magic('x'):
				1474	case Magic('X'):
				1475	case Magic('o'):
				1476	case Magic('O'):
				1477	case Magic('w'):
				1478	case Magic('W'):
				1479	case Magic('h'):
				1480	case Magic('H'):
				1481	case Magic('a'):
				1482	case Magic('A'):
				1483	case Magic('l'):
				1484	case Magic('L'):
				1485	case Magic('u'):
				1486	case Magic('U'):
				1487	p = vim_strchr(classchars, no_Magic(c));
				1488	if (p == NULL)
				1489	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1490	ret = regnode(classcodes[p - classchars] + extra);
				1491	*flagp \|= HASWIDTH \| SIMPLE;
				1492	break;
				1493
				1494	case Magic('n'):
				1495	if (reg_string)
				1496	{
				1497	/* In a string "\n" matches a newline character. */
				1498	ret = regnode(EXACTLY);
				1499	regc(NL);
				1500	regc(NUL);
				1501	*flagp \|= HASWIDTH \| SIMPLE;
				1502	}
				1503	else
				1504	{
				1505	/* In buffer text "\n" matches the end of a line. */
				1506	ret = regnode(NEWL);
				1507	*flagp \|= HASWIDTH \| HASNL;
				1508	}
				1509	break;
				1510
				1511	case Magic('('):
				1512	if (one_exactly)
				1513	EMSG_ONE_RET_NULL;
				1514	ret = reg(REG_PAREN, &flags);
				1515	if (ret == NULL)
				1516	return NULL;
				1517	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1518	break;
				1519
				1520	case NUL:
				1521	case Magic('\|'):
				1522	case Magic('&'):
				1523	case Magic(')'):
				1524	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1525	/* NOTREACHED */
				1526
				1527	case Magic('='):
				1528	case Magic('?'):
				1529	case Magic('+'):
				1530	case Magic('@'):
				1531	case Magic('{'):
				1532	case Magic('*'):
				1533	c = no_Magic(c);
				1534	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1535	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1536	? "" : "\\", c);
				1537	EMSG_RET_NULL(IObuff);
				1538	/* NOTREACHED */
				1539
				1540	case Magic('~'): /* previous substitute pattern */
				1541	if (reg_prev_sub)
				1542	{
				1543	char_u *lp;
				1544
				1545	ret = regnode(EXACTLY);
				1546	lp = reg_prev_sub;
				1547	while (*lp != NUL)
				1548	regc(*lp++);
				1549	regc(NUL);
				1550	if (*reg_prev_sub != NUL)
				1551	{
				1552	*flagp \|= HASWIDTH;
				1553	if ((lp - reg_prev_sub) == 1)
				1554	*flagp \|= SIMPLE;
				1555	}
				1556	}
				1557	else
				1558	EMSG_RET_NULL(_(e_nopresub));
				1559	break;
				1560
				1561	case Magic('1'):
				1562	case Magic('2'):
				1563	case Magic('3'):
				1564	case Magic('4'):
				1565	case Magic('5'):
				1566	case Magic('6'):
				1567	case Magic('7'):
				1568	case Magic('8'):
				1569	case Magic('9'):
				1570	{
				1571	int refnum;
				1572
				1573	refnum = c - Magic('0');
				1574	/*
				1575	* Check if the back reference is legal. We must have seen the
				1576	* close brace.
				1577	* TODO: Should also check that we don't refer to something
				1578	* that is repeated (+*=): what instance of the repetition
				1579	* should we match?
				1580	*/
				1581	if (!had_endbrace[refnum])
				1582	{
				1583	/* Trick: check if "@<=" or "@<!" follows, in which case
				1584	* the \1 can appear before the referenced match. */
				1585	for (p = regparse; *p != NUL; ++p)
				1586	if (p[0] == '@' && p[1] == '<'
				1587	&& (p[2] == '!' \|\| p[2] == '='))
				1588	break;
				1589	if (*p == NUL)
				1590	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1591	}
				1592	ret = regnode(BACKREF + refnum);
				1593	}
				1594	break;
				1595
				1596	#ifdef FEAT_SYN_HL
				1597	case Magic('z'):
				1598	{
				1599	c = no_Magic(getchr());
				1600	switch (c)
				1601	{
				1602	case '(': if (reg_do_extmatch != REX_SET)
				1603	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1604	if (one_exactly)
				1605	EMSG_ONE_RET_NULL;
				1606	ret = reg(REG_ZPAREN, &flags);
				1607	if (ret == NULL)
				1608	return NULL;
				1609	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1610	re_has_z = REX_SET;
				1611	break;
				1612
				1613	case '1':
				1614	case '2':
				1615	case '3':
				1616	case '4':
				1617	case '5':
				1618	case '6':
				1619	case '7':
				1620	case '8':
				1621	case '9': if (reg_do_extmatch != REX_USE)
				1622	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1623	ret = regnode(ZREF + c - '0');
				1624	re_has_z = REX_USE;
				1625	break;
				1626
				1627	case 's': ret = regnode(MOPEN + 0);
				1628	break;
				1629
				1630	case 'e': ret = regnode(MCLOSE + 0);
				1631	break;
				1632
				1633	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1634	}
				1635	}
				1636	break;
				1637	#endif
				1638
				1639	case Magic('%'):
				1640	{
				1641	c = no_Magic(getchr());
				1642	switch (c)
				1643	{
				1644	/* () without a back reference */
				1645	case '(':
				1646	if (one_exactly)
				1647	EMSG_ONE_RET_NULL;
				1648	ret = reg(REG_NPAREN, &flags);
				1649	if (ret == NULL)
				1650	return NULL;
				1651	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1652	break;
				1653
				1654	/* Catch \%^ and \%$ regardless of where they appear in the
				1655	* pattern -- regardless of whether or not it makes sense. */
				1656	case '^':
				1657	ret = regnode(RE_BOF);
				1658	break;
				1659
				1660	case '$':
				1661	ret = regnode(RE_EOF);
				1662	break;
				1663
				1664	case '#':
				1665	ret = regnode(CURSOR);
				1666	break;
				1667
				1668	/* \%[abc]: Emit as a list of branches, all ending at the last
				1669	* branch which matches nothing. */
				1670	case '[':
				1671	if (one_exactly) /* doesn't nest */
				1672	EMSG_ONE_RET_NULL;
				1673	{
				1674	char_u *lastbranch;
				1675	char_u *lastnode = NULL;
				1676	char_u *br;
				1677
				1678	ret = NULL;
				1679	while ((c = getchr()) != ']')
				1680	{
				1681	if (c == NUL)
				1682	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1683	reg_magic == MAGIC_ALL);
				1684	br = regnode(BRANCH);
				1685	if (ret == NULL)
				1686	ret = br;
				1687	else
				1688	regtail(lastnode, br);
				1689
				1690	ungetchr();
				1691	one_exactly = TRUE;
				1692	lastnode = regatom(flagp);
				1693	one_exactly = FALSE;
				1694	if (lastnode == NULL)
				1695	return NULL;
				1696	}
				1697	if (ret == NULL)
				1698	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1699	reg_magic == MAGIC_ALL);
				1700	lastbranch = regnode(BRANCH);
				1701	br = regnode(NOTHING);
				1702	if (ret != JUST_CALC_SIZE)
				1703	{
				1704	regtail(lastnode, br);
				1705	regtail(lastbranch, br);
				1706	/* connect all branches to the NOTHING
				1707	* branch at the end */
				1708	for (br = ret; br != lastnode; )
				1709	{
				1710	if (OP(br) == BRANCH)
				1711	{
				1712	regtail(br, lastbranch);
				1713	br = OPERAND(br);
				1714	}
				1715	else
				1716	br = regnext(br);
				1717	}
				1718	}
				1719	*flagp &= ~HASWIDTH;
				1720	break;
				1721	}
				1722
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1723	case 'd': /* %d123 decimal */
				1724	case 'o': /* %o123 octal */
				1725	case 'x': /* %xab hex 2 */
				1726	case 'u': /* %uabcd hex 4 */
				1727	case 'U': /* %U1234abcd hex 8 */
				1728	{
				1729	int i;
				1730
				1731	switch (c)
				1732	{
				1733	case 'd': i = getdecchrs(); break;
				1734	case 'o': i = getoctchrs(); break;
				1735	case 'x': i = gethexchrs(2); break;
				1736	case 'u': i = gethexchrs(4); break;
				1737	case 'U': i = gethexchrs(8); break;
				1738	default: i = -1; break;
				1739	}
				1740
				1741	if (i < 0)
				1742	EMSG_M_RET_NULL(
				1743	_("E678: Invalid character after %s%%[dxouU]"),
				1744	reg_magic == MAGIC_ALL);
				1745	ret = regnode(EXACTLY);
				1746	if (i == 0)
				1747	regc(0x0a);
				1748	else
				1749	#ifdef FEAT_MBYTE
				1750	regmbc(i);
				1751	#else
				1752	regc(i);
				1753	#endif
				1754	regc(NUL);
				1755	*flagp \|= HASWIDTH;
				1756	break;
				1757	}
				1758
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1759	default:
				1760	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1761	{
				1762	long_u n = 0;
				1763	int cmp;
				1764
				1765	cmp = c;
				1766	if (cmp == '<' \|\| cmp == '>')
				1767	c = getchr();
				1768	while (VIM_ISDIGIT(c))
				1769	{
				1770	n = n * 10 + (c - '0');
				1771	c = getchr();
				1772	}
				1773	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1774	{
				1775	if (c == 'l')
				1776	ret = regnode(RE_LNUM);
				1777	else if (c == 'c')
				1778	ret = regnode(RE_COL);
				1779	else
				1780	ret = regnode(RE_VCOL);
				1781	if (ret == JUST_CALC_SIZE)
				1782	regsize += 5;
				1783	else
				1784	{
				1785	/* put the number and the optional
				1786	* comparator after the opcode */
				1787	regcode = re_put_long(regcode, n);
				1788	*regcode++ = cmp;
				1789	}
				1790	break;
				1791	}
				1792	}
				1793
				1794	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1795	reg_magic == MAGIC_ALL);
				1796	}
				1797	}
				1798	break;
				1799
				1800	case Magic('['):
				1801	collection:
				1802	{
				1803	char_u *lp;
				1804
				1805	/*
				1806	* If there is no matching ']', we assume the '[' is a normal
				1807	* character. This makes 'incsearch' and ":help [" work.
				1808	*/
				1809	lp = skip_anyof(regparse);
				1810	if (lp == ']') / there is a matching ']' */
				1811	{
				1812	int startc = -1; /* > 0 when next '-' is a range */
				1813	int endc;
				1814
				1815	/*
				1816	* In a character class, different parsing rules apply.
				1817	* Not even \ is special anymore, nothing is.
				1818	*/
				1819	if (regparse == '^') / Complement of range. */
				1820	{
				1821	ret = regnode(ANYBUT + extra);
				1822	regparse++;
				1823	}
				1824	else
				1825	ret = regnode(ANYOF + extra);
				1826
				1827	/* At the start ']' and '-' mean the literal character. */
				1828	if (regparse == ']' \|\| regparse == '-')
				1829	regc(*regparse++);
				1830
				1831	while (regparse != NUL && regparse != ']')
				1832	{
				1833	if (*regparse == '-')
				1834	{
				1835	++regparse;
				1836	/* The '-' is not used for a range at the end and
				1837	* after or before a '\n'. */
				1838	if (regparse == ']' \|\| regparse == NUL
				1839	\|\| startc == -1
				1840	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				1841	{
				1842	regc('-');
				1843	startc = '-'; /* [--x] is a range */
				1844	}
				1845	else
				1846	{
				1847	#ifdef FEAT_MBYTE
				1848	if (has_mbyte)
				1849	endc = mb_ptr2char_adv(&regparse);
				1850	else
				1851	#endif
				1852	endc = *regparse++;
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1853
				1854	/* Handle \o40, \x20 and \u20AC style sequences */
				1855	if (endc == '\\' && !cpo_lit)
				1856	endc = coll_get_char();
				1857
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1858	if (startc > endc)
				1859	EMSG_RET_NULL(_(e_invrange));
				1860	#ifdef FEAT_MBYTE
				1861	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1862	\|\| (*mb_char2len)(endc) > 1))
				1863	{
				1864	/* Limit to a range of 256 chars */
				1865	if (endc > startc + 256)
				1866	EMSG_RET_NULL(_(e_invrange));
				1867	while (++startc <= endc)
				1868	regmbc(startc);
				1869	}
				1870	else
				1871	#endif
				1872	{
				1873	#ifdef EBCDIC
				1874	int alpha_only = FALSE;
				1875
				1876	/* for alphabetical range skip the gaps
				1877	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1878	if (isalpha(startc) && isalpha(endc))
				1879	alpha_only = TRUE;
				1880	#endif
				1881	while (++startc <= endc)
				1882	#ifdef EBCDIC
				1883	if (!alpha_only \|\| isalpha(startc))
				1884	#endif
				1885	regc(startc);
				1886	}
				1887	startc = -1;
				1888	}
				1889	}
				1890	/*
				1891	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1892	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1893	* 'cpoptions' is not included.
				1894	*/
				1895	else if (*regparse == '\\'
				1896	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1897	\|\| (!cpo_lit
				1898	&& vim_strchr(REGEXP_ABBR,
				1899	regparse[1]) != NULL)))
				1900	{
				1901	regparse++;
				1902	if (*regparse == 'n')
				1903	{
				1904	/* '\n' in range: also match NL */
				1905	if (ret != JUST_CALC_SIZE)
				1906	{
				1907	if (*ret == ANYBUT)
				1908	*ret = ANYBUT + ADD_NL;
				1909	else if (*ret == ANYOF)
				1910	*ret = ANYOF + ADD_NL;
				1911	/* else: must have had a \n already */
				1912	}
				1913	*flagp \|= HASNL;
				1914	regparse++;
				1915	startc = -1;
				1916	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1917	else if (*regparse == 'd'
				1918	\|\| *regparse == 'o'
				1919	\|\| *regparse == 'x'
				1920	\|\| *regparse == 'u'
				1921	\|\| *regparse == 'U')
				1922	{
				1923	startc = coll_get_char();
				1924	if (startc == 0)
				1925	regc(0x0a);
				1926	else
				1927	#ifdef FEAT_MBYTE
				1928	regmbc(startc);
				1929	#else
				1930	regc(startc);
				1931	#endif
				1932	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1933	else
				1934	{
				1935	startc = backslash_trans(*regparse++);
				1936	regc(startc);
				1937	}
				1938	}
				1939	else if (*regparse == '[')
				1940	{
				1941	int c_class;
				1942	int cu;
				1943
				1944	c_class = skip_class_name(&regparse);
				1945	startc = -1;
				1946	/* Characters assumed to be 8 bits! */
				1947	switch (c_class)
				1948	{
				1949	case CLASS_NONE:
				1950	/* literal '[', allow [[-x] as a range */
				1951	startc = *regparse++;
				1952	regc(startc);
				1953	break;
				1954	case CLASS_ALNUM:
				1955	for (cu = 1; cu <= 255; cu++)
				1956	if (isalnum(cu))
				1957	regc(cu);
				1958	break;
				1959	case CLASS_ALPHA:
				1960	for (cu = 1; cu <= 255; cu++)
				1961	if (isalpha(cu))
				1962	regc(cu);
				1963	break;
				1964	case CLASS_BLANK:
				1965	regc(' ');
				1966	regc('\t');
				1967	break;
				1968	case CLASS_CNTRL:
				1969	for (cu = 1; cu <= 255; cu++)
				1970	if (iscntrl(cu))
				1971	regc(cu);
				1972	break;
				1973	case CLASS_DIGIT:
				1974	for (cu = 1; cu <= 255; cu++)
				1975	if (VIM_ISDIGIT(cu))
				1976	regc(cu);
				1977	break;
				1978	case CLASS_GRAPH:
				1979	for (cu = 1; cu <= 255; cu++)
				1980	if (isgraph(cu))
				1981	regc(cu);
				1982	break;
				1983	case CLASS_LOWER:
				1984	for (cu = 1; cu <= 255; cu++)
				1985	if (islower(cu))
				1986	regc(cu);
				1987	break;
				1988	case CLASS_PRINT:
				1989	for (cu = 1; cu <= 255; cu++)
				1990	if (vim_isprintc(cu))
				1991	regc(cu);
				1992	break;
				1993	case CLASS_PUNCT:
				1994	for (cu = 1; cu <= 255; cu++)
				1995	if (ispunct(cu))
				1996	regc(cu);
				1997	break;
				1998	case CLASS_SPACE:
				1999	for (cu = 9; cu <= 13; cu++)
				2000	regc(cu);
				2001	regc(' ');
				2002	break;
				2003	case CLASS_UPPER:
				2004	for (cu = 1; cu <= 255; cu++)
				2005	if (isupper(cu))
				2006	regc(cu);
				2007	break;
				2008	case CLASS_XDIGIT:
				2009	for (cu = 1; cu <= 255; cu++)
				2010	if (vim_isxdigit(cu))
				2011	regc(cu);
				2012	break;
				2013	case CLASS_TAB:
				2014	regc('\t');
				2015	break;
				2016	case CLASS_RETURN:
				2017	regc('\r');
				2018	break;
				2019	case CLASS_BACKSPACE:
				2020	regc('\b');
				2021	break;
				2022	case CLASS_ESCAPE:
				2023	regc('\033');
				2024	break;
				2025	}
				2026	}
				2027	else
				2028	{
				2029	#ifdef FEAT_MBYTE
				2030	if (has_mbyte)
				2031	{
				2032	int len;
				2033
				2034	/* produce a multibyte character, including any
				2035	* following composing characters */
				2036	startc = mb_ptr2char(regparse);
				2037	len = (*mb_ptr2len_check)(regparse);
				2038	if (enc_utf8 && utf_char2len(startc) != len)
				2039	startc = -1; /* composing chars */
				2040	while (--len >= 0)
				2041	regc(*regparse++);
				2042	}
				2043	else
				2044	#endif
				2045	{
				2046	startc = *regparse++;
				2047	regc(startc);
				2048	}
				2049	}
				2050	}
				2051	regc(NUL);
				2052	prevchr_len = 1; /* last char was the ']' */
				2053	if (*regparse != ']')
				2054	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2055	skipchr(); /* let's be friends with the lexer again */
				2056	*flagp \|= HASWIDTH \| SIMPLE;
				2057	break;
				2058	}
				2059	}
				2060	/* FALLTHROUGH */
				2061
				2062	default:
				2063	{
				2064	int len;
				2065
				2066	#ifdef FEAT_MBYTE
				2067	/* A multi-byte character is handled as a separate atom if it's
				2068	* before a multi. */
				2069	if (has_mbyte && (*mb_char2len)(c) > 1
				2070	&& re_multi_type(peekchr()) != NOT_MULTI)
				2071	{
				2072	ret = regnode(MULTIBYTECODE);
				2073	regmbc(c);
				2074	*flagp \|= HASWIDTH \| SIMPLE;
				2075	break;
				2076	}
				2077	#endif
				2078
				2079	ret = regnode(EXACTLY);
				2080
				2081	/*
				2082	* Append characters as long as:
				2083	* - there is no following multi, we then need the character in
				2084	* front of it as a single character operand
				2085	* - not running into a Magic character
				2086	* - "one_exactly" is not set
				2087	* But always emit at least one character. Might be a Multi,
				2088	* e.g., a "[" without matching "]".
				2089	*/
				2090	for (len = 0; c != NUL && (len == 0
				2091	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2092	&& !one_exactly
				2093	&& !is_Magic(c))); ++len)
				2094	{
				2095	c = no_Magic(c);
				2096	#ifdef FEAT_MBYTE
				2097	if (has_mbyte)
				2098	{
				2099	regmbc(c);
				2100	if (enc_utf8)
				2101	{
				2102	int off;
				2103	int l;
				2104
				2105	/* Need to get composing character too, directly
				2106	* access regparse for that, because skipchr() skips
				2107	* over composing chars. */
				2108	ungetchr();
				2109	if (*regparse == '\\' && regparse[1] != NUL)
				2110	off = 1;
				2111	else
				2112	off = 0;
				2113	for (;;)
				2114	{
				2115	l = utf_ptr2len_check(regparse + off);
				2116	if (!UTF_COMPOSINGLIKE(regparse + off,
				2117	regparse + off + l))
				2118	break;
				2119	off += l;
				2120	regmbc(utf_ptr2char(regparse + off));
				2121	}
				2122	skipchr();
				2123	}
				2124	}
				2125	else
				2126	#endif
				2127	regc(c);
				2128	c = getchr();
				2129	}
				2130	ungetchr();
				2131
				2132	regc(NUL);
				2133	*flagp \|= HASWIDTH;
				2134	if (len == 1)
				2135	*flagp \|= SIMPLE;
				2136	}
				2137	break;
				2138	}
				2139
				2140	return ret;
				2141	}
				2142
				2143	/*
				2144	* emit a node
				2145	* Return pointer to generated code.
				2146	*/
				2147	static char_u *
				2148	regnode(op)
				2149	int op;
				2150	{
				2151	char_u *ret;
				2152
				2153	ret = regcode;
				2154	if (ret == JUST_CALC_SIZE)
				2155	regsize += 3;
				2156	else
				2157	{
				2158	*regcode++ = op;
				2159	regcode++ = NUL; / Null "next" pointer. */
				2160	*regcode++ = NUL;
				2161	}
				2162	return ret;
				2163	}
				2164
				2165	/*
				2166	* Emit (if appropriate) a byte of code
				2167	*/
				2168	static void
				2169	regc(b)
				2170	int b;
				2171	{
				2172	if (regcode == JUST_CALC_SIZE)
				2173	regsize++;
				2174	else
				2175	*regcode++ = b;
				2176	}
				2177
				2178	#ifdef FEAT_MBYTE
				2179	/*
				2180	* Emit (if appropriate) a multi-byte character of code
				2181	*/
				2182	static void
				2183	regmbc(c)
				2184	int c;
				2185	{
				2186	if (regcode == JUST_CALC_SIZE)
				2187	regsize += (*mb_char2len)(c);
				2188	else
				2189	regcode += (*mb_char2bytes)(c, regcode);
				2190	}
				2191	#endif
				2192
				2193	/*
				2194	* reginsert - insert an operator in front of already-emitted operand
				2195	*
				2196	* Means relocating the operand.
				2197	*/
				2198	static void
				2199	reginsert(op, opnd)
				2200	int op;
				2201	char_u *opnd;
				2202	{
				2203	char_u *src;
				2204	char_u *dst;
				2205	char_u *place;
				2206
				2207	if (regcode == JUST_CALC_SIZE)
				2208	{
				2209	regsize += 3;
				2210	return;
				2211	}
				2212	src = regcode;
				2213	regcode += 3;
				2214	dst = regcode;
				2215	while (src > opnd)
				2216	--dst = --src;
				2217
				2218	place = opnd; /* Op node, where operand used to be. */
				2219	*place++ = op;
				2220	*place++ = NUL;
				2221	*place = NUL;
				2222	}
				2223
				2224	/*
				2225	* reginsert_limits - insert an operator in front of already-emitted operand.
				2226	* The operator has the given limit values as operands. Also set next pointer.
				2227	*
				2228	* Means relocating the operand.
				2229	*/
				2230	static void
				2231	reginsert_limits(op, minval, maxval, opnd)
				2232	int op;
				2233	long minval;
				2234	long maxval;
				2235	char_u *opnd;
				2236	{
				2237	char_u *src;
				2238	char_u *dst;
				2239	char_u *place;
				2240
				2241	if (regcode == JUST_CALC_SIZE)
				2242	{
				2243	regsize += 11;
				2244	return;
				2245	}
				2246	src = regcode;
				2247	regcode += 11;
				2248	dst = regcode;
				2249	while (src > opnd)
				2250	--dst = --src;
				2251
				2252	place = opnd; /* Op node, where operand used to be. */
				2253	*place++ = op;
				2254	*place++ = NUL;
				2255	*place++ = NUL;
				2256	place = re_put_long(place, (long_u)minval);
				2257	place = re_put_long(place, (long_u)maxval);
				2258	regtail(opnd, place);
				2259	}
				2260
				2261	/*
				2262	* Write a long as four bytes at "p" and return pointer to the next char.
				2263	*/
				2264	static char_u *
				2265	re_put_long(p, val)
				2266	char_u *p;
				2267	long_u val;
				2268	{
				2269	*p++ = (char_u) ((val >> 24) & 0377);
				2270	*p++ = (char_u) ((val >> 16) & 0377);
				2271	*p++ = (char_u) ((val >> 8) & 0377);
				2272	*p++ = (char_u) (val & 0377);
				2273	return p;
				2274	}
				2275
				2276	/*
				2277	* regtail - set the next-pointer at the end of a node chain
				2278	*/
				2279	static void
				2280	regtail(p, val)
				2281	char_u *p;
				2282	char_u *val;
				2283	{
				2284	char_u *scan;
				2285	char_u *temp;
				2286	int offset;
				2287
				2288	if (p == JUST_CALC_SIZE)
				2289	return;
				2290
				2291	/* Find last node. */
				2292	scan = p;
				2293	for (;;)
				2294	{
				2295	temp = regnext(scan);
				2296	if (temp == NULL)
				2297	break;
				2298	scan = temp;
				2299	}
				2300
				2301	if (OP(scan) == BACK)
				2302	offset = (int)(scan - val);
				2303	else
				2304	offset = (int)(val - scan);
				2305	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2306	*(scan + 2) = (char_u) (offset & 0377);
				2307	}
				2308
				2309	/*
				2310	* regoptail - regtail on item after a BRANCH; nop if none
				2311	*/
				2312	static void
				2313	regoptail(p, val)
				2314	char_u *p;
				2315	char_u *val;
				2316	{
				2317	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2318	if (p == NULL \|\| p == JUST_CALC_SIZE
				2319	\|\| (OP(p) != BRANCH
				2320	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2321	return;
				2322	regtail(OPERAND(p), val);
				2323	}
				2324
				2325	/*
				2326	* getchr() - get the next character from the pattern. We know about
				2327	* magic and such, so therefore we need a lexical analyzer.
				2328	*/
				2329
				2330	/* static int curchr; */
				2331	static int prevprevchr;
				2332	static int prevchr;
				2333	static int nextchr; /* used for ungetchr() */
				2334	/*
				2335	* Note: prevchr is sometimes -1 when we are not at the start,
				2336	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2337	* taken to be magic -- webb
				2338	*/
				2339	static int at_start; /* True when on the first character */
				2340	static int prev_at_start; /* True when on the second character */
				2341
				2342	static void
				2343	initchr(str)
				2344	char_u *str;
				2345	{
				2346	regparse = str;
				2347	prevchr_len = 0;
				2348	curchr = prevprevchr = prevchr = nextchr = -1;
				2349	at_start = TRUE;
				2350	prev_at_start = FALSE;
				2351	}
				2352
				2353	static int
				2354	peekchr()
				2355	{
				2356	if (curchr == -1)
				2357	{
				2358	switch (curchr = regparse[0])
				2359	{
				2360	case '.':
				2361	case '[':
				2362	case '~':
				2363	/* magic when 'magic' is on */
				2364	if (reg_magic >= MAGIC_ON)
				2365	curchr = Magic(curchr);
				2366	break;
				2367	case '(':
				2368	case ')':
				2369	case '{':
				2370	case '%':
				2371	case '+':
				2372	case '=':
				2373	case '?':
				2374	case '@':
				2375	case '!':
				2376	case '&':
				2377	case '\|':
				2378	case '<':
				2379	case '>':
				2380	case '#': /* future ext. */
				2381	case '"': /* future ext. */
				2382	case '\'': /* future ext. */
				2383	case ',': /* future ext. */
				2384	case '-': /* future ext. */
				2385	case ':': /* future ext. */
				2386	case ';': /* future ext. */
				2387	case '`': /* future ext. */
				2388	case '/': /* Can't be used in / command */
				2389	/* magic only after "\v" */
				2390	if (reg_magic == MAGIC_ALL)
				2391	curchr = Magic(curchr);
				2392	break;
				2393	case '*':
				2394	/* * is not magic as the very first character, eg "?*ptr" and when
				2395	* after '^', eg "/^ptr" /
				2396	if (reg_magic >= MAGIC_ON && !at_start
				2397	&& !(prev_at_start && prevchr == Magic('^')))
				2398	curchr = Magic('*');
				2399	break;
				2400	case '^':
				2401	/* '^' is only magic as the very first character and if it's after
				2402	* "\(", "\\|", "\&' or "\n" */
				2403	if (reg_magic >= MAGIC_OFF
				2404	&& (at_start
				2405	\|\| reg_magic == MAGIC_ALL
				2406	\|\| prevchr == Magic('(')
				2407	\|\| prevchr == Magic('\|')
				2408	\|\| prevchr == Magic('&')
				2409	\|\| prevchr == Magic('n')
				2410	\|\| (no_Magic(prevchr) == '('
				2411	&& prevprevchr == Magic('%'))))
				2412	{
				2413	curchr = Magic('^');
				2414	at_start = TRUE;
				2415	prev_at_start = FALSE;
				2416	}
				2417	break;
				2418	case '$':
				2419	/* '$' is only magic as the very last char and if it's in front of
				2420	* either "\\|", "\)", "\&", or "\n" */
				2421	if (reg_magic >= MAGIC_OFF)
				2422	{
				2423	char_u *p = regparse + 1;
				2424
				2425	/* ignore \c \C \m and \M after '$' */
				2426	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2427	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2428	p += 2;
				2429	if (p[0] == NUL
				2430	\|\| (p[0] == '\\'
				2431	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2432	\|\| p[1] == 'n'))
				2433	\|\| reg_magic == MAGIC_ALL)
				2434	curchr = Magic('$');
				2435	}
				2436	break;
				2437	case '\\':
				2438	{
				2439	int c = regparse[1];
				2440
				2441	if (c == NUL)
				2442	curchr = '\\'; /* trailing '\' */
				2443	else if (
				2444	#ifdef EBCDIC
				2445	vim_strchr(META, c)
				2446	#else
				2447	c <= '~' && META_flags[c]
				2448	#endif
				2449	)
				2450	{
				2451	/*
				2452	* META contains everything that may be magic sometimes,
				2453	* except ^ and $ ("\^" and "\$" are only magic after
				2454	* "\v"). We now fetch the next character and toggle its
				2455	* magicness. Therefore, \ is so meta-magic that it is
				2456	* not in META.
				2457	*/
				2458	curchr = -1;
				2459	prev_at_start = at_start;
				2460	at_start = FALSE; /* be able to say "/\ptr" /
				2461	++regparse;
				2462	peekchr();
				2463	--regparse;
				2464	curchr = toggle_Magic(curchr);
				2465	}
				2466	else if (vim_strchr(REGEXP_ABBR, c))
				2467	{
				2468	/*
				2469	* Handle abbreviations, like "\t" for TAB -- webb
				2470	*/
				2471	curchr = backslash_trans(c);
				2472	}
				2473	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2474	curchr = toggle_Magic(c);
				2475	else
				2476	{
				2477	/*
				2478	* Next character can never be (made) magic?
				2479	* Then backslashing it won't do anything.
				2480	*/
				2481	#ifdef FEAT_MBYTE
				2482	if (has_mbyte)
				2483	curchr = (*mb_ptr2char)(regparse + 1);
				2484	else
				2485	#endif
				2486	curchr = c;
				2487	}
				2488	break;
				2489	}
				2490
				2491	#ifdef FEAT_MBYTE
				2492	default:
				2493	if (has_mbyte)
				2494	curchr = (*mb_ptr2char)(regparse);
				2495	#endif
				2496	}
				2497	}
				2498
				2499	return curchr;
				2500	}
				2501
				2502	/*
				2503	* Eat one lexed character. Do this in a way that we can undo it.
				2504	*/
				2505	static void
				2506	skipchr()
				2507	{
				2508	/* peekchr() eats a backslash, do the same here */
				2509	if (*regparse == '\\')
				2510	prevchr_len = 1;
				2511	else
				2512	prevchr_len = 0;
				2513	if (regparse[prevchr_len] != NUL)
				2514	{
				2515	#ifdef FEAT_MBYTE
				2516	if (has_mbyte)
				2517	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2518	else
				2519	#endif
				2520	++prevchr_len;
				2521	}
				2522	regparse += prevchr_len;
				2523	prev_at_start = at_start;
				2524	at_start = FALSE;
				2525	prevprevchr = prevchr;
				2526	prevchr = curchr;
				2527	curchr = nextchr; /* use previously unget char, or -1 */
				2528	nextchr = -1;
				2529	}
				2530
				2531	/*
				2532	* Skip a character while keeping the value of prev_at_start for at_start.
				2533	* prevchr and prevprevchr are also kept.
				2534	*/
				2535	static void
				2536	skipchr_keepstart()
				2537	{
				2538	int as = prev_at_start;
				2539	int pr = prevchr;
				2540	int prpr = prevprevchr;
				2541
				2542	skipchr();
				2543	at_start = as;
				2544	prevchr = pr;
				2545	prevprevchr = prpr;
				2546	}
				2547
				2548	static int
				2549	getchr()
				2550	{
				2551	int chr = peekchr();
				2552
				2553	skipchr();
				2554	return chr;
				2555	}
				2556
				2557	/*
				2558	* put character back. Works only once!
				2559	*/
				2560	static void
				2561	ungetchr()
				2562	{
				2563	nextchr = curchr;
				2564	curchr = prevchr;
				2565	prevchr = prevprevchr;
				2566	at_start = prev_at_start;
				2567	prev_at_start = FALSE;
				2568
				2569	/* Backup regparse, so that it's at the same position as before the
				2570	* getchr(). */
				2571	regparse -= prevchr_len;
				2572	}
				2573
				2574	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2575	* Get and return the value of the hex string at the current position.
				2576	* Return -1 if there is no valid hex number.
				2577	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2578	* blahblah\%x20asdf
				2579	* before-^ ^-after
				2580	* The parameter controls the maximum number of input characters. This will be
				2581	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2582	*/
				2583	static int
				2584	gethexchrs(maxinputlen)
				2585	int maxinputlen;
				2586	{
				2587	int nr = 0;
				2588	int c;
				2589	int i;
				2590
				2591	for (i = 0; i < maxinputlen; ++i)
				2592	{
				2593	c = regparse[0];
				2594	if (!vim_isxdigit(c))
				2595	break;
				2596	nr <<= 4;
				2597	nr \|= hex2nr(c);
				2598	++regparse;
				2599	}
				2600
				2601	if (i == 0)
				2602	return -1;
				2603	return nr;
				2604	}
				2605
				2606	/*
				2607	* get and return the value of the decimal string immediately after the
				2608	* current position. Return -1 for invalid. Consumes all digits.
				2609	*/
				2610	static int
				2611	getdecchrs()
				2612	{
				2613	int nr = 0;
				2614	int c;
				2615	int i;
				2616
				2617	for (i = 0; ; ++i)
				2618	{
				2619	c = regparse[0];
				2620	if (c < '0' \|\| c > '9')
				2621	break;
				2622	nr *= 10;
				2623	nr += c - '0';
				2624	++regparse;
				2625	}
				2626
				2627	if (i == 0)
				2628	return -1;
				2629	return nr;
				2630	}
				2631
				2632	/*
				2633	* get and return the value of the octal string immediately after the current
				2634	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2635	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2636	* treat 8 or 9 as recognised characters. Position is updated:
				2637	* blahblah\%o210asdf
				2638	* before-^ ^-after
				2639	*/
				2640	static int
				2641	getoctchrs()
				2642	{
				2643	int nr = 0;
				2644	int c;
				2645	int i;
				2646
				2647	for (i = 0; i < 3 && nr < 040; ++i)
				2648	{
				2649	c = regparse[0];
				2650	if (c < '0' \|\| c > '7')
				2651	break;
				2652	nr <<= 3;
				2653	nr \|= hex2nr(c);
				2654	++regparse;
				2655	}
				2656
				2657	if (i == 0)
				2658	return -1;
				2659	return nr;
				2660	}
				2661
				2662	/*
				2663	* Get a number after a backslash that is inside [].
				2664	* When nothing is recognized return a backslash.
				2665	*/
				2666	static int
				2667	coll_get_char()
				2668	{
				2669	int nr = -1;
				2670
				2671	switch (*regparse++)
				2672	{
				2673	case 'd': nr = getdecchrs(); break;
				2674	case 'o': nr = getoctchrs(); break;
				2675	case 'x': nr = gethexchrs(2); break;
				2676	case 'u': nr = gethexchrs(4); break;
				2677	case 'U': nr = gethexchrs(8); break;
				2678	}
				2679	if (nr < 0)
				2680	{
				2681	/* If getting the number fails be backwards compatible: the character
				2682	* is a backslash. */
				2683	--regparse;
				2684	nr = '\\';
				2685	}
				2686	return nr;
				2687	}
				2688
				2689	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2690	* read_limits - Read two integers to be taken as a minimum and maximum.
				2691	* If the first character is '-', then the range is reversed.
				2692	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2693	* missing, a very big number is the default.
				2694	*/
				2695	static int
				2696	read_limits(minval, maxval)
				2697	long *minval;
				2698	long *maxval;
				2699	{
				2700	int reverse = FALSE;
				2701	char_u *first_char;
				2702	long tmp;
				2703
				2704	if (*regparse == '-')
				2705	{
				2706	/* Starts with '-', so reverse the range later */
				2707	regparse++;
				2708	reverse = TRUE;
				2709	}
				2710	first_char = regparse;
				2711	*minval = getdigits(&regparse);
				2712	if (regparse == ',') / There is a comma */
				2713	{
				2714	if (vim_isdigit(*++regparse))
				2715	*maxval = getdigits(&regparse);
				2716	else
				2717	*maxval = MAX_LIMIT;
				2718	}
				2719	else if (VIM_ISDIGIT(*first_char))
				2720	maxval = minval; /* It was \{n} or \{-n} */
				2721	else
				2722	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2723	if (*regparse == '\\')
				2724	regparse++; /* Allow either \{...} or \{...\} */
				2725	if (regparse != '}' \|\| (maxval == 0 && *minval == 0))
				2726	{
				2727	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2728	reg_magic == MAGIC_ALL ? "" : "\\");
				2729	EMSG_RET_FAIL(IObuff);
				2730	}
				2731
				2732	/*
				2733	* Reverse the range if there was a '-', or make sure it is in the right
				2734	* order otherwise.
				2735	*/
				2736	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2737	{
				2738	tmp = *minval;
				2739	minval = maxval;
				2740	*maxval = tmp;
				2741	}
				2742	skipchr(); /* let's be friends with the lexer again */
				2743	return OK;
				2744	}
				2745
				2746	/*
				2747	* vim_regexec and friends
				2748	*/
				2749
				2750	/*
				2751	* Global work variables for vim_regexec().
				2752	*/
				2753
				2754	/* The current match-position is remembered with these variables: */
				2755	static linenr_T reglnum; /* line number, relative to first line */
				2756	static char_u regline; / start of current line */
				2757	static char_u reginput; / current input, points into "regline" */
				2758
				2759	static int need_clear_subexpr; /* subexpressions still need to be
				2760	* cleared */
				2761	#ifdef FEAT_SYN_HL
				2762	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2763	* still need to be cleared */
				2764	#endif
				2765
				2766	static int out_of_stack; /* TRUE when ran out of stack space */
				2767
				2768	/*
				2769	* Structure used to save the current input state, when it needs to be
				2770	* restored after trying a match. Used by reg_save() and reg_restore().
				2771	*/
				2772	typedef struct
				2773	{
				2774	union
				2775	{
				2776	char_u ptr; / reginput pointer, for single-line regexp */
				2777	lpos_T pos; /* reginput pos, for multi-line regexp */
				2778	} rs_u;
				2779	} regsave_T;
				2780
				2781	/* struct to save start/end pointer/position in for */
				2782	typedef struct
				2783	{
				2784	union
				2785	{
				2786	char_u *ptr;
				2787	lpos_T pos;
				2788	} se_u;
				2789	} save_se_T;
				2790
				2791	static char_u *reg_getline __ARGS((linenr_T lnum));
				2792	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2793	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2794	static void cleanup_subexpr __ARGS((void));
				2795	#ifdef FEAT_SYN_HL
				2796	static void cleanup_zsubexpr __ARGS((void));
				2797	#endif
				2798	static void reg_nextline __ARGS((void));
				2799	static void reg_save __ARGS((regsave_T *save));
				2800	static void reg_restore __ARGS((regsave_T *save));
				2801	static int reg_save_equal __ARGS((regsave_T *save));
				2802	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2803	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2804
				2805	/* Save the sub-expressions before attempting a match. */
				2806	#define save_se(savep, posp, pp) \
				2807	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				2808
				2809	/* After a failed match restore the sub-expressions. */
				2810	#define restore_se(savep, posp, pp) { \
				2811	if (REG_MULTI) \
				2812	*(posp) = (savep)->se_u.pos; \
				2813	else \
				2814	*(pp) = (savep)->se_u.ptr; }
				2815
				2816	static int re_num_cmp __ARGS((long_u val, char_u *scan));
				2817	static int regmatch __ARGS((char_u *prog));
				2818	static int regrepeat __ARGS((char_u *p, long maxcount));
				2819
				2820	#ifdef DEBUG
				2821	int regnarrate = 0;
				2822	#endif
				2823
				2824	/*
				2825	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				2826	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				2827	* contains '\c' or '\C' the value is overruled.
				2828	*/
				2829	static int ireg_ic;
				2830
				2831	#ifdef FEAT_MBYTE
				2832	/*
				2833	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				2834	* in the regexp. Defaults to false, always.
				2835	*/
				2836	static int ireg_icombine;
				2837	#endif
				2838
				2839	/*
				2840	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				2841	* slow, we keep one allocated piece of memory and only re-allocate it when
				2842	* it's too small. It's freed in vim_regexec_both() when finished.
				2843	*/
				2844	static char_u *reg_tofree;
				2845	static unsigned reg_tofreelen;
				2846
				2847	/*
				2848	* These variables are set when executing a regexp to speed up the execution.
				2849	* Which ones are set depends on whethere a single-line or multi-line match is
				2850	* done:
				2851	* single-line multi-line
				2852	* reg_match &regmatch_T NULL
				2853	* reg_mmatch NULL &regmmatch_T
				2854	* reg_startp reg_match->startp <invalid>
				2855	* reg_endp reg_match->endp <invalid>
				2856	* reg_startpos <invalid> reg_mmatch->startpos
				2857	* reg_endpos <invalid> reg_mmatch->endpos
				2858	* reg_win NULL window in which to search
				2859	* reg_buf <invalid> buffer in which to search
				2860	* reg_firstlnum <invalid> first line in which to search
				2861	* reg_maxline 0 last line nr
				2862	* reg_line_lbr FALSE or TRUE FALSE
				2863	*/
				2864	static regmatch_T *reg_match;
				2865	static regmmatch_T *reg_mmatch;
				2866	static char_u **reg_startp = NULL;
				2867	static char_u **reg_endp = NULL;
				2868	static lpos_T *reg_startpos = NULL;
				2869	static lpos_T *reg_endpos = NULL;
				2870	static win_T *reg_win;
				2871	static buf_T *reg_buf;
				2872	static linenr_T reg_firstlnum;
				2873	static linenr_T reg_maxline;
				2874	static int reg_line_lbr; /* "\n" in string is line break */
				2875
				2876	/*
				2877	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				2878	*/
				2879	static char_u *
				2880	reg_getline(lnum)
				2881	linenr_T lnum;
				2882	{
				2883	/* when looking behind for a match/no-match lnum is negative. But we
				2884	* can't go before line 1 */
				2885	if (reg_firstlnum + lnum < 1)
				2886	return NULL;
				2887	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				2888	}
				2889
				2890	static regsave_T behind_pos;
				2891
				2892	#ifdef FEAT_SYN_HL
				2893	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				2894	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				2895	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				2896	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				2897	#endif
				2898
				2899	/* TRUE if using multi-line regexp. */
				2900	#define REG_MULTI (reg_match == NULL)
				2901
				2902	/*
				2903	* Match a regexp against a string.
				2904	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2905	* Uses curbuf for line count and 'iskeyword'.
				2906	*
				2907	* Return TRUE if there is a match, FALSE if not.
				2908	*/
				2909	int
				2910	vim_regexec(rmp, line, col)
				2911	regmatch_T *rmp;
				2912	char_u line; / string to match against */
				2913	colnr_T col; /* column to start looking for match */
				2914	{
				2915	reg_match = rmp;
				2916	reg_mmatch = NULL;
				2917	reg_maxline = 0;
				2918	reg_line_lbr = FALSE;
				2919	reg_win = NULL;
				2920	ireg_ic = rmp->rm_ic;
				2921	#ifdef FEAT_MBYTE
				2922	ireg_icombine = FALSE;
				2923	#endif
				2924	return (vim_regexec_both(line, col) != 0);
				2925	}
				2926
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	2927	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \
				2928	\|\| defined(FIND_REPLACE_DIALOG) \|\| defined(PROTO)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2929	/*
				2930	* Like vim_regexec(), but consider a "\n" in "line" to be a line break.
				2931	*/
				2932	int
				2933	vim_regexec_nl(rmp, line, col)
				2934	regmatch_T *rmp;
				2935	char_u line; / string to match against */
				2936	colnr_T col; /* column to start looking for match */
				2937	{
				2938	reg_match = rmp;
				2939	reg_mmatch = NULL;
				2940	reg_maxline = 0;
				2941	reg_line_lbr = TRUE;
				2942	reg_win = NULL;
				2943	ireg_ic = rmp->rm_ic;
				2944	#ifdef FEAT_MBYTE
				2945	ireg_icombine = FALSE;
				2946	#endif
				2947	return (vim_regexec_both(line, col) != 0);
				2948	}
				2949	#endif
				2950
				2951	/*
				2952	* Match a regexp against multiple lines.
				2953	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2954	* Uses curbuf for line count and 'iskeyword'.
				2955	*
				2956	* Return zero if there is no match. Return number of lines contained in the
				2957	* match otherwise.
				2958	*/
				2959	long
				2960	vim_regexec_multi(rmp, win, buf, lnum, col)
				2961	regmmatch_T *rmp;
				2962	win_T win; / window in which to search or NULL */
				2963	buf_T buf; / buffer in which to search */
				2964	linenr_T lnum; /* nr of line to start looking for match */
				2965	colnr_T col; /* column to start looking for match */
				2966	{
				2967	long r;
				2968	buf_T *save_curbuf = curbuf;
				2969
				2970	reg_match = NULL;
				2971	reg_mmatch = rmp;
				2972	reg_buf = buf;
				2973	reg_win = win;
				2974	reg_firstlnum = lnum;
				2975	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				2976	reg_line_lbr = FALSE;
				2977	ireg_ic = rmp->rmm_ic;
				2978	#ifdef FEAT_MBYTE
				2979	ireg_icombine = FALSE;
				2980	#endif
				2981
				2982	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				2983	curbuf = buf;
				2984	r = vim_regexec_both(NULL, col);
				2985	curbuf = save_curbuf;
				2986
				2987	return r;
				2988	}
				2989
				2990	/*
				2991	* Match a regexp against a string ("line" points to the string) or multiple
				2992	* lines ("line" is NULL, use reg_getline()).
					* "col" is the column in the (first) line where matching starts.
					* The program and the place to store the match positions are taken from
					* reg_match (single string) or reg_mmatch (multiple lines).
					*
					* Returns zero when there is no match, otherwise the value returned by
					* regtry(). Gives an error message for a NULL program or line, for an
					* intercepted crash and for running out of stack.
				2993	*/
				2994	#ifdef HAVE_SETJMP_H
				2995	static long
				2996	vim_regexec_both(line_arg, col_arg)
				2997	char_u *line_arg;
				2998	colnr_T col_arg; /* column to start looking for match */
				2999	#else
				3000	static long
				3001	vim_regexec_both(line, col)
				3002	char_u *line;
				3003	colnr_T col; /* column to start looking for match */
				3004	#endif
				3005	{
				3006	regprog_T *prog;
				3007	char_u *s;
				3008	long retval;
				3009	#ifdef HAVE_SETJMP_H
				3010	char_u *line;
				3011	colnr_T col;
				3012	#endif
				3013
				3014	reg_tofree = NULL; /* no line copy yet; whatever is stored here is freed at "theend" */
				3015
				3016	#ifdef HAVE_TRY_EXCEPT
				3017	__try
				3018	{
				3019	#endif
				3020
				3021	#ifdef HAVE_SETJMP_H
				3022	/*
				3023	* Matching with a regexp may cause a very deep recursive call of
				3024	* regmatch(). Vim will crash when running out of stack space. Catch
				3025	* this here if the system supports it.
				3026	*/
				3027	mch_startjmp();
				3028	if (SETJMP(lc_jump_env) != 0)
				3029	{
				3030	mch_didjmp();
				3031	# ifdef SIGHASARG
				3032	if (lc_signal != SIGINT)
				3033	# endif
				3034	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3035	retval = 0L;
				3036	goto theend;
				3037	}
				3038
				3039	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3040	line = line_arg;
				3041	col = col_arg;
				3042	#endif
				3043	retval = 0L; /* no match, unless regtry() says otherwise */
				3044
					/* Pick the program and the arrays that receive the match positions for
					* the active mode. In multi-line mode the first line is fetched here. */
				3045	if (REG_MULTI)
				3046	{
				3047	prog = reg_mmatch->regprog;
				3048	line = reg_getline((linenr_T)0);
				3049	reg_startpos = reg_mmatch->startpos;
				3050	reg_endpos = reg_mmatch->endpos;
				3051	}
				3052	else
				3053	{
				3054	prog = reg_match->regprog;
				3055	reg_startp = reg_match->startp;
				3056	reg_endp = reg_match->endp;
				3057	}
				3058
				3059	/* Be paranoid... */
				3060	if (prog == NULL \|\| line == NULL)
				3061	{
				3062	EMSG(_(e_null));
				3063	goto theend;
				3064	}
				3065
				3066	/* Check validity of program. */
				3067	if (prog_magic_wrong())
				3068	goto theend;
				3069
				3070	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3071	if (prog->regflags & RF_ICASE)
				3072	ireg_ic = TRUE;
				3073	else if (prog->regflags & RF_NOICASE)
				3074	ireg_ic = FALSE;
				3075
				3076	#ifdef FEAT_MBYTE
				3077	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3078	if (prog->regflags & RF_ICOMBINE)
				3079	ireg_icombine = TRUE;
				3080	#endif
				3081
				3082	/* If there is a "must appear" string, look for it. */
					/* When it is not found in the line from "col" on, give up right away
					* without running the matcher. */
				3083	if (prog->regmust != NULL)
				3084	{
				3085	int c;
				3086
				3087	#ifdef FEAT_MBYTE
				3088	if (has_mbyte)
				3089	c = (*mb_ptr2char)(prog->regmust);
				3090	else
				3091	#endif
				3092	c = *prog->regmust;
				3093	s = line + col;
				3094	while ((s = cstrchr(s, c)) != NULL)
				3095	{
				3096	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3097	break; /* Found it. */
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3098	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3099	}
				3100	if (s == NULL) /* Not present. */
				3101	goto theend;
				3102	}
				3103
				3104	regline = line;
				3105	reglnum = 0;
				3106	out_of_stack = FALSE;
				3107
				3108	/* Simplest case: Anchored match need be tried only once. */
				3109	if (prog->reganch)
				3110	{
				3111	int c;
				3112
				3113	#ifdef FEAT_MBYTE
				3114	if (has_mbyte)
				3115	c = (*mb_ptr2char)(regline + col);
				3116	else
				3117	#endif
				3118	c = regline[col];
					/* Only call regtry() when there is no known first character, or the
					* character at "col" equals it (also compared case-folded when
					* ireg_ic is set). */
				3119	if (prog->regstart == NUL
				3120	\|\| prog->regstart == c
				3121	\|\| (ireg_ic && ((
				3122	#ifdef FEAT_MBYTE
				3123	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3124	\|\| (c < 255 && prog->regstart < 255 &&
				3125	#endif
				3126	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3127	retval = regtry(prog, col);
				3128	else
				3129	retval = 0;
				3130	}
				3131	else
				3132	{
				3133	/* Messy cases: unanchored match. */
					/* Try every column from "col" on until regtry() succeeds, the end
					* of the line is reached, or matching is interrupted. */
				3134	while (!got_int && !out_of_stack)
				3135	{
				3136	if (prog->regstart != NUL)
				3137	{
				3138	/* Skip until the char we know it must start with. */
				3139	s = cstrchr(regline + col, prog->regstart);
				3140	if (s == NULL)
				3141	{
				3142	retval = 0;
				3143	break;
				3144	}
				3145	col = (int)(s - regline);
				3146	}
				3147
				3148	retval = regtry(prog, col);
				3149	if (retval > 0)
				3150	break;
				3151
				3152	/* if not currently on the first line, get it again */
				3153	if (reglnum != 0)
				3154	{
				3155	regline = reg_getline((linenr_T)0);
				3156	reglnum = 0;
				3157	}
				3158	if (regline[col] == NUL) /* end of the line: nothing left to try */
				3159	break;
					/* Move on to the next character position. */
				3160	#ifdef FEAT_MBYTE
				3161	if (has_mbyte)
				3162	col += (*mb_ptr2len_check)(regline + col);
				3163	else
				3164	#endif
				3165	++col;
				3166	}
				3167	}
				3168
				3169	if (out_of_stack)
				3170	EMSG(_("E363: pattern caused out-of-stack error"));
				3171
				3172	#ifdef HAVE_TRY_EXCEPT
				3173	}
					/* Handler for the __try block opened near the top of the function. */
				3174	__except(EXCEPTION_EXECUTE_HANDLER)
				3175	{
				3176	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3177	{
				3178	RESETSTKOFLW();
				3179	EMSG(_("E363: pattern caused out-of-stack error"));
				3180	}
				3181	else
				3182	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3183	retval = 0L;
				3184	}
				3185	#endif
				3186
				3187	theend:
				3188	/* Common exit, reached by "goto" when no match is possible and by falling
					* through after matching: free the line copy that the multi-line backref
					* code in regmatch() may have stored in reg_tofree. */
				3189	vim_free(reg_tofree);
				3190	#ifdef HAVE_SETJMP_H
				3191	mch_endjmp();
				3192	#endif
				3193	return retval;
				3194	}
				3195
				3196	#ifdef FEAT_SYN_HL
				3197	static reg_extmatch_T *make_extmatch __ARGS((void));
				3198
				3199	/*
				3200	* Allocate a reg_extmatch_T with alloc_clear() and give it a reference
					* count of one. Returns NULL when the allocation fails.
				3201	*/
				3202	static reg_extmatch_T *
				3203	make_extmatch()
				3204	{
				3205	reg_extmatch_T *fresh;
				3206
				3207	fresh = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3208	if (fresh == NULL)
				3209	return NULL; /* nothing to initialize */
					fresh->refcnt = 1; /* the caller holds the only reference */
				3210	return fresh;
				3211	}
				3212
				3213	/*
				3214	* Take one more reference on "em". A NULL "em" is passed through untouched.
					* Returns "em".
				3215	*/
				3216	reg_extmatch_T *
				3217	ref_extmatch(em)
				3218	reg_extmatch_T *em;
				3219	{
				3220	if (em == NULL)
				3221	return NULL;
					em->refcnt += 1;
				3222	return em;
				3223	}
				3224
				3225	/*
				3226	* Drop one reference to "em" (NULL is ignored). When the count reaches zero
				3227	* or below, every entry of em->matches[] and "em" itself are freed.
				3228	*/
				3229	void
				3230	unref_extmatch(em)
				3231	reg_extmatch_T *em;
				3232	{
				3233	int idx;
				3234
				3235	if (em == NULL)
					return;
					em->refcnt -= 1;
					if (em->refcnt > 0)
					return; /* still referenced elsewhere */
					idx = 0;
					while (idx < NSUBEXP)
				3236	{
				3237	vim_free(em->matches[idx]);
				3238	++idx;
				3239	}
					vim_free(em);
				3241	}
				3242	#endif
				3243
				3244	/*
				3245	* regtry - try match of "prog" at regline["col"].
					* On success the start and end of the whole match are filled in when the
					* pattern did not set them itself ("\zs" / "\ze"). With FEAT_SYN_HL the
					* previous re_extmatch_out is released and, when the program has
					* "\z(...\)" items, a new one is created to export them.
				3246	* Returns 0 for failure, number of lines contained in the match otherwise.
					* Also returns 0 when the new re_extmatch_out cannot be allocated.
				3247	*/
				3248	static long
				3249	regtry(prog, col)
				3250	regprog_T *prog;
				3251	colnr_T col;
				3252	{
				3253	reginput = regline + col;
				3254	need_clear_subexpr = TRUE; /* NOTE(review): presumably acted on by cleanup_subexpr() - confirm */
				3255	#ifdef FEAT_SYN_HL
				3256	/* Clear the external match subpointers if necessary. */
				3257	if (prog->reghasz == REX_SET)
				3258	need_clear_zsubexpr = TRUE;
				3259	#endif
				3260
				3261	if (regmatch(prog->program + 1))
				3262	{
				3263	cleanup_subexpr();
				3264	if (REG_MULTI)
				3265	{
					/* A negative lnum means the position was not set while matching. */
				3266	if (reg_startpos[0].lnum < 0)
				3267	{
				3268	reg_startpos[0].lnum = 0;
				3269	reg_startpos[0].col = col;
				3270	}
				3271	if (reg_endpos[0].lnum < 0)
				3272	{
				3273	reg_endpos[0].lnum = reglnum;
				3274	reg_endpos[0].col = (int)(reginput - regline);
				3275	}
				3276	else
				3277	/* Use line number of "\ze". */
				3278	reglnum = reg_endpos[0].lnum;
				3279	}
				3280	else
				3281	{
					/* A NULL pointer means the position was not set while matching. */
				3282	if (reg_startp[0] == NULL)
				3283	reg_startp[0] = regline + col;
				3284	if (reg_endp[0] == NULL)
				3285	reg_endp[0] = reginput;
				3286	}
				3287	#ifdef FEAT_SYN_HL
				3288	/* Package any found \z(...\) matches for export. Default is none. */
				3289	unref_extmatch(re_extmatch_out);
				3290	re_extmatch_out = NULL;
				3291
				3292	if (prog->reghasz == REX_SET)
				3293	{
				3294	int i;
				3295
				3296	cleanup_zsubexpr();
				3297	re_extmatch_out = make_extmatch();
					/* make_extmatch() returns NULL when out of memory; the stores into
					* matches[] below must not be done through a NULL pointer. */
					if (re_extmatch_out == NULL)
					return 0;
				3298	for (i = 0; i < NSUBEXP; i++)
				3299	{
				3300	if (REG_MULTI)
				3301	{
				3302	/* Only accept single line matches. */
				3303	if (reg_startzpos[i].lnum >= 0
				3304	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3305	re_extmatch_out->matches[i] =
				3306	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3307	+ reg_startzpos[i].col,
				3308	reg_endzpos[i].col - reg_startzpos[i].col);
				3309	}
				3310	else
				3311	{
				3312	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3313	re_extmatch_out->matches[i] =
				3314	vim_strnsave(reg_startzp[i],
				3315	(int)(reg_endzp[i] - reg_startzp[i]));
				3316	}
				3317	}
				3318	}
				3319	#endif
				3320	return 1 + reglnum; /* reglnum counts from zero at the first line */
				3321	}
				3322	return 0;
				3323	}
				3324
				3325	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3326	static int reg_prev_class __ARGS((void));
				3327
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3328	/*
				3329	* Return the mb_get_class() value of the character just before reginput,
					* or -1 when reginput is at the start of regline.
				3330	*/
				3331	static int
				3332	reg_prev_class()
				3333	{
				3334	char_u *last; /* the byte just before reginput */
					
				3335	if (reginput <= regline)
				3336	return -1; /* no previous character */
				3337	last = reginput - 1;
					/* Back up by the offset that mb_head_off() reports for "last". */
					return mb_get_class(last - (*mb_head_off)(regline, last));
				3338	}
				3339
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3340	#endif
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3341	#define ADVANCE_REGINPUT() mb_ptr_adv(reginput) /* step reginput forward with mb_ptr_adv(); used by regmatch() after an item matched */
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3342
				3343	/*
				3344	* The arguments from BRACE_LIMITS are stored here. They are actually local
				3345	* to regmatch(), but they are here to reduce the amount of stack space used
				3346	* (it can be called recursively many times).
					* They are set by the BRACE_LIMITS item when BRACE_SIMPLE follows it and
					* read back by the BRACE_SIMPLE item.
				3347	*/
				3348	static long bl_minval; /* from OPERAND_MIN() of the BRACE_LIMITS node */
				3349	static long bl_maxval; /* from OPERAND_MAX() of the BRACE_LIMITS node */
				3350
				3351	/*
				3352	* regmatch - main matching routine
				3353	*
				3354	* Conceptually the strategy is simple: Check to see whether the current
				3355	* node matches, call self recursively to see whether the rest matches,
				3356	* and then act accordingly. In practice we make some effort to avoid
				3357	* recursion, in particular by going through "ordinary" nodes (that don't
				3358	* need to know whether the rest of the match failed) by a loop instead of
				3359	* by recursion.
				3360	*
				3361	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3362	* the last matched character.
				3363	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3364	* undefined state!
				3365	*/
				3366	static int
				3367	regmatch(scan)
				3368	char_u scan; / Current node. */
				3369	{
				3370	char_u next; / Next node. */
				3371	int op;
				3372	int c;
				3373
				3374	#ifdef HAVE_GETRLIMIT
				3375	/* Check if we are running out of stack space. Could be caused by
				3376	* recursively calling ourselves. */
				3377	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3378	{
				3379	out_of_stack = TRUE;
				3380	return FALSE;
				3381	}
				3382	#endif
				3383
				3384	/* Some patterns may take a long time to match, even though they are not
				3385	* illegal. E.g., "\([a-z]\+\)\+Q". Allow breaking them with CTRL-C. */
				3386	fast_breakcheck();
				3387
				3388	#ifdef DEBUG
				3389	if (scan != NULL && regnarrate)
				3390	{
				3391	mch_errmsg(regprop(scan));
				3392	mch_errmsg("(\n");
				3393	}
				3394	#endif
				3395	while (scan != NULL)
				3396	{
				3397	if (got_int \|\| out_of_stack)
				3398	return FALSE;
				3399	#ifdef DEBUG
				3400	if (regnarrate)
				3401	{
				3402	mch_errmsg(regprop(scan));
				3403	mch_errmsg("...\n");
				3404	# ifdef FEAT_SYN_HL
				3405	if (re_extmatch_in != NULL)
				3406	{
				3407	int i;
				3408
				3409	mch_errmsg(_("External submatches:\n"));
				3410	for (i = 0; i < NSUBEXP; i++)
				3411	{
				3412	mch_errmsg(" \"");
				3413	if (re_extmatch_in->matches[i] != NULL)
				3414	mch_errmsg(re_extmatch_in->matches[i]);
				3415	mch_errmsg("\"\n");
				3416	}
				3417	}
				3418	# endif
				3419	}
				3420	#endif
				3421	next = regnext(scan);
				3422
				3423	op = OP(scan);
				3424	/* Check for character class with NL added. */
				3425	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3426	{
				3427	reg_nextline();
				3428	}
				3429	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3430	{
				3431	ADVANCE_REGINPUT();
				3432	}
				3433	else
				3434	{
				3435	if (WITH_NL(op))
				3436	op -= ADD_NL;
				3437	#ifdef FEAT_MBYTE
				3438	if (has_mbyte)
				3439	c = (*mb_ptr2char)(reginput);
				3440	else
				3441	#endif
				3442	c = *reginput;
				3443	switch (op)
				3444	{
				3445	case BOL:
				3446	if (reginput != regline)
				3447	return FALSE;
				3448	break;
				3449
				3450	case EOL:
				3451	if (c != NUL)
				3452	return FALSE;
				3453	break;
				3454
				3455	case RE_BOF:
				3456	/* Passing -1 to the getline() function provided for the search
				3457	* should always return NULL if the current line is the first
				3458	* line of the file. */
				3459	if (reglnum != 0 \|\| reginput != regline
				3460	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3461	return FALSE;
				3462	break;
				3463
				3464	case RE_EOF:
				3465	if (reglnum != reg_maxline \|\| c != NUL)
				3466	return FALSE;
				3467	break;
				3468
				3469	case CURSOR:
				3470	/* Check if the buffer is in a window and compare the
				3471	* reg_win->w_cursor position to the match position. */
				3472	if (reg_win == NULL
				3473	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3474	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3475	return FALSE;
				3476	break;
				3477
				3478	case RE_LNUM:
				3479	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3480	scan))
				3481	return FALSE;
				3482	break;
				3483
				3484	case RE_COL:
				3485	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3486	return FALSE;
				3487	break;
				3488
				3489	case RE_VCOL:
				3490	if (!re_num_cmp((long_u)win_linetabsize(
				3491	reg_win == NULL ? curwin : reg_win,
				3492	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3493	return FALSE;
				3494	break;
				3495
				3496	case BOW: /* \<word; reginput points to w */
				3497	if (c == NUL) /* Can't match at end of line */
				3498	return FALSE;
				3499	#ifdef FEAT_MBYTE
				3500	if (has_mbyte)
				3501	{
				3502	int this_class;
				3503
				3504	/* Get class of current and previous char (if it exists). */
				3505	this_class = mb_get_class(reginput);
				3506	if (this_class <= 1)
				3507	return FALSE; /* not on a word at all */
				3508	if (reg_prev_class() == this_class)
				3509	return FALSE; /* previous char is in same word */
				3510	}
				3511	#endif
				3512	else
				3513	{
				3514	if (!vim_iswordc(c)
				3515	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3516	return FALSE;
				3517	}
				3518	break;
				3519
				3520	case EOW: /* word\>; reginput points after d */
				3521	if (reginput == regline) /* Can't match at start of line */
				3522	return FALSE;
				3523	#ifdef FEAT_MBYTE
				3524	if (has_mbyte)
				3525	{
				3526	int this_class, prev_class;
				3527
				3528	/* Get class of current and previous char (if it exists). */
				3529	this_class = mb_get_class(reginput);
				3530	prev_class = reg_prev_class();
				3531	if (this_class == prev_class)
				3532	return FALSE;
				3533	if (prev_class == 0 \|\| prev_class == 1)
				3534	return FALSE;
				3535	}
				3536	else
				3537	#endif
				3538	{
				3539	if (!vim_iswordc(reginput[-1]))
				3540	return FALSE;
				3541	if (reginput[0] != NUL && vim_iswordc(c))
				3542	return FALSE;
				3543	}
				3544	break; /* Matched with EOW */
				3545
				3546	case ANY:
				3547	if (c == NUL)
				3548	return FALSE;
				3549	ADVANCE_REGINPUT();
				3550	break;
				3551
				3552	case IDENT:
				3553	if (!vim_isIDc(c))
				3554	return FALSE;
				3555	ADVANCE_REGINPUT();
				3556	break;
				3557
				3558	case SIDENT:
				3559	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3560	return FALSE;
				3561	ADVANCE_REGINPUT();
				3562	break;
				3563
				3564	case KWORD:
				3565	if (!vim_iswordp(reginput))
				3566	return FALSE;
				3567	ADVANCE_REGINPUT();
				3568	break;
				3569
				3570	case SKWORD:
				3571	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3572	return FALSE;
				3573	ADVANCE_REGINPUT();
				3574	break;
				3575
				3576	case FNAME:
				3577	if (!vim_isfilec(c))
				3578	return FALSE;
				3579	ADVANCE_REGINPUT();
				3580	break;
				3581
				3582	case SFNAME:
				3583	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3584	return FALSE;
				3585	ADVANCE_REGINPUT();
				3586	break;
				3587
				3588	case PRINT:
				3589	if (ptr2cells(reginput) != 1)
				3590	return FALSE;
				3591	ADVANCE_REGINPUT();
				3592	break;
				3593
				3594	case SPRINT:
				3595	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3596	return FALSE;
				3597	ADVANCE_REGINPUT();
				3598	break;
				3599
				3600	case WHITE:
				3601	if (!vim_iswhite(c))
				3602	return FALSE;
				3603	ADVANCE_REGINPUT();
				3604	break;
				3605
				3606	case NWHITE:
				3607	if (c == NUL \|\| vim_iswhite(c))
				3608	return FALSE;
				3609	ADVANCE_REGINPUT();
				3610	break;
				3611
				3612	case DIGIT:
				3613	if (!ri_digit(c))
				3614	return FALSE;
				3615	ADVANCE_REGINPUT();
				3616	break;
				3617
				3618	case NDIGIT:
				3619	if (c == NUL \|\| ri_digit(c))
				3620	return FALSE;
				3621	ADVANCE_REGINPUT();
				3622	break;
				3623
				3624	case HEX:
				3625	if (!ri_hex(c))
				3626	return FALSE;
				3627	ADVANCE_REGINPUT();
				3628	break;
				3629
				3630	case NHEX:
				3631	if (c == NUL \|\| ri_hex(c))
				3632	return FALSE;
				3633	ADVANCE_REGINPUT();
				3634	break;
				3635
				3636	case OCTAL:
				3637	if (!ri_octal(c))
				3638	return FALSE;
				3639	ADVANCE_REGINPUT();
				3640	break;
				3641
				3642	case NOCTAL:
				3643	if (c == NUL \|\| ri_octal(c))
				3644	return FALSE;
				3645	ADVANCE_REGINPUT();
				3646	break;
				3647
				3648	case WORD:
				3649	if (!ri_word(c))
				3650	return FALSE;
				3651	ADVANCE_REGINPUT();
				3652	break;
				3653
				3654	case NWORD:
				3655	if (c == NUL \|\| ri_word(c))
				3656	return FALSE;
				3657	ADVANCE_REGINPUT();
				3658	break;
				3659
				3660	case HEAD:
				3661	if (!ri_head(c))
				3662	return FALSE;
				3663	ADVANCE_REGINPUT();
				3664	break;
				3665
				3666	case NHEAD:
				3667	if (c == NUL \|\| ri_head(c))
				3668	return FALSE;
				3669	ADVANCE_REGINPUT();
				3670	break;
				3671
				3672	case ALPHA:
				3673	if (!ri_alpha(c))
				3674	return FALSE;
				3675	ADVANCE_REGINPUT();
				3676	break;
				3677
				3678	case NALPHA:
				3679	if (c == NUL \|\| ri_alpha(c))
				3680	return FALSE;
				3681	ADVANCE_REGINPUT();
				3682	break;
				3683
				3684	case LOWER:
				3685	if (!ri_lower(c))
				3686	return FALSE;
				3687	ADVANCE_REGINPUT();
				3688	break;
				3689
				3690	case NLOWER:
				3691	if (c == NUL \|\| ri_lower(c))
				3692	return FALSE;
				3693	ADVANCE_REGINPUT();
				3694	break;
				3695
				3696	case UPPER:
				3697	if (!ri_upper(c))
				3698	return FALSE;
				3699	ADVANCE_REGINPUT();
				3700	break;
				3701
				3702	case NUPPER:
				3703	if (c == NUL \|\| ri_upper(c))
				3704	return FALSE;
				3705	ADVANCE_REGINPUT();
				3706	break;
				3707
				3708	case EXACTLY:
				3709	{
				3710	int len;
				3711	char_u *opnd;
				3712
				3713	opnd = OPERAND(scan);
				3714	/* Inline the first byte, for speed. */
				3715	if (opnd != reginput
				3716	&& (!ireg_ic \|\| (
				3717	#ifdef FEAT_MBYTE
				3718	!enc_utf8 &&
				3719	#endif
				3720	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3721	return FALSE;
				3722	if (*opnd == NUL)
				3723	{
				3724	/* match empty string always works; happens when "~" is
				3725	* empty. */
				3726	}
				3727	else if (opnd[1] == NUL
				3728	#ifdef FEAT_MBYTE
				3729	&& !(enc_utf8 && ireg_ic)
				3730	#endif
				3731	)
				3732	++reginput; /* matched a single char */
				3733	else
				3734	{
				3735	len = (int)STRLEN(opnd);
				3736	/* Need to match first byte again for multi-byte. */
				3737	if (cstrncmp(opnd, reginput, &len) != 0)
				3738	return FALSE;
				3739	#ifdef FEAT_MBYTE
				3740	/* Check for following composing character. */
				3741	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3742	{
				3743	/* raaron: This code makes a composing character get
				3744	* ignored, which is the correct behavior (sometimes)
				3745	* for voweled Hebrew texts. */
				3746	if (!ireg_icombine)
				3747	return FALSE;
				3748	}
				3749	else
				3750	#endif
				3751	reginput += len;
				3752	}
				3753	}
				3754	break;
				3755
				3756	case ANYOF:
				3757	case ANYBUT:
				3758	if (c == NUL)
				3759	return FALSE;
				3760	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3761	return FALSE;
				3762	ADVANCE_REGINPUT();
				3763	break;
				3764
				3765	#ifdef FEAT_MBYTE
				3766	case MULTIBYTECODE:
				3767	if (has_mbyte)
				3768	{
				3769	int i, len;
				3770	char_u *opnd;
				3771
				3772	opnd = OPERAND(scan);
				3773	/* Safety check (just in case 'encoding' was changed since
				3774	* compiling the program). */
				3775	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3776	return FALSE;
				3777	for (i = 0; i < len; ++i)
				3778	if (opnd[i] != reginput[i])
				3779	return FALSE;
				3780	reginput += len;
				3781	}
				3782	else
				3783	return FALSE;
				3784	break;
				3785	#endif
				3786
				3787	case NOTHING:
				3788	break;
				3789
				3790	case BACK:
				3791	break;
				3792
				3793	case MOPEN + 0: /* Match start: \zs */
				3794	case MOPEN + 1: /* \( */
				3795	case MOPEN + 2:
				3796	case MOPEN + 3:
				3797	case MOPEN + 4:
				3798	case MOPEN + 5:
				3799	case MOPEN + 6:
				3800	case MOPEN + 7:
				3801	case MOPEN + 8:
				3802	case MOPEN + 9:
				3803	{
				3804	int no;
				3805	save_se_T save;
				3806
				3807	no = op - MOPEN;
				3808	cleanup_subexpr();
				3809	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				3810
				3811	if (regmatch(next))
				3812	return TRUE;
				3813
				3814	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				3815	return FALSE;
				3816	}
				3817	/* break; Not Reached */
				3818
				3819	case NOPEN: /* \%( */
				3820	case NCLOSE: /* \) after \%( */
				3821	if (regmatch(next))
				3822	return TRUE;
				3823	return FALSE;
				3824	/* break; Not Reached */
				3825
				3826	#ifdef FEAT_SYN_HL
				3827	case ZOPEN + 1:
				3828	case ZOPEN + 2:
				3829	case ZOPEN + 3:
				3830	case ZOPEN + 4:
				3831	case ZOPEN + 5:
				3832	case ZOPEN + 6:
				3833	case ZOPEN + 7:
				3834	case ZOPEN + 8:
				3835	case ZOPEN + 9:
				3836	{
				3837	int no;
				3838	save_se_T save;
				3839
				3840	no = op - ZOPEN;
				3841	cleanup_zsubexpr();
				3842	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3843
				3844	if (regmatch(next))
				3845	return TRUE;
				3846
				3847	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3848	return FALSE;
				3849	}
				3850	/* break; Not Reached */
				3851	#endif
				3852
				3853	case MCLOSE + 0: /* Match end: \ze */
				3854	case MCLOSE + 1: /* \) */
				3855	case MCLOSE + 2:
				3856	case MCLOSE + 3:
				3857	case MCLOSE + 4:
				3858	case MCLOSE + 5:
				3859	case MCLOSE + 6:
				3860	case MCLOSE + 7:
				3861	case MCLOSE + 8:
				3862	case MCLOSE + 9:
				3863	{
				3864	int no;
				3865	save_se_T save;
				3866
				3867	no = op - MCLOSE;
				3868	cleanup_subexpr();
				3869	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				3870
				3871	if (regmatch(next))
				3872	return TRUE;
				3873
				3874	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				3875	return FALSE;
				3876	}
				3877	/* break; Not Reached */
				3878
				3879	#ifdef FEAT_SYN_HL
				3880	case ZCLOSE + 1: /* \) after \z( */
				3881	case ZCLOSE + 2:
				3882	case ZCLOSE + 3:
				3883	case ZCLOSE + 4:
				3884	case ZCLOSE + 5:
				3885	case ZCLOSE + 6:
				3886	case ZCLOSE + 7:
				3887	case ZCLOSE + 8:
				3888	case ZCLOSE + 9:
				3889	{
				3890	int no;
				3891	save_se_T save;
				3892
				3893	no = op - ZCLOSE;
				3894	cleanup_zsubexpr();
				3895	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3896
				3897	if (regmatch(next))
				3898	return TRUE;
				3899
				3900	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3901	return FALSE;
				3902	}
				3903	/* break; Not Reached */
				3904	#endif
				3905
				3906	case BACKREF + 1:
				3907	case BACKREF + 2:
				3908	case BACKREF + 3:
				3909	case BACKREF + 4:
				3910	case BACKREF + 5:
				3911	case BACKREF + 6:
				3912	case BACKREF + 7:
				3913	case BACKREF + 8:
				3914	case BACKREF + 9:
				3915	{
				3916	int no;
				3917	int len;
				3918	linenr_T clnum;
				3919	colnr_T ccol;
				3920	char_u *p;
				3921
				3922	no = op - BACKREF;
				3923	cleanup_subexpr();
				3924	if (!REG_MULTI) /* Single-line regexp */
				3925	{
				3926	if (reg_endp[no] == NULL)
				3927	{
				3928	/* Backref was not set: Match an empty string. */
				3929	len = 0;
				3930	}
				3931	else
				3932	{
				3933	/* Compare current input with back-ref in the same
				3934	* line. */
				3935	len = (int)(reg_endp[no] - reg_startp[no]);
				3936	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				3937	return FALSE;
				3938	}
				3939	}
				3940	else /* Multi-line regexp */
				3941	{
				3942	if (reg_endpos[no].lnum < 0)
				3943	{
				3944	/* Backref was not set: Match an empty string. */
				3945	len = 0;
				3946	}
				3947	else
				3948	{
				3949	if (reg_startpos[no].lnum == reglnum
				3950	&& reg_endpos[no].lnum == reglnum)
				3951	{
				3952	/* Compare back-ref within the current line. */
				3953	len = reg_endpos[no].col - reg_startpos[no].col;
				3954	if (cstrncmp(regline + reg_startpos[no].col,
				3955	reginput, &len) != 0)
				3956	return FALSE;
				3957	}
				3958	else
				3959	{
				3960	/* Messy situation: Need to compare between two
				3961	* lines. */
				3962	ccol = reg_startpos[no].col;
				3963	clnum = reg_startpos[no].lnum;
				3964	for (;;)
				3965	{
				3966	/* Since getting one line may invalidate
				3967	* the other, need to make copy. Slow! */
				3968	if (regline != reg_tofree)
				3969	{
				3970	len = (int)STRLEN(regline);
				3971	if (reg_tofree == NULL
				3972	\|\| len >= (int)reg_tofreelen)
				3973	{
				3974	len += 50; /* get some extra */
				3975	vim_free(reg_tofree);
				3976	reg_tofree = alloc(len);
				3977	if (reg_tofree == NULL)
				3978	return FALSE; /* out of memory! */
				3979	reg_tofreelen = len;
				3980	}
				3981	STRCPY(reg_tofree, regline);
				3982	reginput = reg_tofree
				3983	+ (reginput - regline);
				3984	regline = reg_tofree;
				3985	}
				3986
				3987	/* Get the line to compare with. */
				3988	p = reg_getline(clnum);
				3989	if (clnum == reg_endpos[no].lnum)
				3990	len = reg_endpos[no].col - ccol;
				3991	else
				3992	len = (int)STRLEN(p + ccol);
				3993
				3994	if (cstrncmp(p + ccol, reginput, &len) != 0)
				3995	return FALSE; /* doesn't match */
				3996	if (clnum == reg_endpos[no].lnum)
				3997	break; /* match and at end! */
				3998	if (reglnum == reg_maxline)
				3999	return FALSE; /* text too short */
				4000
				4001	/* Advance to next line. */
				4002	reg_nextline();
				4003	++clnum;
				4004	ccol = 0;
				4005	if (got_int \|\| out_of_stack)
				4006	return FALSE;
				4007	}
				4008
				4009	/* found a match! Note that regline may now point
				4010	* to a copy of the line, that should not matter. */
				4011	}
				4012	}
				4013	}
				4014
				4015	/* Matched the backref, skip over it. */
				4016	reginput += len;
				4017	}
				4018	break;
				4019
				4020	#ifdef FEAT_SYN_HL
				4021	case ZREF + 1:
				4022	case ZREF + 2:
				4023	case ZREF + 3:
				4024	case ZREF + 4:
				4025	case ZREF + 5:
				4026	case ZREF + 6:
				4027	case ZREF + 7:
				4028	case ZREF + 8:
				4029	case ZREF + 9:
				4030	{
				4031	int no;
				4032	int len;
				4033
				4034	cleanup_zsubexpr();
				4035	no = op - ZREF;
				4036	if (re_extmatch_in != NULL
				4037	&& re_extmatch_in->matches[no] != NULL)
				4038	{
				4039	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4040	if (cstrncmp(re_extmatch_in->matches[no],
				4041	reginput, &len) != 0)
				4042	return FALSE;
				4043	reginput += len;
				4044	}
				4045	else
				4046	{
				4047	/* Backref was not set: Match an empty string. */
				4048	}
				4049	}
				4050	break;
				4051	#endif
				4052
				4053	case BRANCH:
				4054	{
				4055	if (OP(next) != BRANCH) /* No choice. */
				4056	next = OPERAND(scan); /* Avoid recursion. */
				4057	else
				4058	{
				4059	regsave_T save;
				4060
				4061	do
				4062	{
				4063	reg_save(&save);
				4064	if (regmatch(OPERAND(scan)))
				4065	return TRUE;
				4066	reg_restore(&save);
				4067	scan = regnext(scan);
				4068	} while (scan != NULL && OP(scan) == BRANCH);
				4069	return FALSE;
				4070	/* NOTREACHED */
				4071	}
				4072	}
				4073	break;
				4074
				4075	case BRACE_LIMITS:
				4076	{
				4077	int no;
				4078
				4079	if (OP(next) == BRACE_SIMPLE)
				4080	{
				4081	bl_minval = OPERAND_MIN(scan);
				4082	bl_maxval = OPERAND_MAX(scan);
				4083	}
				4084	else if (OP(next) >= BRACE_COMPLEX
				4085	&& OP(next) < BRACE_COMPLEX + 10)
				4086	{
				4087	no = OP(next) - BRACE_COMPLEX;
				4088	brace_min[no] = OPERAND_MIN(scan);
				4089	brace_max[no] = OPERAND_MAX(scan);
				4090	brace_count[no] = 0;
				4091	}
				4092	else
				4093	{
				4094	EMSG(_(e_internal)); /* Shouldn't happen */
				4095	return FALSE;
				4096	}
				4097	}
				4098	break;
				4099
				4100	case BRACE_COMPLEX + 0:
				4101	case BRACE_COMPLEX + 1:
				4102	case BRACE_COMPLEX + 2:
				4103	case BRACE_COMPLEX + 3:
				4104	case BRACE_COMPLEX + 4:
				4105	case BRACE_COMPLEX + 5:
				4106	case BRACE_COMPLEX + 6:
				4107	case BRACE_COMPLEX + 7:
				4108	case BRACE_COMPLEX + 8:
				4109	case BRACE_COMPLEX + 9:
				4110	{
				4111	int no;
				4112	regsave_T save;
				4113
				4114	no = op - BRACE_COMPLEX;
				4115	++brace_count[no];
				4116
				4117	/* If not matched enough times yet, try one more */
				4118	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4119	? brace_min[no] : brace_max[no]))
				4120	{
				4121	reg_save(&save);
				4122	if (regmatch(OPERAND(scan)))
				4123	return TRUE;
				4124	reg_restore(&save);
				4125	--brace_count[no]; /* failed, decrement match count */
				4126	return FALSE;
				4127	}
				4128
				4129	/* If matched enough times, may try matching some more */
				4130	if (brace_min[no] <= brace_max[no])
				4131	{
				4132	/* Range is the normal way around, use longest match */
				4133	if (brace_count[no] <= brace_max[no])
				4134	{
				4135	reg_save(&save);
				4136	if (regmatch(OPERAND(scan)))
				4137	return TRUE; /* matched some more times */
				4138	reg_restore(&save);
				4139	--brace_count[no]; /* matched just enough times */
				4140	/* continue with the items after \{} */
				4141	}
				4142	}
				4143	else
				4144	{
				4145	/* Range is backwards, use shortest match first */
				4146	if (brace_count[no] <= brace_min[no])
				4147	{
				4148	reg_save(&save);
				4149	if (regmatch(next))
				4150	return TRUE;
				4151	reg_restore(&save);
				4152	next = OPERAND(scan);
				4153	/* must try to match one more item */
				4154	}
				4155	}
				4156	}
				4157	break;
				4158
				4159	case BRACE_SIMPLE:
				4160	case STAR:
				4161	case PLUS:
				4162	{
				4163	int nextb; /* next byte */
				4164	int nextb_ic; /* next byte reverse case */
				4165	long count;
				4166	regsave_T save;
				4167	long minval;
				4168	long maxval;
				4169
				4170	/*
				4171	* Lookahead to avoid useless match attempts when we know
				4172	* what character comes next.
				4173	*/
				4174	if (OP(next) == EXACTLY)
				4175	{
				4176	nextb = *OPERAND(next);
				4177	if (ireg_ic)
				4178	{
				4179	if (isupper(nextb))
				4180	nextb_ic = TOLOWER_LOC(nextb);
				4181	else
				4182	nextb_ic = TOUPPER_LOC(nextb);
				4183	}
				4184	else
				4185	nextb_ic = nextb;
				4186	}
				4187	else
				4188	{
				4189	nextb = NUL;
				4190	nextb_ic = NUL;
				4191	}
				4192	if (op != BRACE_SIMPLE)
				4193	{
				4194	minval = (op == STAR) ? 0 : 1;
				4195	maxval = MAX_LIMIT;
				4196	}
				4197	else
				4198	{
				4199	minval = bl_minval;
				4200	maxval = bl_maxval;
				4201	}
				4202
				4203	/*
				4204	* When maxval > minval, try matching as much as possible, up
				4205	* to maxval. When maxval < minval, try matching at least the
				4206	* minimal number (since the range is backwards, that's also
				4207	* maxval!).
				4208	*/
				4209	count = regrepeat(OPERAND(scan), maxval);
				4210	if (got_int)
				4211	return FALSE;
				4212	if (minval <= maxval)
				4213	{
				4214	/* Range is the normal way around, use longest match */
				4215	while (count >= minval)
				4216	{
				4217	/* If it could match, try it. */
				4218	if (nextb == NUL \|\| *reginput == nextb
				4219	\|\| *reginput == nextb_ic)
				4220	{
				4221	reg_save(&save);
				4222	if (regmatch(next))
				4223	return TRUE;
				4224	reg_restore(&save);
				4225	}
				4226	/* Couldn't or didn't match -- back up one char. */
				4227	if (--count < minval)
				4228	break;
				4229	if (reginput == regline)
				4230	{
				4231	/* backup to last char of previous line */
				4232	--reglnum;
				4233	regline = reg_getline(reglnum);
				4234	/* Just in case regrepeat() didn't count right. */
				4235	if (regline == NULL)
				4236	return FALSE;
				4237	reginput = regline + STRLEN(regline);
				4238	fast_breakcheck();
				4239	if (got_int \|\| out_of_stack)
				4240	return FALSE;
				4241	}
				4242	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4243	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4244	}
				4245	}
				4246	else
				4247	{
				4248	/* Range is backwards, use shortest match first.
				4249	* Careful: maxval and minval are exchanged! */
				4250	if (count < maxval)
				4251	return FALSE;
				4252	for (;;)
				4253	{
				4254	/* If it could work, try it. */
				4255	if (nextb == NUL \|\| *reginput == nextb
				4256	\|\| *reginput == nextb_ic)
				4257	{
				4258	reg_save(&save);
				4259	if (regmatch(next))
				4260	return TRUE;
				4261	reg_restore(&save);
				4262	}
				4263	/* Couldn't or didn't match: try advancing one char. */
				4264	if (count == minval
				4265	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4266	break;
				4267	++count;
				4268	if (got_int \|\| out_of_stack)
				4269	return FALSE;
				4270	}
				4271	}
				4272	return FALSE;
				4273	}
				4274	/* break; Not Reached */
				4275
				4276	case NOMATCH:
				4277	{
				4278	regsave_T save;
				4279
				4280	/* If the operand matches, we fail. Otherwise backup and
				4281	* continue with the next item. */
				4282	reg_save(&save);
				4283	if (regmatch(OPERAND(scan)))
				4284	return FALSE;
				4285	reg_restore(&save);
				4286	}
				4287	break;
				4288
				4289	case MATCH:
				4290	case SUBPAT:
				4291	{
				4292	regsave_T save;
				4293
				4294	/* If the operand doesn't match, we fail. Otherwise backup
				4295	* and continue with the next item. */
				4296	reg_save(&save);
				4297	if (!regmatch(OPERAND(scan)))
				4298	return FALSE;
				4299	if (op == MATCH) /* zero-width */
				4300	reg_restore(&save);
				4301	}
				4302	break;
				4303
				4304	case BEHIND:
				4305	case NOBEHIND:
				4306	{
				4307	regsave_T save_after, save_start;
				4308	regsave_T save_behind_pos;
				4309	int needmatch = (op == BEHIND);
				4310
				4311	/*
				4312	* Look back in the input to see if the operand matches or not. This
				4313	* must be done at every position in the input and checking if
				4314	* the match ends at the current position.
				4315	* First check if the next item matches, that's probably
				4316	* faster.
				4317	*/
				4318	reg_save(&save_start);
				4319	if (regmatch(next))
				4320	{
				4321	/* save the position after the found match for next */
				4322	reg_save(&save_after);
				4323
				4324	/* start looking for a match with operand at the current
				4325	* position. Go back one character until we find the
				4326	* result, hitting the start of the line or the previous
				4327	* line (for multi-line matching).
				4328	* Set behind_pos to where the match should end, BHPOS
				4329	* will match it. */
				4330	save_behind_pos = behind_pos;
				4331	behind_pos = save_start;
				4332	for (;;)
				4333	{
				4334	reg_restore(&save_start);
				4335	if (regmatch(OPERAND(scan))
				4336	&& reg_save_equal(&behind_pos))
				4337	{
				4338	behind_pos = save_behind_pos;
				4339	/* found a match that ends where "next" started */
				4340	if (needmatch)
				4341	{
				4342	reg_restore(&save_after);
				4343	return TRUE;
				4344	}
				4345	return FALSE;
				4346	}
				4347	/*
				4348	* No match: Go back one character. May go to
				4349	* previous line once.
				4350	*/
				4351	if (REG_MULTI)
				4352	{
				4353	if (save_start.rs_u.pos.col == 0)
				4354	{
				4355	if (save_start.rs_u.pos.lnum
				4356	< behind_pos.rs_u.pos.lnum
				4357	\|\| reg_getline(
				4358	--save_start.rs_u.pos.lnum) == NULL)
				4359	break;
				4360	reg_restore(&save_start);
				4361	save_start.rs_u.pos.col =
				4362	(colnr_T)STRLEN(regline);
				4363	}
				4364	else
				4365	--save_start.rs_u.pos.col;
				4366	}
				4367	else
				4368	{
				4369	if (save_start.rs_u.ptr == regline)
				4370	break;
				4371	--save_start.rs_u.ptr;
				4372	}
				4373	}
				4374
				4375	/* NOBEHIND succeeds when no match was found */
				4376	behind_pos = save_behind_pos;
				4377	if (!needmatch)
				4378	{
				4379	reg_restore(&save_after);
				4380	return TRUE;
				4381	}
				4382	}
				4383	return FALSE;
				4384	}
				4385
				4386	case BHPOS:
				4387	if (REG_MULTI)
				4388	{
				4389	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4390	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4391	return FALSE;
				4392	}
				4393	else if (behind_pos.rs_u.ptr != reginput)
				4394	return FALSE;
				4395	break;
				4396
				4397	case NEWL:
				4398	if ((c != NUL \|\| reglnum == reg_maxline)
				4399	&& (c != '\n' \|\| !reg_line_lbr))
				4400	return FALSE;
				4401	if (reg_line_lbr)
				4402	ADVANCE_REGINPUT();
				4403	else
				4404	reg_nextline();
				4405	break;
				4406
				4407	case END:
				4408	return TRUE; /* Success! */
				4409
				4410	default:
				4411	EMSG(_(e_re_corr));
				4412	#ifdef DEBUG
				4413	printf("Illegal op code %d\n", op);
				4414	#endif
				4415	return FALSE;
				4416	}
				4417	}
				4418
				4419	scan = next;
				4420	}
				4421
				4422	/*
				4423	* We get here only if there's trouble -- normally "case END" is the
				4424	* terminating point.
				4425	*/
				4426	EMSG(_(e_re_corr));
				4427	#ifdef DEBUG
				4428	printf("Premature EOL\n");
				4429	#endif
				4430	return FALSE;
				4431	}
				4432
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4433	/*
				4434	* regrepeat - repeatedly match something simple, return how many.
				4435	* Advances reginput (and reglnum) to just after the matched chars.
				4436	*/
				4437	static int
				4438	regrepeat(p, maxcount)
				4439	char_u *p;
				4440	long maxcount; /* maximum number of matches allowed */
				4441	{
				4442	long count = 0;
				4443	char_u *scan;
				4444	char_u *opnd;
				4445	int mask;
				4446	int testval = 0;
				4447
				4448	scan = reginput; /* Make local copy of reginput for speed. */
				4449	opnd = OPERAND(p);
				4450	switch (OP(p))
				4451	{
				4452	case ANY:
				4453	case ANY + ADD_NL:
				4454	while (count < maxcount)
				4455	{
				4456	/* Matching anything means we continue until end-of-line (or
				4457	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4458	while (*scan != NUL && count < maxcount)
				4459	{
				4460	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4461	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4462	}
				4463	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4464	break;
				4465	++count; /* count the line-break */
				4466	reg_nextline();
				4467	scan = reginput;
				4468	if (got_int)
				4469	break;
				4470	}
				4471	break;
				4472
				4473	case IDENT:
				4474	case IDENT + ADD_NL:
				4475	testval = TRUE;
				4476	/FALLTHROUGH/
				4477	case SIDENT:
				4478	case SIDENT + ADD_NL:
				4479	while (count < maxcount)
				4480	{
				4481	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4482	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4483	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4484	}
				4485	else if (*scan == NUL)
				4486	{
				4487	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4488	break;
				4489	reg_nextline();
				4490	scan = reginput;
				4491	if (got_int)
				4492	break;
				4493	}
				4494	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4495	++scan;
				4496	else
				4497	break;
				4498	++count;
				4499	}
				4500	break;
				4501
				4502	case KWORD:
				4503	case KWORD + ADD_NL:
				4504	testval = TRUE;
				4505	/FALLTHROUGH/
				4506	case SKWORD:
				4507	case SKWORD + ADD_NL:
				4508	while (count < maxcount)
				4509	{
				4510	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4511	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4512	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4513	}
				4514	else if (*scan == NUL)
				4515	{
				4516	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4517	break;
				4518	reg_nextline();
				4519	scan = reginput;
				4520	if (got_int)
				4521	break;
				4522	}
				4523	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4524	++scan;
				4525	else
				4526	break;
				4527	++count;
				4528	}
				4529	break;
				4530
				4531	case FNAME:
				4532	case FNAME + ADD_NL:
				4533	testval = TRUE;
				4534	/FALLTHROUGH/
				4535	case SFNAME:
				4536	case SFNAME + ADD_NL:
				4537	while (count < maxcount)
				4538	{
				4539	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4540	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4541	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4542	}
				4543	else if (*scan == NUL)
				4544	{
				4545	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4546	break;
				4547	reg_nextline();
				4548	scan = reginput;
				4549	if (got_int)
				4550	break;
				4551	}
				4552	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4553	++scan;
				4554	else
				4555	break;
				4556	++count;
				4557	}
				4558	break;
				4559
				4560	case PRINT:
				4561	case PRINT + ADD_NL:
				4562	testval = TRUE;
				4563	/FALLTHROUGH/
				4564	case SPRINT:
				4565	case SPRINT + ADD_NL:
				4566	while (count < maxcount)
				4567	{
				4568	if (*scan == NUL)
				4569	{
				4570	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4571	break;
				4572	reg_nextline();
				4573	scan = reginput;
				4574	if (got_int)
				4575	break;
				4576	}
				4577	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4578	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4579	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4580	}
				4581	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4582	++scan;
				4583	else
				4584	break;
				4585	++count;
				4586	}
				4587	break;
				4588
				4589	case WHITE:
				4590	case WHITE + ADD_NL:
				4591	testval = mask = RI_WHITE;
				4592	do_class:
				4593	while (count < maxcount)
				4594	{
				4595	#ifdef FEAT_MBYTE
				4596	int l;
				4597	#endif
				4598	if (*scan == NUL)
				4599	{
				4600	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4601	break;
				4602	reg_nextline();
				4603	scan = reginput;
				4604	if (got_int)
				4605	break;
				4606	}
				4607	#ifdef FEAT_MBYTE
				4608	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4609	{
				4610	if (testval != 0)
				4611	break;
				4612	scan += l;
				4613	}
				4614	#endif
				4615	else if ((class_tab[*scan] & mask) == testval)
				4616	++scan;
				4617	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4618	++scan;
				4619	else
				4620	break;
				4621	++count;
				4622	}
				4623	break;
				4624
				4625	case NWHITE:
				4626	case NWHITE + ADD_NL:
				4627	mask = RI_WHITE;
				4628	goto do_class;
				4629	case DIGIT:
				4630	case DIGIT + ADD_NL:
				4631	testval = mask = RI_DIGIT;
				4632	goto do_class;
				4633	case NDIGIT:
				4634	case NDIGIT + ADD_NL:
				4635	mask = RI_DIGIT;
				4636	goto do_class;
				4637	case HEX:
				4638	case HEX + ADD_NL:
				4639	testval = mask = RI_HEX;
				4640	goto do_class;
				4641	case NHEX:
				4642	case NHEX + ADD_NL:
				4643	mask = RI_HEX;
				4644	goto do_class;
				4645	case OCTAL:
				4646	case OCTAL + ADD_NL:
				4647	testval = mask = RI_OCTAL;
				4648	goto do_class;
				4649	case NOCTAL:
				4650	case NOCTAL + ADD_NL:
				4651	mask = RI_OCTAL;
				4652	goto do_class;
				4653	case WORD:
				4654	case WORD + ADD_NL:
				4655	testval = mask = RI_WORD;
				4656	goto do_class;
				4657	case NWORD:
				4658	case NWORD + ADD_NL:
				4659	mask = RI_WORD;
				4660	goto do_class;
				4661	case HEAD:
				4662	case HEAD + ADD_NL:
				4663	testval = mask = RI_HEAD;
				4664	goto do_class;
				4665	case NHEAD:
				4666	case NHEAD + ADD_NL:
				4667	mask = RI_HEAD;
				4668	goto do_class;
				4669	case ALPHA:
				4670	case ALPHA + ADD_NL:
				4671	testval = mask = RI_ALPHA;
				4672	goto do_class;
				4673	case NALPHA:
				4674	case NALPHA + ADD_NL:
				4675	mask = RI_ALPHA;
				4676	goto do_class;
				4677	case LOWER:
				4678	case LOWER + ADD_NL:
				4679	testval = mask = RI_LOWER;
				4680	goto do_class;
				4681	case NLOWER:
				4682	case NLOWER + ADD_NL:
				4683	mask = RI_LOWER;
				4684	goto do_class;
				4685	case UPPER:
				4686	case UPPER + ADD_NL:
				4687	testval = mask = RI_UPPER;
				4688	goto do_class;
				4689	case NUPPER:
				4690	case NUPPER + ADD_NL:
				4691	mask = RI_UPPER;
				4692	goto do_class;
				4693
				4694	case EXACTLY:
				4695	{
				4696	int cu, cl;
				4697
				4698	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4699	* would have been used for it. */
				4700	if (ireg_ic)
				4701	{
				4702	cu = TOUPPER_LOC(*opnd);
				4703	cl = TOLOWER_LOC(*opnd);
				4704	while (count < maxcount && (scan == cu \|\| scan == cl))
				4705	{
				4706	count++;
				4707	scan++;
				4708	}
				4709	}
				4710	else
				4711	{
				4712	cu = *opnd;
				4713	while (count < maxcount && *scan == cu)
				4714	{
				4715	count++;
				4716	scan++;
				4717	}
				4718	}
				4719	break;
				4720	}
				4721
				4722	#ifdef FEAT_MBYTE
				4723	case MULTIBYTECODE:
				4724	{
				4725	int i, len, cf = 0;
				4726
				4727	/* Safety check (just in case 'encoding' was changed since
				4728	* compiling the program). */
				4729	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4730	{
				4731	if (ireg_ic && enc_utf8)
				4732	cf = utf_fold(utf_ptr2char(opnd));
				4733	while (count < maxcount)
				4734	{
				4735	for (i = 0; i < len; ++i)
				4736	if (opnd[i] != scan[i])
				4737	break;
				4738	if (i < len && (!ireg_ic \|\| !enc_utf8
				4739	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4740	break;
				4741	scan += len;
				4742	++count;
				4743	}
				4744	}
				4745	}
				4746	break;
				4747	#endif
				4748
				4749	case ANYOF:
				4750	case ANYOF + ADD_NL:
				4751	testval = TRUE;
				4752	/FALLTHROUGH/
				4753
				4754	case ANYBUT:
				4755	case ANYBUT + ADD_NL:
				4756	while (count < maxcount)
				4757	{
				4758	#ifdef FEAT_MBYTE
				4759	int len;
				4760	#endif
				4761	if (*scan == NUL)
				4762	{
				4763	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4764	break;
				4765	reg_nextline();
				4766	scan = reginput;
				4767	if (got_int)
				4768	break;
				4769	}
				4770	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4771	++scan;
				4772	#ifdef FEAT_MBYTE
				4773	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4774	{
				4775	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4776	break;
				4777	scan += len;
				4778	}
				4779	#endif
				4780	else
				4781	{
				4782	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4783	break;
				4784	++scan;
				4785	}
				4786	++count;
				4787	}
				4788	break;
				4789
				4790	case NEWL:
				4791	while (count < maxcount
				4792	&& ((*scan == NUL && reglnum < reg_maxline)
				4793	\|\| (*scan == '\n' && reg_line_lbr)))
				4794	{
				4795	count++;
				4796	if (reg_line_lbr)
				4797	ADVANCE_REGINPUT();
				4798	else
				4799	reg_nextline();
				4800	scan = reginput;
				4801	if (got_int)
				4802	break;
				4803	}
				4804	break;
				4805
				4806	default: /* Oh dear. Called inappropriately. */
				4807	EMSG(_(e_re_corr));
				4808	#ifdef DEBUG
				4809	printf("Called regrepeat with op code %d\n", OP(p));
				4810	#endif
				4811	break;
				4812	}
				4813
				4814	reginput = scan;
				4815
				4816	return (int)count;
				4817	}
				4818
				4819	/*
				4820	* regnext - dig the "next" pointer out of a node
				4821	*/
				4822	static char_u *
				4823	regnext(p)
				4824	char_u *p;
				4825	{
				4826	int offset;
				4827
				4828	if (p == JUST_CALC_SIZE)
				4829	return NULL;
				4830
				4831	offset = NEXT(p);
				4832	if (offset == 0)
				4833	return NULL;
				4834
				4835	if (OP(p) == BACK)
				4836	return p - offset;
				4837	else
				4838	return p + offset;
				4839	}
				4840
				4841	/*
				4842	* Check the regexp program for its magic number.
				4843	* Return TRUE if it's wrong.
				4844	*/
				4845	static int
				4846	prog_magic_wrong()
				4847	{
				4848	if (UCHARAT(REG_MULTI
				4849	? reg_mmatch->regprog->program
				4850	: reg_match->regprog->program) != REGMAGIC)
				4851	{
				4852	EMSG(_(e_re_corr));
				4853	return TRUE;
				4854	}
				4855	return FALSE;
				4856	}
				4857
				4858	/*
				4859	* Cleanup the subexpressions, if this wasn't done yet.
				4860	* This construction is used to clear the subexpressions only when they are
				4861	* used (to increase speed).
				4862	*/
				4863	static void
				4864	cleanup_subexpr()
				4865	{
				4866	if (need_clear_subexpr)
				4867	{
				4868	if (REG_MULTI)
				4869	{
				4870	/* Use 0xff to set lnum to -1 */
				4871	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4872	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4873	}
				4874	else
				4875	{
				4876	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				4877	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				4878	}
				4879	need_clear_subexpr = FALSE;
				4880	}
				4881	}
				4882
				4883	#ifdef FEAT_SYN_HL
				4884	static void
				4885	cleanup_zsubexpr()
				4886	{
				4887	if (need_clear_zsubexpr)
				4888	{
				4889	if (REG_MULTI)
				4890	{
				4891	/* Use 0xff to set lnum to -1 */
				4892	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4893	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4894	}
				4895	else
				4896	{
				4897	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				4898	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				4899	}
				4900	need_clear_zsubexpr = FALSE;
				4901	}
				4902	}
				4903	#endif
				4904
				4905	/*
				4906	* Advance reglnum, regline and reginput to the next line.
				4907	*/
				4908	static void
				4909	reg_nextline()
				4910	{
				4911	regline = reg_getline(++reglnum);
				4912	reginput = regline;
				4913	fast_breakcheck();
				4914	}
				4915
				4916	/*
				4917	* Save the input line and position in a regsave_T.
				4918	*/
				4919	static void
				4920	reg_save(save)
				4921	regsave_T *save;
				4922	{
				4923	if (REG_MULTI)
				4924	{
				4925	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				4926	save->rs_u.pos.lnum = reglnum;
				4927	}
				4928	else
				4929	save->rs_u.ptr = reginput;
				4930	}
				4931
				4932	/*
				4933	* Restore the input line and position from a regsave_T.
				4934	*/
				4935	static void
				4936	reg_restore(save)
				4937	regsave_T *save;
				4938	{
				4939	if (REG_MULTI)
				4940	{
				4941	if (reglnum != save->rs_u.pos.lnum)
				4942	{
				4943	/* only call reg_getline() when the line number changed to save
				4944	* a bit of time */
				4945	reglnum = save->rs_u.pos.lnum;
				4946	regline = reg_getline(reglnum);
				4947	}
				4948	reginput = regline + save->rs_u.pos.col;
				4949	}
				4950	else
				4951	reginput = save->rs_u.ptr;
				4952	}
				4953
				4954	/*
				4955	* Return TRUE if current position is equal to saved position.
				4956	*/
				4957	static int
				4958	reg_save_equal(save)
				4959	regsave_T *save;
				4960	{
				4961	if (REG_MULTI)
				4962	return reglnum == save->rs_u.pos.lnum
				4963	&& reginput == regline + save->rs_u.pos.col;
				4964	return reginput == save->rs_u.ptr;
				4965	}
				4966
				4967	/*
				4968	* Tentatively set the sub-expression start to the current position (after
				4969	* calling regmatch() they will have changed). Need to save the existing
				4970	* values for when there is no match.
				4971	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				4972	* depending on REG_MULTI.
				4973	*/
				4974	static void
				4975	save_se_multi(savep, posp)
				4976	save_se_T *savep;
				4977	lpos_T *posp;
				4978	{
				4979	savep->se_u.pos = *posp;
				4980	posp->lnum = reglnum;
				4981	posp->col = (colnr_T)(reginput - regline);
				4982	}
				4983
				4984	static void
				4985	save_se_one(savep, pp)
				4986	save_se_T *savep;
				4987	char_u **pp;
				4988	{
				4989	savep->se_u.ptr = *pp;
				4990	*pp = reginput;
				4991	}
				4992
				4993	/*
				4994	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				4995	*/
				4996	static int
				4997	re_num_cmp(val, scan)
				4998	long_u val;
				4999	char_u *scan;
				5000	{
				5001	long_u n = OPERAND_MIN(scan);
				5002
				5003	if (OPERAND_CMP(scan) == '>')
				5004	return val > n;
				5005	if (OPERAND_CMP(scan) == '<')
				5006	return val < n;
				5007	return val == n;
				5008	}
				5009
				5010
				5011	#ifdef DEBUG
				5012
				5013	/*
				5014	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5015	*/
				5016	static void
				5017	regdump(pattern, r)
				5018	char_u *pattern;
				5019	regprog_T *r;
				5020	{
				5021	char_u *s;
				5022	int op = EXACTLY; /* Arbitrary non-END op. */
				5023	char_u *next;
				5024	char_u *end = NULL;
				5025
				5026	printf("\r\nregcomp(%s):\r\n", pattern);
				5027
				5028	s = r->program + 1;
				5029	/*
				5030	* Loop until we find the END that isn't before a referred next (an END
				5031	* can also appear in a NOMATCH operand).
				5032	*/
				5033	while (op != END \|\| s <= end)
				5034	{
				5035	op = OP(s);
				5036	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5037	next = regnext(s);
				5038	if (next == NULL) /* Next ptr. */
				5039	printf("(0)");
				5040	else
				5041	printf("(%d)", (int)((s - r->program) + (next - s)));
				5042	if (end < next)
				5043	end = next;
				5044	if (op == BRACE_LIMITS)
				5045	{
				5046	/* Two short ints */
				5047	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5048	s += 8;
				5049	}
				5050	s += 3;
				5051	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5052	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5053	\|\| op == EXACTLY)
				5054	{
				5055	/* Literal string, where present. */
				5056	while (*s != NUL)
				5057	printf("%c", *s++);
				5058	s++;
				5059	}
				5060	printf("\r\n");
				5061	}
				5062
				5063	/* Header fields of interest. */
				5064	if (r->regstart != NUL)
				5065	printf("start `%s' 0x%x; ", r->regstart < 256
				5066	? (char *)transchar(r->regstart)
				5067	: "multibyte", r->regstart);
				5068	if (r->reganch)
				5069	printf("anchored; ");
				5070	if (r->regmust != NULL)
				5071	printf("must have \"%s\"", r->regmust);
				5072	printf("\r\n");
				5073	}
				5074
				5075	/*
				5076	* regprop - printable representation of opcode
				5077	*/
				5078	static char_u *
				5079	regprop(op)
				5080	char_u *op;
				5081	{
				5082	char_u *p;
				5083	static char_u buf[50];
				5084
				5085	(void) strcpy(buf, ":");
				5086
				5087	switch (OP(op))
				5088	{
				5089	case BOL:
				5090	p = "BOL";
				5091	break;
				5092	case EOL:
				5093	p = "EOL";
				5094	break;
				5095	case RE_BOF:
				5096	p = "BOF";
				5097	break;
				5098	case RE_EOF:
				5099	p = "EOF";
				5100	break;
				5101	case CURSOR:
				5102	p = "CURSOR";
				5103	break;
				5104	case RE_LNUM:
				5105	p = "RE_LNUM";
				5106	break;
				5107	case RE_COL:
				5108	p = "RE_COL";
				5109	break;
				5110	case RE_VCOL:
				5111	p = "RE_VCOL";
				5112	break;
				5113	case BOW:
				5114	p = "BOW";
				5115	break;
				5116	case EOW:
				5117	p = "EOW";
				5118	break;
				5119	case ANY:
				5120	p = "ANY";
				5121	break;
				5122	case ANY + ADD_NL:
				5123	p = "ANY+NL";
				5124	break;
				5125	case ANYOF:
				5126	p = "ANYOF";
				5127	break;
				5128	case ANYOF + ADD_NL:
				5129	p = "ANYOF+NL";
				5130	break;
				5131	case ANYBUT:
				5132	p = "ANYBUT";
				5133	break;
				5134	case ANYBUT + ADD_NL:
				5135	p = "ANYBUT+NL";
				5136	break;
				5137	case IDENT:
				5138	p = "IDENT";
				5139	break;
				5140	case IDENT + ADD_NL:
				5141	p = "IDENT+NL";
				5142	break;
				5143	case SIDENT:
				5144	p = "SIDENT";
				5145	break;
				5146	case SIDENT + ADD_NL:
				5147	p = "SIDENT+NL";
				5148	break;
				5149	case KWORD:
				5150	p = "KWORD";
				5151	break;
				5152	case KWORD + ADD_NL:
				5153	p = "KWORD+NL";
				5154	break;
				5155	case SKWORD:
				5156	p = "SKWORD";
				5157	break;
				5158	case SKWORD + ADD_NL:
				5159	p = "SKWORD+NL";
				5160	break;
				5161	case FNAME:
				5162	p = "FNAME";
				5163	break;
				5164	case FNAME + ADD_NL:
				5165	p = "FNAME+NL";
				5166	break;
				5167	case SFNAME:
				5168	p = "SFNAME";
				5169	break;
				5170	case SFNAME + ADD_NL:
				5171	p = "SFNAME+NL";
				5172	break;
				5173	case PRINT:
				5174	p = "PRINT";
				5175	break;
				5176	case PRINT + ADD_NL:
				5177	p = "PRINT+NL";
				5178	break;
				5179	case SPRINT:
				5180	p = "SPRINT";
				5181	break;
				5182	case SPRINT + ADD_NL:
				5183	p = "SPRINT+NL";
				5184	break;
				5185	case WHITE:
				5186	p = "WHITE";
				5187	break;
				5188	case WHITE + ADD_NL:
				5189	p = "WHITE+NL";
				5190	break;
				5191	case NWHITE:
				5192	p = "NWHITE";
				5193	break;
				5194	case NWHITE + ADD_NL:
				5195	p = "NWHITE+NL";
				5196	break;
				5197	case DIGIT:
				5198	p = "DIGIT";
				5199	break;
				5200	case DIGIT + ADD_NL:
				5201	p = "DIGIT+NL";
				5202	break;
				5203	case NDIGIT:
				5204	p = "NDIGIT";
				5205	break;
				5206	case NDIGIT + ADD_NL:
				5207	p = "NDIGIT+NL";
				5208	break;
				5209	case HEX:
				5210	p = "HEX";
				5211	break;
				5212	case HEX + ADD_NL:
				5213	p = "HEX+NL";
				5214	break;
				5215	case NHEX:
				5216	p = "NHEX";
				5217	break;
				5218	case NHEX + ADD_NL:
				5219	p = "NHEX+NL";
				5220	break;
				5221	case OCTAL:
				5222	p = "OCTAL";
				5223	break;
				5224	case OCTAL + ADD_NL:
				5225	p = "OCTAL+NL";
				5226	break;
				5227	case NOCTAL:
				5228	p = "NOCTAL";
				5229	break;
				5230	case NOCTAL + ADD_NL:
				5231	p = "NOCTAL+NL";
				5232	break;
				5233	case WORD:
				5234	p = "WORD";
				5235	break;
				5236	case WORD + ADD_NL:
				5237	p = "WORD+NL";
				5238	break;
				5239	case NWORD:
				5240	p = "NWORD";
				5241	break;
				5242	case NWORD + ADD_NL:
				5243	p = "NWORD+NL";
				5244	break;
				5245	case HEAD:
				5246	p = "HEAD";
				5247	break;
				5248	case HEAD + ADD_NL:
				5249	p = "HEAD+NL";
				5250	break;
				5251	case NHEAD:
				5252	p = "NHEAD";
				5253	break;
				5254	case NHEAD + ADD_NL:
				5255	p = "NHEAD+NL";
				5256	break;
				5257	case ALPHA:
				5258	p = "ALPHA";
				5259	break;
				5260	case ALPHA + ADD_NL:
				5261	p = "ALPHA+NL";
				5262	break;
				5263	case NALPHA:
				5264	p = "NALPHA";
				5265	break;
				5266	case NALPHA + ADD_NL:
				5267	p = "NALPHA+NL";
				5268	break;
				5269	case LOWER:
				5270	p = "LOWER";
				5271	break;
				5272	case LOWER + ADD_NL:
				5273	p = "LOWER+NL";
				5274	break;
				5275	case NLOWER:
				5276	p = "NLOWER";
				5277	break;
				5278	case NLOWER + ADD_NL:
				5279	p = "NLOWER+NL";
				5280	break;
				5281	case UPPER:
				5282	p = "UPPER";
				5283	break;
				5284	case UPPER + ADD_NL:
				5285	p = "UPPER+NL";
				5286	break;
				5287	case NUPPER:
				5288	p = "NUPPER";
				5289	break;
				5290	case NUPPER + ADD_NL:
				5291	p = "NUPPER+NL";
				5292	break;
				5293	case BRANCH:
				5294	p = "BRANCH";
				5295	break;
				5296	case EXACTLY:
				5297	p = "EXACTLY";
				5298	break;
				5299	case NOTHING:
				5300	p = "NOTHING";
				5301	break;
				5302	case BACK:
				5303	p = "BACK";
				5304	break;
				5305	case END:
				5306	p = "END";
				5307	break;
				5308	case MOPEN + 0:
				5309	p = "MATCH START";
				5310	break;
				5311	case MOPEN + 1:
				5312	case MOPEN + 2:
				5313	case MOPEN + 3:
				5314	case MOPEN + 4:
				5315	case MOPEN + 5:
				5316	case MOPEN + 6:
				5317	case MOPEN + 7:
				5318	case MOPEN + 8:
				5319	case MOPEN + 9:
				5320	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5321	p = NULL;
				5322	break;
				5323	case MCLOSE + 0:
				5324	p = "MATCH END";
				5325	break;
				5326	case MCLOSE + 1:
				5327	case MCLOSE + 2:
				5328	case MCLOSE + 3:
				5329	case MCLOSE + 4:
				5330	case MCLOSE + 5:
				5331	case MCLOSE + 6:
				5332	case MCLOSE + 7:
				5333	case MCLOSE + 8:
				5334	case MCLOSE + 9:
				5335	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5336	p = NULL;
				5337	break;
				5338	case BACKREF + 1:
				5339	case BACKREF + 2:
				5340	case BACKREF + 3:
				5341	case BACKREF + 4:
				5342	case BACKREF + 5:
				5343	case BACKREF + 6:
				5344	case BACKREF + 7:
				5345	case BACKREF + 8:
				5346	case BACKREF + 9:
				5347	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5348	p = NULL;
				5349	break;
				5350	case NOPEN:
				5351	p = "NOPEN";
				5352	break;
				5353	case NCLOSE:
				5354	p = "NCLOSE";
				5355	break;
				5356	#ifdef FEAT_SYN_HL
				5357	case ZOPEN + 1:
				5358	case ZOPEN + 2:
				5359	case ZOPEN + 3:
				5360	case ZOPEN + 4:
				5361	case ZOPEN + 5:
				5362	case ZOPEN + 6:
				5363	case ZOPEN + 7:
				5364	case ZOPEN + 8:
				5365	case ZOPEN + 9:
				5366	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5367	p = NULL;
				5368	break;
				5369	case ZCLOSE + 1:
				5370	case ZCLOSE + 2:
				5371	case ZCLOSE + 3:
				5372	case ZCLOSE + 4:
				5373	case ZCLOSE + 5:
				5374	case ZCLOSE + 6:
				5375	case ZCLOSE + 7:
				5376	case ZCLOSE + 8:
				5377	case ZCLOSE + 9:
				5378	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5379	p = NULL;
				5380	break;
				5381	case ZREF + 1:
				5382	case ZREF + 2:
				5383	case ZREF + 3:
				5384	case ZREF + 4:
				5385	case ZREF + 5:
				5386	case ZREF + 6:
				5387	case ZREF + 7:
				5388	case ZREF + 8:
				5389	case ZREF + 9:
				5390	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5391	p = NULL;
				5392	break;
				5393	#endif
				5394	case STAR:
				5395	p = "STAR";
				5396	break;
				5397	case PLUS:
				5398	p = "PLUS";
				5399	break;
				5400	case NOMATCH:
				5401	p = "NOMATCH";
				5402	break;
				5403	case MATCH:
				5404	p = "MATCH";
				5405	break;
				5406	case BEHIND:
				5407	p = "BEHIND";
				5408	break;
				5409	case NOBEHIND:
				5410	p = "NOBEHIND";
				5411	break;
				5412	case SUBPAT:
				5413	p = "SUBPAT";
				5414	break;
				5415	case BRACE_LIMITS:
				5416	p = "BRACE_LIMITS";
				5417	break;
				5418	case BRACE_SIMPLE:
				5419	p = "BRACE_SIMPLE";
				5420	break;
				5421	case BRACE_COMPLEX + 0:
				5422	case BRACE_COMPLEX + 1:
				5423	case BRACE_COMPLEX + 2:
				5424	case BRACE_COMPLEX + 3:
				5425	case BRACE_COMPLEX + 4:
				5426	case BRACE_COMPLEX + 5:
				5427	case BRACE_COMPLEX + 6:
				5428	case BRACE_COMPLEX + 7:
				5429	case BRACE_COMPLEX + 8:
				5430	case BRACE_COMPLEX + 9:
				5431	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5432	p = NULL;
				5433	break;
				5434	#ifdef FEAT_MBYTE
				5435	case MULTIBYTECODE:
				5436	p = "MULTIBYTECODE";
				5437	break;
				5438	#endif
				5439	case NEWL:
				5440	p = "NEWL";
				5441	break;
				5442	default:
				5443	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5444	p = NULL;
				5445	break;
				5446	}
				5447	if (p != NULL)
				5448	(void) strcat(buf, p);
				5449	return buf;
				5450	}
				5451	#endif
				5452
				5453	#ifdef FEAT_MBYTE
				5454	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5455
				5456	typedef struct
				5457	{
				5458	int a, b, c;
				5459	} decomp_T;
				5460
				5461
				5462	/* 0xfb20 - 0xfb4f */
				5463	decomp_T decomp_table[0xfb4f-0xfb20+1] =
				5464	{
				5465	{0x5e2,0,0}, /* 0xfb20 alt ayin */
				5466	{0x5d0,0,0}, /* 0xfb21 alt alef */
				5467	{0x5d3,0,0}, /* 0xfb22 alt dalet */
				5468	{0x5d4,0,0}, /* 0xfb23 alt he */
				5469	{0x5db,0,0}, /* 0xfb24 alt kaf */
				5470	{0x5dc,0,0}, /* 0xfb25 alt lamed */
				5471	{0x5dd,0,0}, /* 0xfb26 alt mem-sofit */
				5472	{0x5e8,0,0}, /* 0xfb27 alt resh */
				5473	{0x5ea,0,0}, /* 0xfb28 alt tav */
				5474	{'+', 0, 0}, /* 0xfb29 alt plus */
				5475	{0x5e9, 0x5c1, 0}, /* 0xfb2a shin+shin-dot */
				5476	{0x5e9, 0x5c2, 0}, /* 0xfb2b shin+sin-dot */
				5477	{0x5e9, 0x5c1, 0x5bc}, /* 0xfb2c shin+shin-dot+dagesh */
				5478	{0x5e9, 0x5c2, 0x5bc}, /* 0xfb2d shin+sin-dot+dagesh */
				5479	{0x5d0, 0x5b7, 0}, /* 0xfb2e alef+patah */
				5480	{0x5d0, 0x5b8, 0}, /* 0xfb2f alef+qamats */
				5481	{0x5d0, 0x5b4, 0}, /* 0xfb30 alef+hiriq */
				5482	{0x5d1, 0x5bc, 0}, /* 0xfb31 bet+dagesh */
				5483	{0x5d2, 0x5bc, 0}, /* 0xfb32 gimel+dagesh */
				5484	{0x5d3, 0x5bc, 0}, /* 0xfb33 dalet+dagesh */
				5485	{0x5d4, 0x5bc, 0}, /* 0xfb34 he+dagesh */
				5486	{0x5d5, 0x5bc, 0}, /* 0xfb35 vav+dagesh */
				5487	{0x5d6, 0x5bc, 0}, /* 0xfb36 zayin+dagesh */
				5488	{0xfb37, 0, 0}, /* 0xfb37 -- UNUSED */
				5489	{0x5d8, 0x5bc, 0}, /* 0xfb38 tet+dagesh */
				5490	{0x5d9, 0x5bc, 0}, /* 0xfb39 yud+dagesh */
				5491	{0x5da, 0x5bc, 0}, /* 0xfb3a kaf sofit+dagesh */
				5492	{0x5db, 0x5bc, 0}, /* 0xfb3b kaf+dagesh */
				5493	{0x5dc, 0x5bc, 0}, /* 0xfb3c lamed+dagesh */
				5494	{0xfb3d, 0, 0}, /* 0xfb3d -- UNUSED */
				5495	{0x5de, 0x5bc, 0}, /* 0xfb3e mem+dagesh */
				5496	{0xfb3f, 0, 0}, /* 0xfb3f -- UNUSED */
				5497	{0x5e0, 0x5bc, 0}, /* 0xfb40 nun+dagesh */
				5498	{0x5e1, 0x5bc, 0}, /* 0xfb41 samech+dagesh */
				5499	{0xfb42, 0, 0}, /* 0xfb42 -- UNUSED */
				5500	{0x5e3, 0x5bc, 0}, /* 0xfb43 pe sofit+dagesh */
				5501	{0x5e4, 0x5bc,0}, /* 0xfb44 pe+dagesh */
				5502	{0xfb45, 0, 0}, /* 0xfb45 -- UNUSED */
				5503	{0x5e6, 0x5bc, 0}, /* 0xfb46 tsadi+dagesh */
				5504	{0x5e7, 0x5bc, 0}, /* 0xfb47 qof+dagesh */
				5505	{0x5e8, 0x5bc, 0}, /* 0xfb48 resh+dagesh */
				5506	{0x5e9, 0x5bc, 0}, /* 0xfb49 shin+dagesh */
				5507	{0x5ea, 0x5bc, 0}, /* 0xfb4a tav+dagesh */
				5508	{0x5d5, 0x5b9, 0}, /* 0xfb4b vav+holam */
				5509	{0x5d1, 0x5bf, 0}, /* 0xfb4c bet+rafe */
				5510	{0x5db, 0x5bf, 0}, /* 0xfb4d kaf+rafe */
				5511	{0x5e4, 0x5bf, 0}, /* 0xfb4e pe+rafe */
				5512	{0x5d0, 0x5dc, 0} /* 0xfb4f alef-lamed */
				5513	};
				5514
				5515	static void
				5516	mb_decompose(c, c1, c2, c3)
				5517	int c, c1, c2, *c3;
				5518	{
				5519	decomp_T d;
				5520
				5521	if (c >= 0x4b20 && c <= 0xfb4f)
				5522	{
				5523	d = decomp_table[c - 0xfb20];
				5524	*c1 = d.a;
				5525	*c2 = d.b;
				5526	*c3 = d.c;
				5527	}
				5528	else
				5529	{
				5530	*c1 = c;
				5531	c2 = c3 = 0;
				5532	}
				5533	}
				5534	#endif
				5535
				5536	/*
				5537	* Compare two strings, ignore case if ireg_ic set.
				5538	* Return 0 if strings match, non-zero otherwise.
				5539	* Correct the length "*n" when composing characters are ignored.
				5540	*/
				5541	static int
				5542	cstrncmp(s1, s2, n)
				5543	char_u s1, s2;
				5544	int *n;
				5545	{
				5546	int result;
				5547
				5548	if (!ireg_ic)
				5549	result = STRNCMP(s1, s2, *n);
				5550	else
				5551	result = MB_STRNICMP(s1, s2, *n);
				5552
				5553	#ifdef FEAT_MBYTE
				5554	/* if it failed and it's utf8 and we want to combineignore: */
				5555	if (result != 0 && enc_utf8 && ireg_icombine)
				5556	{
				5557	char_u str1, str2;
				5558	int c1, c2, c11, c12;
				5559	int ix;
				5560	int junk;
				5561
				5562	/* we have to handle the strcmp ourselves, since it is necessary to
				5563	* deal with the composing characters by ignoring them: */
				5564	str1 = s1;
				5565	str2 = s2;
				5566	c1 = c2 = 0;
				5567	for (ix = 0; ix < *n; )
				5568	{
				5569	c1 = mb_ptr2char_adv(&str1);
				5570	c2 = mb_ptr2char_adv(&str2);
				5571	ix += utf_char2len(c1);
				5572
				5573	/* decompose the character if necessary, into 'base' characters
				5574	* because I don't care about Arabic, I will hard-code the Hebrew
				5575	* which I do care about! So sue me... */
				5576	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5577	{
				5578	/* decomposition necessary? */
				5579	mb_decompose(c1, &c11, &junk, &junk);
				5580	mb_decompose(c2, &c12, &junk, &junk);
				5581	c1 = c11;
				5582	c2 = c12;
				5583	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5584	break;
				5585	}
				5586	}
				5587	result = c2 - c1;
				5588	if (result == 0)
				5589	*n = (int)(str2 - s2);
				5590	}
				5591	#endif
				5592
				5593	return result;
				5594	}
				5595
				5596	/*
				5597	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5598	*/
				5599	static char_u *
				5600	cstrchr(s, c)
				5601	char_u *s;
				5602	int c;
				5603	{
				5604	char_u *p;
				5605	int cc;
				5606
				5607	if (!ireg_ic
				5608	#ifdef FEAT_MBYTE
				5609	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5610	#endif
				5611	)
				5612	return vim_strchr(s, c);
				5613
				5614	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5615	* faster (esp. when using MS Visual C++!).
				5616	* For UTF-8 need to use folded case. */
				5617	#ifdef FEAT_MBYTE
				5618	if (enc_utf8 && c > 0x80)
				5619	cc = utf_fold(c);
				5620	else
				5621	#endif
				5622	if (isupper(c))
				5623	cc = TOLOWER_LOC(c);
				5624	else if (islower(c))
				5625	cc = TOUPPER_LOC(c);
				5626	else
				5627	return vim_strchr(s, c);
				5628
				5629	#ifdef FEAT_MBYTE
				5630	if (has_mbyte)
				5631	{
				5632	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5633	{
				5634	if (enc_utf8 && c > 0x80)
				5635	{
				5636	if (utf_fold(utf_ptr2char(p)) == cc)
				5637	return p;
				5638	}
				5639	else if (p == c \|\| p == cc)
				5640	return p;
				5641	}
				5642	}
				5643	else
				5644	#endif
				5645	/* Faster version for when there are no multi-byte characters. */
				5646	for (p = s; *p != NUL; ++p)
				5647	if (p == c \|\| p == cc)
				5648	return p;
				5649
				5650	return NULL;
				5651	}
				5652
				5653	/***************************************************************
				5654	* regsub stuff *
				5655	***************************************************************/
				5656
				5657	/* This stuff below really confuses cc on an SGI -- webb */
				5658	#ifdef __sgi
				5659	# undef __ARGS
				5660	# define __ARGS(x) ()
				5661	#endif
				5662
				5663	/*
				5664	* We should define ftpr as a pointer to a function returning a pointer to
				5665	* a function returning a pointer to a function ...
				5666	* This is impossible, so we declare a pointer to a function returning a
				5667	* pointer to a function returning void. This should work for all compilers.
				5668	*/
				5669	typedef void ((fptr) __ARGS((char_u *, int)))();
				5670
				5671	static fptr do_upper __ARGS((char_u *, int));
				5672	static fptr do_Upper __ARGS((char_u *, int));
				5673	static fptr do_lower __ARGS((char_u *, int));
				5674	static fptr do_Lower __ARGS((char_u *, int));
				5675
				5676	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5677
				5678	static fptr
				5679	do_upper(d, c)
				5680	char_u *d;
				5681	int c;
				5682	{
				5683	*d = TOUPPER_LOC(c);
				5684
				5685	return (fptr)NULL;
				5686	}
				5687
				5688	static fptr
				5689	do_Upper(d, c)
				5690	char_u *d;
				5691	int c;
				5692	{
				5693	*d = TOUPPER_LOC(c);
				5694
				5695	return (fptr)do_Upper;
				5696	}
				5697
				5698	static fptr
				5699	do_lower(d, c)
				5700	char_u *d;
				5701	int c;
				5702	{
				5703	*d = TOLOWER_LOC(c);
				5704
				5705	return (fptr)NULL;
				5706	}
				5707
				5708	static fptr
				5709	do_Lower(d, c)
				5710	char_u *d;
				5711	int c;
				5712	{
				5713	*d = TOLOWER_LOC(c);
				5714
				5715	return (fptr)do_Lower;
				5716	}
				5717
				5718	/*
				5719	* regtilde(): Replace tildes in the pattern by the old pattern.
				5720	*
				5721	* Short explanation of the tilde: It stands for the previous replacement
				5722	* pattern. If that previous pattern also contains a ~ we should go back a
				5723	* step further... But we insert the previous pattern into the current one
				5724	* and remember that.
				5725	* This still does not handle the case where "magic" changes. TODO?
				5726	*
				5727	* The tildes are parsed once before the first call to vim_regsub().
				5728	*/
				5729	char_u *
				5730	regtilde(source, magic)
				5731	char_u *source;
				5732	int magic;
				5733	{
				5734	char_u *newsub = source;
				5735	char_u *tmpsub;
				5736	char_u *p;
				5737	int len;
				5738	int prevlen;
				5739
				5740	for (p = newsub; *p; ++p)
				5741	{
				5742	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5743	{
				5744	if (reg_prev_sub != NULL)
				5745	{
				5746	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5747	prevlen = (int)STRLEN(reg_prev_sub);
				5748	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5749	if (tmpsub != NULL)
				5750	{
				5751	/* copy prefix */
				5752	len = (int)(p - newsub); /* not including ~ */
				5753	mch_memmove(tmpsub, newsub, (size_t)len);
				5754	/* interpretate tilde */
				5755	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5756	/* copy postfix */
				5757	if (!magic)
				5758	++p; /* back off \ */
				5759	STRCPY(tmpsub + len + prevlen, p + 1);
				5760
				5761	if (newsub != source) /* already allocated newsub */
				5762	vim_free(newsub);
				5763	newsub = tmpsub;
				5764	p = newsub + len + prevlen;
				5765	}
				5766	}
				5767	else if (magic)
				5768	STRCPY(p, p + 1); /* remove '~' */
				5769	else
				5770	STRCPY(p, p + 2); /* remove '\~' */
				5771	--p;
				5772	}
				5773	else
				5774	{
				5775	if (p == '\\' && p[1]) / skip escaped characters */
				5776	++p;
				5777	#ifdef FEAT_MBYTE
				5778	if (has_mbyte)
				5779	p += (*mb_ptr2len_check)(p) - 1;
				5780	#endif
				5781	}
				5782	}
				5783
				5784	vim_free(reg_prev_sub);
				5785	if (newsub != source) /* newsub was allocated, just keep it */
				5786	reg_prev_sub = newsub;
				5787	else /* no ~ found, need to save newsub */
				5788	reg_prev_sub = vim_strsave(newsub);
				5789	return newsub;
				5790	}
				5791
				5792	#ifdef FEAT_EVAL
				5793	static int can_f_submatch = FALSE; /* TRUE when submatch() can be used */
				5794
				5795	/* These pointers are used instead of reg_match and reg_mmatch for
				5796	* reg_submatch(). Needed for when the substitution string is an expression
				5797	* that contains a call to substitute() and submatch(). */
				5798	static regmatch_T *submatch_match;
				5799	static regmmatch_T *submatch_mmatch;
				5800	#endif
				5801
				5802	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \|\| defined(PROTO)
				5803	/*
				5804	* vim_regsub() - perform substitutions after a vim_regexec() or
				5805	* vim_regexec_multi() match.
				5806	*
				5807	* If "copy" is TRUE really copy into "dest".
				5808	* If "copy" is FALSE nothing is copied, this is just to find out the length
				5809	* of the result.
				5810	*
				5811	* If "backslash" is TRUE, a backslash will be removed later, need to double
				5812	* them to keep them, and insert a backslash before a CR to avoid it being
				5813	* replaced with a line break later.
				5814	*
				5815	* Note: The matched text must not change between the call of
				5816	* vim_regexec()/vim_regexec_multi() and vim_regsub()! It would make the back
				5817	* references invalid!
				5818	*
				5819	* Returns the size of the replacement, including terminating NUL.
				5820	*/
				5821	int
				5822	vim_regsub(rmp, source, dest, copy, magic, backslash)
				5823	regmatch_T *rmp;
				5824	char_u *source;
				5825	char_u *dest;
				5826	int copy;
				5827	int magic;
				5828	int backslash;
				5829	{
				5830	reg_match = rmp;
				5831	reg_mmatch = NULL;
				5832	reg_maxline = 0;
				5833	return vim_regsub_both(source, dest, copy, magic, backslash);
				5834	}
				5835	#endif
				5836
				5837	int
				5838	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				5839	regmmatch_T *rmp;
				5840	linenr_T lnum;
				5841	char_u *source;
				5842	char_u *dest;
				5843	int copy;
				5844	int magic;
				5845	int backslash;
				5846	{
				5847	reg_match = NULL;
				5848	reg_mmatch = rmp;
				5849	reg_buf = curbuf; /* always works on the current buffer! */
				5850	reg_firstlnum = lnum;
				5851	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				5852	return vim_regsub_both(source, dest, copy, magic, backslash);
				5853	}
				5854
				5855	static int
				5856	vim_regsub_both(source, dest, copy, magic, backslash)
				5857	char_u *source;
				5858	char_u *dest;
				5859	int copy;
				5860	int magic;
				5861	int backslash;
				5862	{
				5863	char_u *src;
				5864	char_u *dst;
				5865	char_u *s;
				5866	int c;
				5867	int no = -1;
				5868	fptr func = (fptr)NULL;
				5869	linenr_T clnum = 0; /* init for GCC */
				5870	int len = 0; /* init for GCC */
				5871	#ifdef FEAT_EVAL
				5872	static char_u *eval_result = NULL;
				5873	#endif
				5874	#ifdef FEAT_MBYTE
				5875	int l;
				5876	#endif
				5877
				5878
				5879	/* Be paranoid... */
				5880	if (source == NULL \|\| dest == NULL)
				5881	{
				5882	EMSG(_(e_null));
				5883	return 0;
				5884	}
				5885	if (prog_magic_wrong())
				5886	return 0;
				5887	src = source;
				5888	dst = dest;
				5889
				5890	/*
				5891	* When the substitute part starts with "\=" evaluate it as an expression.
				5892	*/
				5893	if (source[0] == '\\' && source[1] == '='
				5894	#ifdef FEAT_EVAL
				5895	&& !can_f_submatch /* can't do this recursively */
				5896	#endif
				5897	)
				5898	{
				5899	#ifdef FEAT_EVAL
				5900	/* To make sure that the length doesn't change between checking the
				5901	* length and copying the string, and to speed up things, the
				5902	* resulting string is saved from the call with "copy" == FALSE to the
				5903	* call with "copy" == TRUE. */
				5904	if (copy)
				5905	{
				5906	if (eval_result != NULL)
				5907	{
				5908	STRCPY(dest, eval_result);
				5909	dst += STRLEN(eval_result);
				5910	vim_free(eval_result);
				5911	eval_result = NULL;
				5912	}
				5913	}
				5914	else
				5915	{
				5916	linenr_T save_reg_maxline;
				5917	win_T *save_reg_win;
				5918	int save_ireg_ic;
				5919
				5920	vim_free(eval_result);
				5921
				5922	/* The expression may contain substitute(), which calls us
				5923	* recursively. Make sure submatch() gets the text from the first
				5924	* level. Don't need to save "reg_buf", because
				5925	* vim_regexec_multi() can't be called recursively. */
				5926	submatch_match = reg_match;
				5927	submatch_mmatch = reg_mmatch;
				5928	save_reg_maxline = reg_maxline;
				5929	save_reg_win = reg_win;
				5930	save_ireg_ic = ireg_ic;
				5931	can_f_submatch = TRUE;
				5932
				5933	eval_result = eval_to_string(source + 2, NULL);
				5934	if (eval_result != NULL)
				5935	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	5936	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5937	{
				5938	/* Change NL to CR, so that it becomes a line break.
				5939	* Skip over a backslashed character. */
				5940	if (*s == NL)
				5941	*s = CAR;
				5942	else if (*s == '\\' && s[1] != NUL)
				5943	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5944	}
				5945
				5946	dst += STRLEN(eval_result);
				5947	}
				5948
				5949	reg_match = submatch_match;
				5950	reg_mmatch = submatch_mmatch;
				5951	reg_maxline = save_reg_maxline;
				5952	reg_win = save_reg_win;
				5953	ireg_ic = save_ireg_ic;
				5954	can_f_submatch = FALSE;
				5955	}
				5956	#endif
				5957	}
				5958	else
				5959	while ((c = *src++) != NUL)
				5960	{
				5961	if (c == '&' && magic)
				5962	no = 0;
				5963	else if (c == '\\' && *src != NUL)
				5964	{
				5965	if (*src == '&' && !magic)
				5966	{
				5967	++src;
				5968	no = 0;
				5969	}
				5970	else if ('0' <= src && src <= '9')
				5971	{
				5972	no = *src++ - '0';
				5973	}
				5974	else if (vim_strchr((char_u )"uUlLeE", src))
				5975	{
				5976	switch (*src++)
				5977	{
				5978	case 'u': func = (fptr)do_upper;
				5979	continue;
				5980	case 'U': func = (fptr)do_Upper;
				5981	continue;
				5982	case 'l': func = (fptr)do_lower;
				5983	continue;
				5984	case 'L': func = (fptr)do_Lower;
				5985	continue;
				5986	case 'e':
				5987	case 'E': func = (fptr)NULL;
				5988	continue;
				5989	}
				5990	}
				5991	}
				5992	if (no < 0) /* Ordinary character. */
				5993	{
				5994	if (c == '\\' && *src != NUL)
				5995	{
				5996	/* Check for abbreviations -- webb */
				5997	switch (*src)
				5998	{
				5999	case 'r': c = CAR; ++src; break;
				6000	case 'n': c = NL; ++src; break;
				6001	case 't': c = TAB; ++src; break;
				6002	/* Oh no! \e already has meaning in subst pat :-( */
				6003	/* case 'e': c = ESC; ++src; break; */
				6004	case 'b': c = Ctrl_H; ++src; break;
				6005
				6006	/* If "backslash" is TRUE the backslash will be removed
				6007	* later. Used to insert a literal CR. */
				6008	default: if (backslash)
				6009	{
				6010	if (copy)
				6011	*dst = '\\';
				6012	++dst;
				6013	}
				6014	c = *src++;
				6015	}
				6016	}
				6017
				6018	/* Write to buffer, if copy is set. */
				6019	#ifdef FEAT_MBYTE
				6020	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6021	{
				6022	/* TODO: should use "func" here. */
				6023	if (copy)
				6024	mch_memmove(dst, src - 1, l);
				6025	dst += l - 1;
				6026	src += l - 1;
				6027	}
				6028	else
				6029	{
				6030	#endif
				6031	if (copy)
				6032	{
				6033	if (func == (fptr)NULL) /* just copy */
				6034	*dst = c;
				6035	else /* change case */
				6036	func = (fptr)(func(dst, c));
				6037	/* Turbo C complains without the typecast */
				6038	}
				6039	#ifdef FEAT_MBYTE
				6040	}
				6041	#endif
				6042	dst++;
				6043	}
				6044	else
				6045	{
				6046	if (REG_MULTI)
				6047	{
				6048	clnum = reg_mmatch->startpos[no].lnum;
				6049	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6050	s = NULL;
				6051	else
				6052	{
				6053	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6054	if (reg_mmatch->endpos[no].lnum == clnum)
				6055	len = reg_mmatch->endpos[no].col
				6056	- reg_mmatch->startpos[no].col;
				6057	else
				6058	len = (int)STRLEN(s);
				6059	}
				6060	}
				6061	else
				6062	{
				6063	s = reg_match->startp[no];
				6064	if (reg_match->endp[no] == NULL)
				6065	s = NULL;
				6066	else
				6067	len = (int)(reg_match->endp[no] - s);
				6068	}
				6069	if (s != NULL)
				6070	{
				6071	for (;;)
				6072	{
				6073	if (len == 0)
				6074	{
				6075	if (REG_MULTI)
				6076	{
				6077	if (reg_mmatch->endpos[no].lnum == clnum)
				6078	break;
				6079	if (copy)
				6080	*dst = CAR;
				6081	++dst;
				6082	s = reg_getline(++clnum);
				6083	if (reg_mmatch->endpos[no].lnum == clnum)
				6084	len = reg_mmatch->endpos[no].col;
				6085	else
				6086	len = (int)STRLEN(s);
				6087	}
				6088	else
				6089	break;
				6090	}
				6091	else if (s == NUL) / we hit NUL. */
				6092	{
				6093	if (copy)
				6094	EMSG(_(e_re_damg));
				6095	goto exit;
				6096	}
				6097	else
				6098	{
				6099	if (backslash && (s == CAR \|\| s == '\\'))
				6100	{
				6101	/*
				6102	* Insert a backslash in front of a CR, otherwise
				6103	* it will be replaced by a line break.
				6104	* Number of backslashes will be halved later,
				6105	* double them here.
				6106	*/
				6107	if (copy)
				6108	{
				6109	dst[0] = '\\';
				6110	dst[1] = *s;
				6111	}
				6112	dst += 2;
				6113	}
				6114	#ifdef FEAT_MBYTE
				6115	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6116	{
				6117	/* TODO: should use "func" here. */
				6118	if (copy)
				6119	mch_memmove(dst, s, l);
				6120	dst += l;
				6121	s += l - 1;
				6122	len -= l - 1;
				6123	}
				6124	#endif
				6125	else
				6126	{
				6127	if (copy)
				6128	{
				6129	if (func == (fptr)NULL) /* just copy */
				6130	dst = s;
				6131	else /* change case */
				6132	func = (fptr)(func(dst, *s));
				6133	/* Turbo C complains without the typecast */
				6134	}
				6135	++dst;
				6136	}
				6137	++s;
				6138	--len;
				6139	}
				6140	}
				6141	}
				6142	no = -1;
				6143	}
				6144	}
				6145	if (copy)
				6146	*dst = NUL;
				6147
				6148	exit:
				6149	return (int)((dst - dest) + 1);
				6150	}
				6151
				6152	#ifdef FEAT_EVAL
				6153	/*
				6154	* Used for the submatch() function: get the string from tne n'th submatch in
				6155	* allocated memory.
				6156	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6157	*/
				6158	char_u *
				6159	reg_submatch(no)
				6160	int no;
				6161	{
				6162	char_u *retval = NULL;
				6163	char_u *s;
				6164	int len;
				6165	int round;
				6166	linenr_T lnum;
				6167
				6168	if (!can_f_submatch)
				6169	return NULL;
				6170
				6171	if (submatch_match == NULL)
				6172	{
				6173	/*
				6174	* First round: compute the length and allocate memory.
				6175	* Second round: copy the text.
				6176	*/
				6177	for (round = 1; round <= 2; ++round)
				6178	{
				6179	lnum = submatch_mmatch->startpos[no].lnum;
				6180	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6181	return NULL;
				6182
				6183	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6184	if (s == NULL) /* anti-crash check, cannot happen? */
				6185	break;
				6186	if (submatch_mmatch->endpos[no].lnum == lnum)
				6187	{
				6188	/* Within one line: take form start to end col. */
				6189	len = submatch_mmatch->endpos[no].col
				6190	- submatch_mmatch->startpos[no].col;
				6191	if (round == 2)
				6192	{
				6193	STRNCPY(retval, s, len);
				6194	retval[len] = NUL;
				6195	}
				6196	++len;
				6197	}
				6198	else
				6199	{
				6200	/* Multiple lines: take start line from start col, middle
				6201	* lines completely and end line up to end col. */
				6202	len = (int)STRLEN(s);
				6203	if (round == 2)
				6204	{
				6205	STRCPY(retval, s);
				6206	retval[len] = '\n';
				6207	}
				6208	++len;
				6209	++lnum;
				6210	while (lnum < submatch_mmatch->endpos[no].lnum)
				6211	{
				6212	s = reg_getline(lnum++);
				6213	if (round == 2)
				6214	STRCPY(retval + len, s);
				6215	len += (int)STRLEN(s);
				6216	if (round == 2)
				6217	retval[len] = '\n';
				6218	++len;
				6219	}
				6220	if (round == 2)
				6221	STRNCPY(retval + len, reg_getline(lnum),
				6222	submatch_mmatch->endpos[no].col);
				6223	len += submatch_mmatch->endpos[no].col;
				6224	if (round == 2)
				6225	retval[len] = NUL;
				6226	++len;
				6227	}
				6228
				6229	if (round == 1)
				6230	{
				6231	retval = lalloc((long_u)len, TRUE);
				6232	if (s == NULL)
				6233	return NULL;
				6234	}
				6235	}
				6236	}
				6237	else
				6238	{
				6239	if (submatch_match->endp[no] == NULL)
				6240	retval = NULL;
				6241	else
				6242	{
				6243	s = submatch_match->startp[no];
				6244	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6245	}
				6246	}
				6247
				6248	return retval;
				6249	}
				6250	#endif