Blame - src/regexp.c - android_external_vim

blob: 4aa5b6aba6bbd4d727c076951f85cdf3853ccd43 [file] [log] [blame]

Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
				4	*
				5	* NOTICE:
				6	*
				7	* This is NOT the original regular expression code as written by Henry
				8	* Spencer. This code has been modified specifically for use with the VIM
				9	* editor, and should not be used separately from Vim. If you want a good
				10	* regular expression library, get the original code. The copyright notice
				11	* that follows is from the original.
				12	*
				13	* END NOTICE
				14	*
				15	* Copyright (c) 1986 by University of Toronto.
				16	* Written by Henry Spencer. Not derived from licensed software.
				17	*
				18	* Permission is granted to anyone to use this software for any
				19	* purpose on any computer system, and to redistribute it freely,
				20	* subject to the following restrictions:
				21	*
				22	* 1. The author is not responsible for the consequences of use of
				23	* this software, no matter how awful, even if they arise
				24	* from defects in it.
				25	*
				26	* 2. The origin of this software must not be misrepresented, either
				27	* by explicit claim or by omission.
				28	*
				29	* 3. Altered versions must be plainly marked as such, and must not
				30	* be misrepresented as being the original software.
				31	*
				32	* Beware that some of this code is subtly aware of the way operator
				33	* precedence is structured in regular expressions. Serious changes in
				34	* regular-expression syntax might require a total rethink.
				35	*
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	36	* Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
				37	* Webb, Ciaran McCreesh and Bram Moolenaar.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	38	* Named character class support added by Walter Briscoe (1998 Jul 01)
				39	*/
				40
				41	#include "vim.h"
				42
				43	#undef DEBUG
				44
				45	/*
				46	* The "internal use only" fields in regexp.h are present to pass info from
				47	* compile to execute that permits the execute phase to run lots faster on
				48	* simple cases. They are:
				49	*
				50	* regstart char that must begin a match; NUL if none obvious; Can be a
				51	* multi-byte character.
				52	* reganch is the match anchored (at beginning-of-line only)?
				53	* regmust string (pointer into program) that match must include, or NULL
				54	* regmlen length of regmust string
				55	* regflags RF_ values or'ed together
				56	*
				57	* Regstart and reganch permit very fast decisions on suitable starting points
				58	* for a match, cutting down the work a lot. Regmust permits fast rejection
				59	* of lines that cannot possibly match. The regmust tests are costly enough
				60	* that vim_regcomp() supplies a regmust only if the r.e. contains something
				61	* potentially expensive (at present, the only such thing detected is * or +
				62	* at the start of the r.e., which can involve a lot of backup). Regmlen is
				63	* supplied because the test in vim_regexec() needs it and vim_regcomp() is
				64	* computing it anyway.
				65	*/
				66
				67	/*
				68	* Structure for regexp "program". This is essentially a linear encoding
				69	* of a nondeterministic finite-state machine (aka syntax charts or
				70	* "railroad normal form" in parsing technology). Each node is an opcode
				71	* plus a "next" pointer, possibly plus an operand. "Next" pointers of
				72	* all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
				73	* pointer with a BRANCH on both ends of it is connecting two alternatives.
				74	* (Here we have one of the subtle syntax dependencies: an individual BRANCH
				75	* (as opposed to a collection of them) is never concatenated with anything
				76	* because of operator precedence). The "next" pointer of a BRACES_COMPLEX
				77	* node points to the node after the stuff to be repeated. The operand of some
				78	* types of node is a literal string; for others, it is a node leading into a
				79	* sub-FSM. In particular, the operand of a BRANCH node is the first node of
				80	* the branch. (NB this is not a tree structure: the tail of the branch
				81	* connects to the thing following the set of BRANCHes.)
				82	*
				83	* pattern is coded like:
				84	*
				85	* +-----------------+
				86	* \| V
				87	* <aa>\\|<bb> BRANCH <aa> BRANCH <bb> --> END
				88	* \| ^ \| ^
				89	* +------+ +----------+
				90	*
				91	*
				92	* +------------------+
				93	* V \|
				94	* <aa>* BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
				95	* \| \| ^ ^
				96	* \| +---------------+ \|
				97	* +---------------------------------------------+
				98	*
				99	*
				100	* +-------------------------+
				101	* V \|
				102	* <aa>\{} BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK END
				103	* \| \| ^
				104	* \| +----------------+
				105	* +-----------------------------------------------+
				106	*
				107	*
				108	* <aa>\@!<bb> BRANCH NOMATCH <aa> --> END <bb> --> END
				109	* \| \| ^ ^
				110	* \| +----------------+ \|
				111	* +--------------------------------+
				112	*
				113	* +---------+
				114	* \| V
				115	* \z[abc] BRANCH BRANCH a BRANCH b BRANCH c BRANCH NOTHING --> END
				116	* \| \| \| \| ^ ^
				117	* \| \| \| +-----+ \|
				118	* \| \| +----------------+ \|
				119	* \| +---------------------------+ \|
				120	* +------------------------------------------------------+
				121	*
				122	* They all start with a BRANCH for "\\|" alternatives, even when there is only
				123	* one alternative.
				124	*/
				125
				126	/*
				127	* The opcodes are:
				128	*/
				129
				/*
				 * Opcode numbers 0-19.  The "opnd?" column names the operand that follows
				 * the node, if any.
				 */
				/* definition           number     opnd?    meaning */
				#define END             0       /*          End of program or NOMATCH operand. */
				#define BOL             1       /*          Match "" at beginning of line. */
				#define EOL             2       /*          Match "" at end of line. */
				#define BRANCH          3       /* node     Match this alternative, or the
				                                 *          next... */
				#define BACK            4       /*          Match "", "next" ptr points backward. */
				#define EXACTLY         5       /* str      Match this string. */
				#define NOTHING         6       /*          Match empty string. */
				#define STAR            7       /* node     Match this (simple) thing 0 or more
				                                 *          times. */
				#define PLUS            8       /* node     Match this (simple) thing 1 or more
				                                 *          times. */
				#define MATCH           9       /* node     match the operand zero-width */
				#define NOMATCH         10      /* node     check for no match with operand */
				#define BEHIND          11      /* node     look behind for a match with operand */
				#define NOBEHIND        12      /* node     look behind for no match with operand */
				#define SUBPAT          13      /* node     match the operand here */
				#define BRACE_SIMPLE    14      /* node     Match this (simple) thing between m and
				                                 *          n times (\{m,n\}). */
				#define BOW             15      /*          Match "" after [^a-zA-Z0-9_] */
				#define EOW             16      /*          Match "" at [^a-zA-Z0-9_] */
				#define BRACE_LIMITS    17      /* nr nr    define the min & max for BRACE_SIMPLE
				                                 *          and BRACE_COMPLEX. */
				#define NEWL            18      /*          Match line-break */
				#define BHPOS           19      /*          End position for BEHIND or NOBEHIND */
				156
				157
				/*
				 * Character classes: 20-48 normal, 50-78 include a line-break.
				 * Adding ADD_NL to a normal class opcode gives the variant that also
				 * matches a line-break.
				 * FIRST_NL and LAST_NL are parenthesized so they expand safely inside any
				 * larger expression.
				 */
				#define ADD_NL          30
				#define FIRST_NL        (ANY + ADD_NL)
				#define ANY             20      /*      Match any one character. */
				#define ANYOF           21      /* str  Match any character in this string. */
				#define ANYBUT          22      /* str  Match any character not in this
				                                 *      string. */
				#define IDENT           23      /*      Match identifier char */
				#define SIDENT          24      /*      Match identifier char but no digit */
				#define KWORD           25      /*      Match keyword char */
				#define SKWORD          26      /*      Match word char but no digit */
				#define FNAME           27      /*      Match file name char */
				#define SFNAME          28      /*      Match file name char but no digit */
				#define PRINT           29      /*      Match printable char */
				#define SPRINT          30      /*      Match printable char but no digit */
				#define WHITE           31      /*      Match whitespace char */
				#define NWHITE          32      /*      Match non-whitespace char */
				#define DIGIT           33      /*      Match digit char */
				#define NDIGIT          34      /*      Match non-digit char */
				#define HEX             35      /*      Match hex char */
				#define NHEX            36      /*      Match non-hex char */
				#define OCTAL           37      /*      Match octal char */
				#define NOCTAL          38      /*      Match non-octal char */
				#define WORD            39      /*      Match word char */
				#define NWORD           40      /*      Match non-word char */
				#define HEAD            41      /*      Match head char */
				#define NHEAD           42      /*      Match non-head char */
				#define ALPHA           43      /*      Match alpha char */
				#define NALPHA          44      /*      Match non-alpha char */
				#define LOWER           45      /*      Match lowercase char */
				#define NLOWER          46      /*      Match non-lowercase char */
				#define UPPER           47      /*      Match uppercase char */
				#define NUPPER          48      /*      Match non-uppercase char */
				#define LAST_NL         (NUPPER + ADD_NL)
				/* TRUE when "op" is one of the class opcodes that also match a line-break. */
				#define WITH_NL(op)     ((op) >= FIRST_NL && (op) <= LAST_NL)
				193
				/*
				 * Opcodes from 80 up.  Where the comment starts with a range such as
				 * "-89", the opcode occupies that whole range of ten numbers.
				 */
				#define MOPEN           80  /* -89       Mark this point in input as start of
				                             *           \( subexpr.  MOPEN + 0 marks start of
				                             *           match. */
				#define MCLOSE          90  /* -99       Analogous to MOPEN.  MCLOSE + 0 marks
				                             *           end of match. */
				#define BACKREF         100 /* -109 node Match same string again \1-\9 */

				#ifdef FEAT_SYN_HL
				# define ZOPEN          110 /* -119      Mark this point in input as start of
				                             *           \z( subexpr. */
				# define ZCLOSE         120 /* -129      Analogous to ZOPEN. */
				# define ZREF           130 /* -139 node Match external submatch \z1-\z9 */
				#endif

				#define BRACE_COMPLEX   140 /* -149 node Match nodes between m & n times */

				#define NOPEN           150 /*           Mark this point in input as start of
				                             *           \%( subexpr. */
				#define NCLOSE          151 /*           Analogous to NOPEN. */

				#define MULTIBYTECODE   200 /* mbc       Match one multi-byte character */
				#define RE_BOF          201 /*           Match "" at beginning of file. */
				#define RE_EOF          202 /*           Match "" at end of file. */
				#define CURSOR          203 /*           Match location of cursor. */

				#define RE_LNUM         204 /* nr cmp    Match line number */
				#define RE_COL          205 /* nr cmp    Match column number */
				#define RE_VCOL         206 /* nr cmp    Match virtual column number */
				222
				223	/*
				224	* Magic characters have a special meaning, they don't match literally.
				225	* Magic characters are negative. This separates them from literal characters
				226	* (possibly multi-byte). Only ASCII characters can be Magic.
				227	*/
				/* Magic(x) maps a character to its Magic (negative) value by subtracting 256. */
				#define Magic(x)        ((int)(x) - 256)
				/* un_Magic(x) undoes Magic() by adding 256 again. */
				#define un_Magic(x)     ((x) + 256)
				/* is_Magic(x) is TRUE for a negative, i.e. Magic, value. */
				#define is_Magic(x)     ((x) < 0)
				231
				232	static int no_Magic __ARGS((int x));
				233	static int toggle_Magic __ARGS((int x));
				234
				235	static int
				236	no_Magic(x)
				237	int x;
				238	{
				239	if (is_Magic(x))
				240	return un_Magic(x);
				241	return x;
				242	}
				243
				/*
				 * Flip the Magic property of "x": a Magic (negative) value is converted
				 * back with un_Magic(), any other value is made Magic.
				 */
				    static int
				toggle_Magic(x)
				    int         x;
				{
				    if (is_Magic(x))
				        return un_Magic(x);
				    return Magic(x);
				}
				252
				253	/*
				254	* The first byte of the regexp internal "program" is actually this magic
				255	* number; the start node begins in the second byte. It's used to catch the
				256	* most severe mutilation of the program by the caller.
				257	*/
				258
				#define REGMAGIC        0234    /* octal; stored in the first program byte */
				260
				261	/*
				262	* Opcode notes:
				263	*
				264	* BRANCH The set of branches constituting a single choice are hooked
				265	* together with their "next" pointers, since precedence prevents
				266	* anything being concatenated to any individual branch. The
				267	* "next" pointer of the last BRANCH in a choice points to the
				268	* thing following the whole choice. This is also where the
				269	* final "next" pointer of each individual branch points; each
				270	* branch starts with the operand node of a BRANCH node.
				271	*
				272	* BACK Normal "next" pointers all implicitly point forward; BACK
				273	* exists to make loop structures possible.
				274	*
				275	* STAR,PLUS '=', and complex '*' and '+', are implemented as circular
				276	* BRANCH structures using BACK. Simple cases (one character
				277	* per match) are implemented with STAR and PLUS for speed
				278	* and to minimize recursive plunges.
				279	*
				280	* BRACE_LIMITS This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
				281	* node, and defines the min and max limits to be used for that
				282	* node.
				283	*
				284	* MOPEN,MCLOSE ...are numbered at compile time.
				285	* ZOPEN,ZCLOSE ...ditto
				286	*/
				287
				288	/*
				289	* A node is one char of opcode followed by two chars of "next" pointer.
				290	* "Next" pointers are stored as two 8-bit bytes, high order first. The
				291	* value is a positive offset from the opcode of the node containing it.
				292	* An operand, if any, simply follows the node. (Note that much of the
				293	* code generation knows about this implicit relationship.)
				294	*
				295	* Using two bytes for the "next" pointer is vast overkill for most things,
				296	* but allows patterns to get big without disasters.
				297	*/
				/* Opcode of the node at "p". */
				#define OP(p)           ((int)*(p))
				/* "next" offset of the node at "p": two bytes, high order first. */
				#define NEXT(p)         (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
				/* Start of the operand, directly after the three node bytes. */
				#define OPERAND(p)      ((p) + 3)
				/* Obtain an operand that was stored as four bytes, MSB first. */
				#define OPERAND_MIN(p)  (((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
				                        + ((long)(p)[5] << 8) + (long)(p)[6])
				/* Obtain a second operand stored as four bytes. */
				#define OPERAND_MAX(p)  OPERAND_MIN((p) + 4)
				/* Obtain a second single-byte operand stored after a four bytes operand. */
				#define OPERAND_CMP(p)  (p)[7]
				308
				309	/*
				310	* Utility definitions.
				311	*/
				/* Fetch the byte at "p" as an unsigned value. */
				#define UCHARAT(p)      ((int)*(char_u *)(p))

				/* Used for an error (down from) vim_regcomp(): give the error message, set
				 * rc_did_emsg and return NULL (or FAIL for EMSG_RET_FAIL). */
				#define EMSG_RET_NULL(m) { EMSG(m); rc_did_emsg = TRUE; return NULL; }
				#define EMSG_M_RET_NULL(m, c) { EMSG2(m, (c) ? "" : "\\"); rc_did_emsg = TRUE; return NULL; }
				#define EMSG_RET_FAIL(m) { EMSG(m); rc_did_emsg = TRUE; return FAIL; }
				#define EMSG_ONE_RET_NULL EMSG_M_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)

				#define MAX_LIMIT       (32767L << 16L)
				322
				323	static int re_multi_type __ARGS((int));
				324	static int cstrncmp __ARGS((char_u s1, char_u s2, int *n));
				325	static char_u cstrchr __ARGS((char_u , int));
				326
				327	#ifdef DEBUG
				328	static void regdump __ARGS((char_u , regprog_T ));
				329	static char_u regprop __ARGS((char_u ));
				330	#endif
				331
				#define NOT_MULTI       0
				#define MULTI_ONE       1
				#define MULTI_MULT      2
				/*
				 * Classify "c" as a "multi" operator.
				 * Return NOT_MULTI if c is not a "multi" operator.
				 * Return MULTI_ONE if c is a single "multi" operator (Magic '@', '=', '?').
				 * Return MULTI_MULT if c is a multi "multi" operator (Magic '*', '+', '{').
				 */
				    static int
				re_multi_type(c)
				    int         c;
				{
				    if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
				        return MULTI_ONE;
				    if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
				        return MULTI_MULT;
				    return NOT_MULTI;
				}
				350
				351	/*
				352	* Flags to be passed up and down.
				353	*/
				#define HASWIDTH        0x1     /* Known never to match null string. */
				#define SIMPLE          0x2     /* Simple enough to be STAR/PLUS operand. */
				#define SPSTART         0x4     /* Starts with * or +. */
				#define HASNL           0x8     /* Contains some \n. */
				#define HASLOOKBH       0x10    /* Contains "\@<=" or "\@<!". */
				#define WORST           0       /* Worst case. */
				360
				361	/*
				362	* When regcode is set to this value, code is not emitted and size is computed
				363	* instead.
				364	*/
				365	#define JUST_CALC_SIZE ((char_u *) -1)
				366
				367	static char_u *reg_prev_sub;
				368
				369	/*
				370	* REGEXP_INRANGE contains all characters which are always special in a []
				371	* range after '\'.
				372	* REGEXP_ABBR contains all characters which act as abbreviations after '\'.
				373	* These are:
				374	* \n - New line (NL).
				375	* \r - Carriage Return (CR).
				376	* \t - Tab (TAB).
				377	* \e - Escape (ESC).
				378	* \b - Backspace (Ctrl_H).
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	379	* \d - Character code in decimal, eg \d123
				380	* \o - Character code in octal, eg \o40
				381	* \x - Character code in hex, eg \x4a
				382	* \u - Multibyte character code, eg \u20ac
				383	* \U - Long multibyte character code, eg \U12345678
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	384	*/
				385	static char_u REGEXP_INRANGE[] = "]^-n\\";
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	386	static char_u REGEXP_ABBR[] = "nrtebdoxuU";
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	387
				388	static int backslash_trans __ARGS((int c));
				389	static int skip_class_name __ARGS((char_u **pp));
				390	static char_u skip_anyof __ARGS((char_u p));
				391	static void init_class_tab __ARGS((void));
				392
				393	/*
				394	* Translate '\x' to its control character, except "\n", which is Magic.
				395	*/
				396	static int
				397	backslash_trans(c)
				398	int c;
				399	{
				400	switch (c)
				401	{
				402	case 'r': return CAR;
				403	case 't': return TAB;
				404	case 'e': return ESC;
				405	case 'b': return BS;
				406	}
				407	return c;
				408	}
				409
				410	/*
				411	* Check for a character class name. "pp" points to the '['.
				412	* Returns one of the CLASS_ items. CLASS_NONE means that no item was
				413	* recognized. Otherwise "pp" is advanced to after the item.
				414	*/
				415	static int
				416	skip_class_name(pp)
				417	char_u **pp;
				418	{
				419	static const char *(class_names[]) =
				420	{
				421	"alnum:]",
				422	#define CLASS_ALNUM 0
				423	"alpha:]",
				424	#define CLASS_ALPHA 1
				425	"blank:]",
				426	#define CLASS_BLANK 2
				427	"cntrl:]",
				428	#define CLASS_CNTRL 3
				429	"digit:]",
				430	#define CLASS_DIGIT 4
				431	"graph:]",
				432	#define CLASS_GRAPH 5
				433	"lower:]",
				434	#define CLASS_LOWER 6
				435	"print:]",
				436	#define CLASS_PRINT 7
				437	"punct:]",
				438	#define CLASS_PUNCT 8
				439	"space:]",
				440	#define CLASS_SPACE 9
				441	"upper:]",
				442	#define CLASS_UPPER 10
				443	"xdigit:]",
				444	#define CLASS_XDIGIT 11
				445	"tab:]",
				446	#define CLASS_TAB 12
				447	"return:]",
				448	#define CLASS_RETURN 13
				449	"backspace:]",
				450	#define CLASS_BACKSPACE 14
				451	"escape:]",
				452	#define CLASS_ESCAPE 15
				453	};
				454	#define CLASS_NONE 99
				455	int i;
				456
				457	if ((*pp)[1] == ':')
				458	{
				459	for (i = 0; i < sizeof(class_names) / sizeof(*class_names); ++i)
				460	if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
				461	{
				462	*pp += STRLEN(class_names[i]) + 2;
				463	return i;
				464	}
				465	}
				466	return CLASS_NONE;
				467	}
				468
				469	/*
				470	* Skip over a "[]" range.
				471	* "p" must point to the character after the '['.
				472	* The returned pointer is on the matching ']', or the terminating NUL.
				473	*/
				474	static char_u *
				475	skip_anyof(p)
				476	char_u *p;
				477	{
				478	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				479	#ifdef FEAT_MBYTE
				480	int l;
				481	#endif
				482
				483	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				484
				485	if (p == '^') / Complement of range. */
				486	++p;
				487	if (p == ']' \|\| p == '-')
				488	++p;
				489	while (p != NUL && p != ']')
				490	{
				491	#ifdef FEAT_MBYTE
				492	if (has_mbyte && (l = (*mb_ptr2len_check)(p)) > 1)
				493	p += l;
				494	else
				495	#endif
				496	if (*p == '-')
				497	{
				498	++p;
				499	if (p != ']' && p != NUL)
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	500	mb_ptr_adv(p);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	501	}
				502	else if (*p == '\\'
				503	&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
				504	\|\| (!cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
				505	p += 2;
				506	else if (*p == '[')
				507	{
				508	if (skip_class_name(&p) == CLASS_NONE)
				509	++p; /* It was not a class name */
				510	}
				511	else
				512	++p;
				513	}
				514
				515	return p;
				516	}
				517
				518	/*
				519	* Specific version of character class functions.
				520	* Using a table to keep this fast.
				521	*/
				/* One entry per byte value, holding the RI_ bits set by init_class_tab(). */
				static short    class_tab[256];

				#define RI_DIGIT        0x01
				#define RI_HEX          0x02
				#define RI_OCTAL        0x04
				#define RI_WORD         0x08
				#define RI_HEAD         0x10
				#define RI_ALPHA        0x20
				#define RI_LOWER        0x40
				#define RI_UPPER        0x80
				#define RI_WHITE        0x100
				533
				534	static void
				535	init_class_tab()
				536	{
				537	int i;
				538	static int done = FALSE;
				539
				540	if (done)
				541	return;
				542
				543	for (i = 0; i < 256; ++i)
				544	{
				545	if (i >= '0' && i <= '7')
				546	class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
				547	else if (i >= '8' && i <= '9')
				548	class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
				549	else if (i >= 'a' && i <= 'f')
				550	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				551	#ifdef EBCDIC
				552	else if ((i >= 'g' && i <= 'i') \|\| (i >= 'j' && i <= 'r')
				553	\|\| (i >= 's' && i <= 'z'))
				554	#else
				555	else if (i >= 'g' && i <= 'z')
				556	#endif
				557	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
				558	else if (i >= 'A' && i <= 'F')
				559	class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				560	#ifdef EBCDIC
				561	else if ((i >= 'G' && i <= 'I') \|\| ( i >= 'J' && i <= 'R')
				562	\|\| (i >= 'S' && i <= 'Z'))
				563	#else
				564	else if (i >= 'G' && i <= 'Z')
				565	#endif
				566	class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
				567	else if (i == '_')
				568	class_tab[i] = RI_WORD + RI_HEAD;
				569	else
				570	class_tab[i] = 0;
				571	}
				572	class_tab[' '] \|= RI_WHITE;
				573	class_tab['\t'] \|= RI_WHITE;
				574	done = TRUE;
				575	}
				576
				/*
				 * Character class tests using class_tab[].  With FEAT_MBYTE a character
				 * value of 0x100 or more never matches; without it the argument is used as
				 * the table index directly.
				 */
				#ifdef FEAT_MBYTE
				# define ri_digit(c)    ((c) < 0x100 && (class_tab[c] & RI_DIGIT))
				# define ri_hex(c)      ((c) < 0x100 && (class_tab[c] & RI_HEX))
				# define ri_octal(c)    ((c) < 0x100 && (class_tab[c] & RI_OCTAL))
				# define ri_word(c)     ((c) < 0x100 && (class_tab[c] & RI_WORD))
				# define ri_head(c)     ((c) < 0x100 && (class_tab[c] & RI_HEAD))
				# define ri_alpha(c)    ((c) < 0x100 && (class_tab[c] & RI_ALPHA))
				# define ri_lower(c)    ((c) < 0x100 && (class_tab[c] & RI_LOWER))
				# define ri_upper(c)    ((c) < 0x100 && (class_tab[c] & RI_UPPER))
				# define ri_white(c)    ((c) < 0x100 && (class_tab[c] & RI_WHITE))
				#else
				# define ri_digit(c)    (class_tab[c] & RI_DIGIT)
				# define ri_hex(c)      (class_tab[c] & RI_HEX)
				# define ri_octal(c)    (class_tab[c] & RI_OCTAL)
				# define ri_word(c)     (class_tab[c] & RI_WORD)
				# define ri_head(c)     (class_tab[c] & RI_HEAD)
				# define ri_alpha(c)    (class_tab[c] & RI_ALPHA)
				# define ri_lower(c)    (class_tab[c] & RI_LOWER)
				# define ri_upper(c)    (class_tab[c] & RI_UPPER)
				# define ri_white(c)    (class_tab[c] & RI_WHITE)
				#endif
				598
				/* flags for regflags */
				#define RF_ICASE    1   /* ignore case */
				#define RF_NOICASE  2   /* don't ignore case */
				#define RF_HASNL    4   /* can match a NL */
				#define RF_ICOMBINE 8   /* ignore combining characters */
				#define RF_LOOKBH   16  /* uses "\@<=" or "\@<!" */
				605
				606	/*
				607	* Global work variables for vim_regcomp().
				608	*/
				609
				610	static char_u regparse; / Input-scan pointer. */
				611	static int prevchr_len; /* byte length of previous char */
				612	static int num_complex_braces; /* Complex \{...} count */
				613	static int regnpar; /* () count. */
				614	#ifdef FEAT_SYN_HL
				615	static int regnzpar; /* \z() count. */
				616	static int re_has_z; /* \z item detected */
				617	#endif
				618	static char_u regcode; / Code-emit pointer, or JUST_CALC_SIZE */
				619	static long regsize; /* Code size. */
				620	static char_u had_endbrace[NSUBEXP]; /* flags, TRUE if end of () found */
				621	static unsigned regflags; /* RF_ flags for prog */
				622	static long brace_min[10]; /* Minimums for complex brace repeats */
				623	static long brace_max[10]; /* Maximums for complex brace repeats */
				624	static int brace_count[10]; /* Current counts for complex brace repeats */
				625	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				626	static int had_eol; /* TRUE when EOL found by vim_regcomp() */
				627	#endif
				628	static int one_exactly = FALSE; /* only do one char for EXACTLY */
				629
				630	static int reg_magic; /* magicness of the pattern: */
				631	#define MAGIC_NONE 1 /* "\V" very unmagic */
				632	#define MAGIC_OFF 2 /* "\M" or 'magic' off */
				633	#define MAGIC_ON 3 /* "\m" or 'magic' */
				634	#define MAGIC_ALL 4 /* "\v" very magic */
				635
				636	static int reg_string; /* matching with a string instead of a buffer
				637	line */
				638
				639	/*
				640	* META contains all characters that may be magic, except '^' and '$'.
				641	*/
				642
				643	#ifdef EBCDIC
				644	static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{\|~";
				645	#else
				646	/* META[] is used often enough to justify turning it into a table. */
				647	static char_u META_flags[] = {
				648	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				649	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
				650	/* % & ( ) * + . */
				651	0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
				652	/* 1 2 3 4 5 6 7 8 9 < = > ? */
				653	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
				654	/* @ A C D F H I K L M O */
				655	1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
				656	/* P S U V W X Z [ _ */
				657	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
				658	/* a c d f h i k l m n o */
				659	0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
				660	/* p s u v w x z { \| ~ */
				661	1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
				662	};
				663	#endif
				664
				665	static int curchr;
				666
				/* arguments for reg() */
				#define REG_NOPAREN     0       /* toplevel reg() */
				#define REG_PAREN       1       /* \(\) */
				#define REG_ZPAREN      2       /* \z(\) */
				#define REG_NPAREN      3       /* \%(\) */
				672
				673	/*
				674	* Forward declarations for vim_regcomp()'s friends.
				675	*/
				676	static void initchr __ARGS((char_u *));
				677	static int getchr __ARGS((void));
				678	static void skipchr_keepstart __ARGS((void));
				679	static int peekchr __ARGS((void));
				680	static void skipchr __ARGS((void));
				681	static void ungetchr __ARGS((void));
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	682	static int gethexchrs __ARGS((int maxinputlen));
				683	static int getoctchrs __ARGS((void));
				684	static int getdecchrs __ARGS((void));
				685	static int coll_get_char __ARGS((void));
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	686	static void regcomp_start __ARGS((char_u *expr, int flags));
				687	static char_u reg __ARGS((int, int ));
				688	static char_u regbranch __ARGS((int flagp));
				689	static char_u regconcat __ARGS((int flagp));
				690	static char_u regpiece __ARGS((int ));
				691	static char_u regatom __ARGS((int ));
				692	static char_u *regnode __ARGS((int));
				693	static int prog_magic_wrong __ARGS((void));
				694	static char_u regnext __ARGS((char_u ));
				695	static void regc __ARGS((int b));
				696	#ifdef FEAT_MBYTE
				697	static void regmbc __ARGS((int c));
				698	#endif
				699	static void reginsert __ARGS((int, char_u *));
				700	static void reginsert_limits __ARGS((int, long, long, char_u *));
				701	static char_u re_put_long __ARGS((char_u pr, long_u val));
				702	static int read_limits __ARGS((long , long ));
				703	static void regtail __ARGS((char_u , char_u ));
				704	static void regoptail __ARGS((char_u , char_u ));
				705
				706	/*
				707	* Return TRUE if compiled regular expression "prog" can match a line break.
				708	*/
				709	int
				710	re_multiline(prog)
				711	regprog_T *prog;
				712	{
				713	return (prog->regflags & RF_HASNL);
				714	}
				715
				716	/*
				717	* Return TRUE if compiled regular expression "prog" looks before the start
				718	* position (pattern contains "\@<=" or "\@<!").
				719	*/
				720	int
				721	re_lookbehind(prog)
				722	regprog_T *prog;
				723	{
				724	return (prog->regflags & RF_LOOKBH);
				725	}
				726
				727	/*
				728	* Skip past regular expression.
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	729	* Stop at end of 'p' or where 'dirc' is found ('/', '?', etc).
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	730	* Take care of characters with a backslash in front of it.
				731	* Skip strings inside [ and ].
				732	* When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
				733	* expression and change "\?" to "?". If "*newp" is not NULL the expression
				734	* is changed in-place.
				735	*/
				736	char_u *
				737	skip_regexp(startp, dirc, magic, newp)
				738	char_u *startp;
				739	int dirc;
				740	int magic;
				741	char_u **newp;
				742	{
				743	int mymagic;
				744	char_u *p = startp;
				745
				746	if (magic)
				747	mymagic = MAGIC_ON;
				748	else
				749	mymagic = MAGIC_OFF;
				750
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	751	for (; p[0] != NUL; mb_ptr_adv(p))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	752	{
				753	if (p[0] == dirc) /* found end of regexp */
				754	break;
				755	if ((p[0] == '[' && mymagic >= MAGIC_ON)
				756	\|\| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
				757	{
				758	p = skip_anyof(p + 1);
				759	if (p[0] == NUL)
				760	break;
				761	}
				762	else if (p[0] == '\\' && p[1] != NUL)
				763	{
				764	if (dirc == '?' && newp != NULL && p[1] == '?')
				765	{
				766	/* change "\?" to "?", make a copy first. */
				767	if (*newp == NULL)
				768	{
				769	*newp = vim_strsave(startp);
				770	if (*newp != NULL)
				771	p = *newp + (p - startp);
				772	}
				773	if (*newp != NULL)
				774	mch_memmove(p, p + 1, STRLEN(p));
				775	else
				776	++p;
				777	}
				778	else
				779	++p; /* skip next character */
				780	if (*p == 'v')
				781	mymagic = MAGIC_ALL;
				782	else if (*p == 'V')
				783	mymagic = MAGIC_NONE;
				784	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	785	}
				786	return p;
				787	}
				788
				789	/*
Bram Moolenaar	86b6835	2004-12-27 21:59:20 +0000	[diff] [blame]	790	* vim_regcomp() - compile a regular expression into internal code
				791	* Returns the program in allocated space. Returns NULL for an error.
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	792	*
				793	* We can't allocate space until we know how big the compiled form will be,
				794	* but we can't compile it (and thus know how big it is) until we've got a
				795	* place to put the code. So we cheat: we compile it twice, once with code
				796	* generation turned off and size counting turned on, and once "for real".
				797	* This also means that we don't allocate space until we are sure that the
				798	* thing really will compile successfully, and we never have to move the
				799	* code and thus invalidate pointers into it. (Note that it has to be in
				800	* one piece because vim_free() must be able to free it all.)
				801	*
				802	* Whether upper/lower case is to be ignored is decided when executing the
				803	* program, it does not matter here.
				804	*
				805	* Beware that the optimization-preparation code in here knows about some
				806	* of the structure of the compiled regexp.
				807	* "re_flags": RE_MAGIC and/or RE_STRING.
				808	*/
				809	regprog_T *
				810	vim_regcomp(expr, re_flags)
				811	char_u *expr;
				812	int re_flags;
				813	{
				814	regprog_T *r;
				815	char_u *scan;
				816	char_u *longest;
				817	int len;
				818	int flags;
				819
				820	if (expr == NULL)
				821	EMSG_RET_NULL(_(e_null));
				822
				823	init_class_tab();
				824
				825	/*
				826	* First pass: determine size, legality.
				827	*/
				828	regcomp_start(expr, re_flags);
				829	regcode = JUST_CALC_SIZE;
				830	regc(REGMAGIC);
				831	if (reg(REG_NOPAREN, &flags) == NULL)
				832	return NULL;
				833
				834	/* Small enough for pointer-storage convention? */
				835	#ifdef SMALL_MALLOC /* 16 bit storage allocation */
				836	if (regsize >= 65536L - 256L)
				837	EMSG_RET_NULL(_("E339: Pattern too long"));
				838	#endif
				839
				840	/* Allocate space. */
				841	r = (regprog_T *)lalloc(sizeof(regprog_T) + regsize, TRUE);
				842	if (r == NULL)
				843	return NULL;
				844
				845	/*
				846	* Second pass: emit code.
				847	*/
				848	regcomp_start(expr, re_flags);
				849	regcode = r->program;
				850	regc(REGMAGIC);
				851	if (reg(REG_NOPAREN, &flags) == NULL)
				852	{
				853	vim_free(r);
				854	return NULL;
				855	}
				856
				857	/* Dig out information for optimizations. */
				858	r->regstart = NUL; /* Worst-case defaults. */
				859	r->reganch = 0;
				860	r->regmust = NULL;
				861	r->regmlen = 0;
				862	r->regflags = regflags;
				863	if (flags & HASNL)
				864	r->regflags \|= RF_HASNL;
				865	if (flags & HASLOOKBH)
				866	r->regflags \|= RF_LOOKBH;
				867	#ifdef FEAT_SYN_HL
				868	/* Remember whether this pattern has any \z specials in it. */
				869	r->reghasz = re_has_z;
				870	#endif
				871	scan = r->program + 1; /* First BRANCH. */
				872	if (OP(regnext(scan)) == END) /* Only one top-level choice. */
				873	{
				874	scan = OPERAND(scan);
				875
				876	/* Starting-point info. */
				877	if (OP(scan) == BOL \|\| OP(scan) == RE_BOF)
				878	{
				879	r->reganch++;
				880	scan = regnext(scan);
				881	}
				882
				883	if (OP(scan) == EXACTLY)
				884	{
				885	#ifdef FEAT_MBYTE
				886	if (has_mbyte)
				887	r->regstart = (*mb_ptr2char)(OPERAND(scan));
				888	else
				889	#endif
				890	r->regstart = *OPERAND(scan);
				891	}
				892	else if ((OP(scan) == BOW
				893	\|\| OP(scan) == EOW
				894	\|\| OP(scan) == NOTHING
				895	\|\| OP(scan) == MOPEN + 0 \|\| OP(scan) == NOPEN
				896	\|\| OP(scan) == MCLOSE + 0 \|\| OP(scan) == NCLOSE)
				897	&& OP(regnext(scan)) == EXACTLY)
				898	{
				899	#ifdef FEAT_MBYTE
				900	if (has_mbyte)
				901	r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
				902	else
				903	#endif
				904	r->regstart = *OPERAND(regnext(scan));
				905	}
				906
				907	/*
				908	* If there's something expensive in the r.e., find the longest
				909	* literal string that must appear and make it the regmust. Resolve
				910	* ties in favor of later strings, since the regstart check works
				911	* with the beginning of the r.e. and avoiding duplication
				912	* strengthens checking. Not a strong reason, but sufficient in the
				913	* absence of others.
				914	*/
				915	/*
				916	* When the r.e. starts with BOW, it is faster to look for a regmust
				917	* first. Used a lot for "#" and "*" commands. (Added by mool).
				918	*/
				919	if ((flags & SPSTART \|\| OP(scan) == BOW \|\| OP(scan) == EOW)
				920	&& !(flags & HASNL))
				921	{
				922	longest = NULL;
				923	len = 0;
				924	for (; scan != NULL; scan = regnext(scan))
				925	if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
				926	{
				927	longest = OPERAND(scan);
				928	len = (int)STRLEN(OPERAND(scan));
				929	}
				930	r->regmust = longest;
				931	r->regmlen = len;
				932	}
				933	}
				934	#ifdef DEBUG
				935	regdump(expr, r);
				936	#endif
				937	return r;
				938	}
				939
				940	/*
				941	* Setup to parse the regexp. Used once to get the length and once to do it.
				942	*/
				943	static void
				944	regcomp_start(expr, re_flags)
				945	char_u *expr;
				946	int re_flags; /* see vim_regcomp() */
				947	{
				948	initchr(expr);
				949	if (re_flags & RE_MAGIC)
				950	reg_magic = MAGIC_ON;
				951	else
				952	reg_magic = MAGIC_OFF;
				953	reg_string = (re_flags & RE_STRING);
				954
				955	num_complex_braces = 0;
				956	regnpar = 1;
				957	vim_memset(had_endbrace, 0, sizeof(had_endbrace));
				958	#ifdef FEAT_SYN_HL
				959	regnzpar = 1;
				960	re_has_z = 0;
				961	#endif
				962	regsize = 0L;
				963	regflags = 0;
				964	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				965	had_eol = FALSE;
				966	#endif
				967	}
				968
				#if defined(FEAT_SYN_HL) || defined(PROTO)
				/*
				 * Check if during the previous call to vim_regcomp the EOL item "$" has been
				 * found.  This is messy, but it works fine.
				 * Returns the current value of had_eol, which regcomp_start() clears at the
				 * start of each compile pass.
				 */
				int
				vim_regcomp_had_eol()
				{
				    return had_eol;
				}
				#endif
				980
				981	/*
				982	* reg - regular expression, i.e. main body or parenthesized thing
				983	*
				984	* Caller must absorb opening parenthesis.
				985	*
				986	* Combining parenthesis handling with the base level of regular expression
				987	* is a trifle forced, but the need to tie the tails of the branches to what
				988	* follows makes it hard to avoid.
				989	*/
				990	static char_u *
				991	reg(paren, flagp)
				992	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				993	int *flagp;
				994	{
				995	char_u *ret;
				996	char_u *br;
				997	char_u *ender;
				998	int parno = 0;
				999	int flags;
				1000
				1001	flagp = HASWIDTH; / Tentatively. */
				1002
				1003	#ifdef FEAT_SYN_HL
				1004	if (paren == REG_ZPAREN)
				1005	{
				1006	/* Make a ZOPEN node. */
				1007	if (regnzpar >= NSUBEXP)
				1008	EMSG_RET_NULL(_("E50: Too many \\z("));
				1009	parno = regnzpar;
				1010	regnzpar++;
				1011	ret = regnode(ZOPEN + parno);
				1012	}
				1013	else
				1014	#endif
				1015	if (paren == REG_PAREN)
				1016	{
				1017	/* Make a MOPEN node. */
				1018	if (regnpar >= NSUBEXP)
				1019	EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
				1020	parno = regnpar;
				1021	++regnpar;
				1022	ret = regnode(MOPEN + parno);
				1023	}
				1024	else if (paren == REG_NPAREN)
				1025	{
				1026	/* Make a NOPEN node. */
				1027	ret = regnode(NOPEN);
				1028	}
				1029	else
				1030	ret = NULL;
				1031
				1032	/* Pick up the branches, linking them together. */
				1033	br = regbranch(&flags);
				1034	if (br == NULL)
				1035	return NULL;
				1036	if (ret != NULL)
				1037	regtail(ret, br); /* [MZ]OPEN -> first. */
				1038	else
				1039	ret = br;
				1040	/* If one of the branches can be zero-width, the whole thing can.
				1041	* If one of the branches has * at start or matches a line-break, the
				1042	* whole thing can. */
				1043	if (!(flags & HASWIDTH))
				1044	*flagp &= ~HASWIDTH;
				1045	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1046	while (peekchr() == Magic('\|'))
				1047	{
				1048	skipchr();
				1049	br = regbranch(&flags);
				1050	if (br == NULL)
				1051	return NULL;
				1052	regtail(ret, br); /* BRANCH -> BRANCH. */
				1053	if (!(flags & HASWIDTH))
				1054	*flagp &= ~HASWIDTH;
				1055	*flagp \|= flags & (SPSTART \| HASNL \| HASLOOKBH);
				1056	}
				1057
				1058	/* Make a closing node, and hook it on the end. */
				1059	ender = regnode(
				1060	#ifdef FEAT_SYN_HL
				1061	paren == REG_ZPAREN ? ZCLOSE + parno :
				1062	#endif
				1063	paren == REG_PAREN ? MCLOSE + parno :
				1064	paren == REG_NPAREN ? NCLOSE : END);
				1065	regtail(ret, ender);
				1066
				1067	/* Hook the tails of the branches to the closing node. */
				1068	for (br = ret; br != NULL; br = regnext(br))
				1069	regoptail(br, ender);
				1070
				1071	/* Check for proper termination. */
				1072	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1073	{
				1074	#ifdef FEAT_SYN_HL
				1075	if (paren == REG_ZPAREN)
				1076	EMSG_RET_NULL(_("E52: Unmatched \\z("))
				1077	else
				1078	#endif
				1079	if (paren == REG_NPAREN)
				1080	EMSG_M_RET_NULL(_("E53: Unmatched %s%%("), reg_magic == MAGIC_ALL)
				1081	else
				1082	EMSG_M_RET_NULL(_("E54: Unmatched %s("), reg_magic == MAGIC_ALL)
				1083	}
				1084	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1085	{
				1086	if (curchr == Magic(')'))
				1087	EMSG_M_RET_NULL(_("E55: Unmatched %s)"), reg_magic == MAGIC_ALL)
				1088	else
				1089	EMSG_RET_NULL(_(e_trailing)) /* "Can't happen". */
				1090	/* NOTREACHED */
				1091	}
				1092	/*
				1093	* Here we set the flag allowing back references to this set of
				1094	* parentheses.
				1095	*/
				1096	if (paren == REG_PAREN)
				1097	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1098	return ret;
				1099	}
				1100
				1101	/*
				1102	* regbranch - one alternative of an \| operator
				1103	*
				1104	* Implements the & operator.
				1105	*/
				1106	static char_u *
				1107	regbranch(flagp)
				1108	int *flagp;
				1109	{
				1110	char_u *ret;
				1111	char_u *chain = NULL;
				1112	char_u *latest;
				1113	int flags;
				1114
				1115	flagp = WORST \| HASNL; / Tentatively. */
				1116
				1117	ret = regnode(BRANCH);
				1118	for (;;)
				1119	{
				1120	latest = regconcat(&flags);
				1121	if (latest == NULL)
				1122	return NULL;
				1123	/* If one of the branches has width, the whole thing has. If one of
				1124	* the branches anchors at start-of-line, the whole thing does.
				1125	* If one of the branches uses look-behind, the whole thing does. */
				1126	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASLOOKBH);
				1127	/* If one of the branches doesn't match a line-break, the whole thing
				1128	* doesn't. */
				1129	*flagp &= ~HASNL \| (flags & HASNL);
				1130	if (chain != NULL)
				1131	regtail(chain, latest);
				1132	if (peekchr() != Magic('&'))
				1133	break;
				1134	skipchr();
				1135	regtail(latest, regnode(END)); /* operand ends */
				1136	reginsert(MATCH, latest);
				1137	chain = latest;
				1138	}
				1139
				1140	return ret;
				1141	}
				1142
				1143	/*
				1144	* regbranch - one alternative of an \| or & operator
				1145	*
				1146	* Implements the concatenation operator.
				1147	*/
				1148	static char_u *
				1149	regconcat(flagp)
				1150	int *flagp;
				1151	{
				1152	char_u *first = NULL;
				1153	char_u *chain = NULL;
				1154	char_u *latest;
				1155	int flags;
				1156	int cont = TRUE;
				1157
				1158	flagp = WORST; / Tentatively. */
				1159
				1160	while (cont)
				1161	{
				1162	switch (peekchr())
				1163	{
				1164	case NUL:
				1165	case Magic('\|'):
				1166	case Magic('&'):
				1167	case Magic(')'):
				1168	cont = FALSE;
				1169	break;
				1170	case Magic('Z'):
				1171	#ifdef FEAT_MBYTE
				1172	regflags \|= RF_ICOMBINE;
				1173	#endif
				1174	skipchr_keepstart();
				1175	break;
				1176	case Magic('c'):
				1177	regflags \|= RF_ICASE;
				1178	skipchr_keepstart();
				1179	break;
				1180	case Magic('C'):
				1181	regflags \|= RF_NOICASE;
				1182	skipchr_keepstart();
				1183	break;
				1184	case Magic('v'):
				1185	reg_magic = MAGIC_ALL;
				1186	skipchr_keepstart();
				1187	curchr = -1;
				1188	break;
				1189	case Magic('m'):
				1190	reg_magic = MAGIC_ON;
				1191	skipchr_keepstart();
				1192	curchr = -1;
				1193	break;
				1194	case Magic('M'):
				1195	reg_magic = MAGIC_OFF;
				1196	skipchr_keepstart();
				1197	curchr = -1;
				1198	break;
				1199	case Magic('V'):
				1200	reg_magic = MAGIC_NONE;
				1201	skipchr_keepstart();
				1202	curchr = -1;
				1203	break;
				1204	default:
				1205	latest = regpiece(&flags);
				1206	if (latest == NULL)
				1207	return NULL;
				1208	*flagp \|= flags & (HASWIDTH \| HASNL \| HASLOOKBH);
				1209	if (chain == NULL) /* First piece. */
				1210	*flagp \|= flags & SPSTART;
				1211	else
				1212	regtail(chain, latest);
				1213	chain = latest;
				1214	if (first == NULL)
				1215	first = latest;
				1216	break;
				1217	}
				1218	}
				1219	if (first == NULL) /* Loop ran zero times. */
				1220	first = regnode(NOTHING);
				1221	return first;
				1222	}
				1223
				1224	/*
				1225	* regpiece - something followed by possible [*+=]
				1226	*
				1227	* Note that the branching code sequences used for = and the general cases
				1228	* of * and + are somewhat optimized: they use the same NOTHING node as
				1229	* both the endmarker for their branch list and the body of the last branch.
				1230	* It might seem that this node could be dispensed with entirely, but the
				1231	* endmarker role is not redundant.
				1232	*/
				1233	static char_u *
				1234	regpiece(flagp)
				1235	int *flagp;
				1236	{
				1237	char_u *ret;
				1238	int op;
				1239	char_u *next;
				1240	int flags;
				1241	long minval;
				1242	long maxval;
				1243
				1244	ret = regatom(&flags);
				1245	if (ret == NULL)
				1246	return NULL;
				1247
				1248	op = peekchr();
				1249	if (re_multi_type(op) == NOT_MULTI)
				1250	{
				1251	*flagp = flags;
				1252	return ret;
				1253	}
				1254	if (!(flags & HASWIDTH) && re_multi_type(op) == MULTI_MULT)
				1255	{
				1256	if (op == Magic('*'))
				1257	EMSG_M_RET_NULL(_("E56: %s* operand could be empty"),
				1258	reg_magic >= MAGIC_ON);
				1259	if (op == Magic('+'))
				1260	EMSG_M_RET_NULL(_("E57: %s+ operand could be empty"),
				1261	reg_magic == MAGIC_ALL);
				1262	/* "\{}" is checked below, it's allowed when there is an upper limit */
				1263	}
				1264	/* default flags */
				1265	*flagp = (WORST \| SPSTART \| (flags & (HASNL \| HASLOOKBH)));
				1266
				1267	skipchr();
				1268	switch (op)
				1269	{
				1270	case Magic('*'):
				1271	if (flags & SIMPLE)
				1272	reginsert(STAR, ret);
				1273	else
				1274	{
				1275	/* Emit x* as (x&\|), where & means "self". */
				1276	reginsert(BRANCH, ret); /* Either x */
				1277	regoptail(ret, regnode(BACK)); /* and loop */
				1278	regoptail(ret, ret); /* back */
				1279	regtail(ret, regnode(BRANCH)); /* or */
				1280	regtail(ret, regnode(NOTHING)); /* null. */
				1281	}
				1282	break;
				1283
				1284	case Magic('+'):
				1285	if (flags & SIMPLE)
				1286	reginsert(PLUS, ret);
				1287	else
				1288	{
				1289	/* Emit x+ as x(&\|), where & means "self". */
				1290	next = regnode(BRANCH); /* Either */
				1291	regtail(ret, next);
				1292	regtail(regnode(BACK), ret); /* loop back */
				1293	regtail(next, regnode(BRANCH)); /* or */
				1294	regtail(ret, regnode(NOTHING)); /* null. */
				1295	}
				1296	*flagp = (WORST \| HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1297	break;
				1298
				1299	case Magic('@'):
				1300	{
				1301	int lop = END;
				1302
				1303	switch (no_Magic(getchr()))
				1304	{
				1305	case '=': lop = MATCH; break; /* \@= */
				1306	case '!': lop = NOMATCH; break; /* \@! */
				1307	case '>': lop = SUBPAT; break; /* \@> */
				1308	case '<': switch (no_Magic(getchr()))
				1309	{
				1310	case '=': lop = BEHIND; break; /* \@<= */
				1311	case '!': lop = NOBEHIND; break; /* \@<! */
				1312	}
				1313	}
				1314	if (lop == END)
				1315	EMSG_M_RET_NULL(_("E59: invalid character after %s@"),
				1316	reg_magic == MAGIC_ALL);
				1317	/* Look behind must match with behind_pos. */
				1318	if (lop == BEHIND \|\| lop == NOBEHIND)
				1319	{
				1320	regtail(ret, regnode(BHPOS));
				1321	*flagp \|= HASLOOKBH;
				1322	}
				1323	regtail(ret, regnode(END)); /* operand ends */
				1324	reginsert(lop, ret);
				1325	break;
				1326	}
				1327
				1328	case Magic('?'):
				1329	case Magic('='):
				1330	/* Emit x= as (x\|) */
				1331	reginsert(BRANCH, ret); /* Either x */
				1332	regtail(ret, regnode(BRANCH)); /* or */
				1333	next = regnode(NOTHING); /* null. */
				1334	regtail(ret, next);
				1335	regoptail(ret, next);
				1336	break;
				1337
				1338	case Magic('{'):
				1339	if (!read_limits(&minval, &maxval))
				1340	return NULL;
				1341	if (!(flags & HASWIDTH) && (maxval > minval
				1342	? maxval >= MAX_LIMIT : minval >= MAX_LIMIT))
				1343	EMSG_M_RET_NULL(_("E58: %s{ operand could be empty"),
				1344	reg_magic == MAGIC_ALL);
				1345	if (flags & SIMPLE)
				1346	{
				1347	reginsert(BRACE_SIMPLE, ret);
				1348	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1349	}
				1350	else
				1351	{
				1352	if (num_complex_braces >= 10)
				1353	EMSG_M_RET_NULL(_("E60: Too many complex %s{...}s"),
				1354	reg_magic == MAGIC_ALL);
				1355	reginsert(BRACE_COMPLEX + num_complex_braces, ret);
				1356	regoptail(ret, regnode(BACK));
				1357	regoptail(ret, ret);
				1358	reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
				1359	++num_complex_braces;
				1360	}
				1361	if (minval > 0 && maxval > 0)
				1362	*flagp = (HASWIDTH \| (flags & (HASNL \| HASLOOKBH)));
				1363	break;
				1364	}
				1365	if (re_multi_type(peekchr()) != NOT_MULTI)
				1366	{
				1367	/* Can't have a multi follow a multi. */
				1368	if (peekchr() == Magic('*'))
				1369	sprintf((char )IObuff, _("E61: Nested %s"),
				1370	reg_magic >= MAGIC_ON ? "" : "\\");
				1371	else
				1372	sprintf((char *)IObuff, _("E62: Nested %s%c"),
				1373	reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
				1374	EMSG_RET_NULL(IObuff);
				1375	}
				1376
				1377	return ret;
				1378	}
				1379
				1380	/*
				1381	* regatom - the lowest level
				1382	*
				1383	* Optimization: gobbles an entire sequence of ordinary characters so that
				1384	* it can turn them into a single node, which is smaller to store and
				1385	* faster to run. Don't do this when one_exactly is set.
				1386	*/
				1387	static char_u *
				1388	regatom(flagp)
				1389	int *flagp;
				1390	{
				1391	char_u *ret;
				1392	int flags;
				1393	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				1394	int c;
				1395	static char_u classchars = (char_u )".iIkKfFpPsSdDxXoOwWhHaAlLuU";
				1396	static int classcodes[] = {ANY, IDENT, SIDENT, KWORD, SKWORD,
				1397	FNAME, SFNAME, PRINT, SPRINT,
				1398	WHITE, NWHITE, DIGIT, NDIGIT,
				1399	HEX, NHEX, OCTAL, NOCTAL,
				1400	WORD, NWORD, HEAD, NHEAD,
				1401	ALPHA, NALPHA, LOWER, NLOWER,
				1402	UPPER, NUPPER
				1403	};
				1404	char_u *p;
				1405	int extra = 0;
				1406
				1407	flagp = WORST; / Tentatively. */
				1408	cpo_lit = (!reg_syn && vim_strchr(p_cpo, CPO_LITERAL) != NULL);
				1409
				1410	c = getchr();
				1411	switch (c)
				1412	{
				1413	case Magic('^'):
				1414	ret = regnode(BOL);
				1415	break;
				1416
				1417	case Magic('$'):
				1418	ret = regnode(EOL);
				1419	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1420	had_eol = TRUE;
				1421	#endif
				1422	break;
				1423
				1424	case Magic('<'):
				1425	ret = regnode(BOW);
				1426	break;
				1427
				1428	case Magic('>'):
				1429	ret = regnode(EOW);
				1430	break;
				1431
				1432	case Magic('_'):
				1433	c = no_Magic(getchr());
				1434	if (c == '^') /* "\_^" is start-of-line */
				1435	{
				1436	ret = regnode(BOL);
				1437	break;
				1438	}
				1439	if (c == '$') /* "\_$" is end-of-line */
				1440	{
				1441	ret = regnode(EOL);
				1442	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				1443	had_eol = TRUE;
				1444	#endif
				1445	break;
				1446	}
				1447
				1448	extra = ADD_NL;
				1449	*flagp \|= HASNL;
				1450
				1451	/* "\_[" is character range plus newline */
				1452	if (c == '[')
				1453	goto collection;
				1454
				1455	/* "\_x" is character class plus newline */
				1456	/FALLTHROUGH/
				1457
				1458	/*
				1459	* Character classes.
				1460	*/
				1461	case Magic('.'):
				1462	case Magic('i'):
				1463	case Magic('I'):
				1464	case Magic('k'):
				1465	case Magic('K'):
				1466	case Magic('f'):
				1467	case Magic('F'):
				1468	case Magic('p'):
				1469	case Magic('P'):
				1470	case Magic('s'):
				1471	case Magic('S'):
				1472	case Magic('d'):
				1473	case Magic('D'):
				1474	case Magic('x'):
				1475	case Magic('X'):
				1476	case Magic('o'):
				1477	case Magic('O'):
				1478	case Magic('w'):
				1479	case Magic('W'):
				1480	case Magic('h'):
				1481	case Magic('H'):
				1482	case Magic('a'):
				1483	case Magic('A'):
				1484	case Magic('l'):
				1485	case Magic('L'):
				1486	case Magic('u'):
				1487	case Magic('U'):
				1488	p = vim_strchr(classchars, no_Magic(c));
				1489	if (p == NULL)
				1490	EMSG_RET_NULL(_("E63: invalid use of \\_"));
				1491	ret = regnode(classcodes[p - classchars] + extra);
				1492	*flagp \|= HASWIDTH \| SIMPLE;
				1493	break;
				1494
				1495	case Magic('n'):
				1496	if (reg_string)
				1497	{
				1498	/* In a string "\n" matches a newline character. */
				1499	ret = regnode(EXACTLY);
				1500	regc(NL);
				1501	regc(NUL);
				1502	*flagp \|= HASWIDTH \| SIMPLE;
				1503	}
				1504	else
				1505	{
				1506	/* In buffer text "\n" matches the end of a line. */
				1507	ret = regnode(NEWL);
				1508	*flagp \|= HASWIDTH \| HASNL;
				1509	}
				1510	break;
				1511
				1512	case Magic('('):
				1513	if (one_exactly)
				1514	EMSG_ONE_RET_NULL;
				1515	ret = reg(REG_PAREN, &flags);
				1516	if (ret == NULL)
				1517	return NULL;
				1518	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1519	break;
				1520
				1521	case NUL:
				1522	case Magic('\|'):
				1523	case Magic('&'):
				1524	case Magic(')'):
				1525	EMSG_RET_NULL(_(e_internal)); /* Supposed to be caught earlier. */
				1526	/* NOTREACHED */
				1527
				1528	case Magic('='):
				1529	case Magic('?'):
				1530	case Magic('+'):
				1531	case Magic('@'):
				1532	case Magic('{'):
				1533	case Magic('*'):
				1534	c = no_Magic(c);
				1535	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
				1536	(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
				1537	? "" : "\\", c);
				1538	EMSG_RET_NULL(IObuff);
				1539	/* NOTREACHED */
				1540
				1541	case Magic('~'): /* previous substitute pattern */
				1542	if (reg_prev_sub)
				1543	{
				1544	char_u *lp;
				1545
				1546	ret = regnode(EXACTLY);
				1547	lp = reg_prev_sub;
				1548	while (*lp != NUL)
				1549	regc(*lp++);
				1550	regc(NUL);
				1551	if (*reg_prev_sub != NUL)
				1552	{
				1553	*flagp \|= HASWIDTH;
				1554	if ((lp - reg_prev_sub) == 1)
				1555	*flagp \|= SIMPLE;
				1556	}
				1557	}
				1558	else
				1559	EMSG_RET_NULL(_(e_nopresub));
				1560	break;
				1561
				1562	case Magic('1'):
				1563	case Magic('2'):
				1564	case Magic('3'):
				1565	case Magic('4'):
				1566	case Magic('5'):
				1567	case Magic('6'):
				1568	case Magic('7'):
				1569	case Magic('8'):
				1570	case Magic('9'):
				1571	{
				1572	int refnum;
				1573
				1574	refnum = c - Magic('0');
				1575	/*
				1576	* Check if the back reference is legal. We must have seen the
				1577	* close brace.
				1578	* TODO: Should also check that we don't refer to something
				1579	* that is repeated (+*=): what instance of the repetition
				1580	* should we match?
				1581	*/
				1582	if (!had_endbrace[refnum])
				1583	{
				1584	/* Trick: check if "@<=" or "@<!" follows, in which case
				1585	* the \1 can appear before the referenced match. */
				1586	for (p = regparse; *p != NUL; ++p)
				1587	if (p[0] == '@' && p[1] == '<'
				1588	&& (p[2] == '!' \|\| p[2] == '='))
				1589	break;
				1590	if (*p == NUL)
				1591	EMSG_RET_NULL(_("E65: Illegal back reference"));
				1592	}
				1593	ret = regnode(BACKREF + refnum);
				1594	}
				1595	break;
				1596
				1597	#ifdef FEAT_SYN_HL
				1598	case Magic('z'):
				1599	{
				1600	c = no_Magic(getchr());
				1601	switch (c)
				1602	{
				1603	case '(': if (reg_do_extmatch != REX_SET)
				1604	EMSG_RET_NULL(_("E66: \\z( not allowed here"));
				1605	if (one_exactly)
				1606	EMSG_ONE_RET_NULL;
				1607	ret = reg(REG_ZPAREN, &flags);
				1608	if (ret == NULL)
				1609	return NULL;
				1610	*flagp \|= flags & (HASWIDTH\|SPSTART\|HASNL\|HASLOOKBH);
				1611	re_has_z = REX_SET;
				1612	break;
				1613
				1614	case '1':
				1615	case '2':
				1616	case '3':
				1617	case '4':
				1618	case '5':
				1619	case '6':
				1620	case '7':
				1621	case '8':
				1622	case '9': if (reg_do_extmatch != REX_USE)
				1623	EMSG_RET_NULL(_("E67: \\z1 et al. not allowed here"));
				1624	ret = regnode(ZREF + c - '0');
				1625	re_has_z = REX_USE;
				1626	break;
				1627
				1628	case 's': ret = regnode(MOPEN + 0);
				1629	break;
				1630
				1631	case 'e': ret = regnode(MCLOSE + 0);
				1632	break;
				1633
				1634	default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
				1635	}
				1636	}
				1637	break;
				1638	#endif
				1639
				1640	case Magic('%'):
				1641	{
				1642	c = no_Magic(getchr());
				1643	switch (c)
				1644	{
				1645	/* () without a back reference */
				1646	case '(':
				1647	if (one_exactly)
				1648	EMSG_ONE_RET_NULL;
				1649	ret = reg(REG_NPAREN, &flags);
				1650	if (ret == NULL)
				1651	return NULL;
				1652	*flagp \|= flags & (HASWIDTH \| SPSTART \| HASNL \| HASLOOKBH);
				1653	break;
				1654
				1655	/* Catch \%^ and \%$ regardless of where they appear in the
				1656	* pattern -- regardless of whether or not it makes sense. */
				1657	case '^':
				1658	ret = regnode(RE_BOF);
				1659	break;
				1660
				1661	case '$':
				1662	ret = regnode(RE_EOF);
				1663	break;
				1664
				1665	case '#':
				1666	ret = regnode(CURSOR);
				1667	break;
				1668
				1669	/* \%[abc]: Emit as a list of branches, all ending at the last
				1670	* branch which matches nothing. */
				1671	case '[':
				1672	if (one_exactly) /* doesn't nest */
				1673	EMSG_ONE_RET_NULL;
				1674	{
				1675	char_u *lastbranch;
				1676	char_u *lastnode = NULL;
				1677	char_u *br;
				1678
				1679	ret = NULL;
				1680	while ((c = getchr()) != ']')
				1681	{
				1682	if (c == NUL)
				1683	EMSG_M_RET_NULL(_("E69: Missing ] after %s%%["),
				1684	reg_magic == MAGIC_ALL);
				1685	br = regnode(BRANCH);
				1686	if (ret == NULL)
				1687	ret = br;
				1688	else
				1689	regtail(lastnode, br);
				1690
				1691	ungetchr();
				1692	one_exactly = TRUE;
				1693	lastnode = regatom(flagp);
				1694	one_exactly = FALSE;
				1695	if (lastnode == NULL)
				1696	return NULL;
				1697	}
				1698	if (ret == NULL)
				1699	EMSG_M_RET_NULL(_("E70: Empty %s%%[]"),
				1700	reg_magic == MAGIC_ALL);
				1701	lastbranch = regnode(BRANCH);
				1702	br = regnode(NOTHING);
				1703	if (ret != JUST_CALC_SIZE)
				1704	{
				1705	regtail(lastnode, br);
				1706	regtail(lastbranch, br);
				1707	/* connect all branches to the NOTHING
				1708	* branch at the end */
				1709	for (br = ret; br != lastnode; )
				1710	{
				1711	if (OP(br) == BRANCH)
				1712	{
				1713	regtail(br, lastbranch);
				1714	br = OPERAND(br);
				1715	}
				1716	else
				1717	br = regnext(br);
				1718	}
				1719	}
				1720	*flagp &= ~HASWIDTH;
				1721	break;
				1722	}
				1723
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1724	case 'd': /* %d123 decimal */
				1725	case 'o': /* %o123 octal */
				1726	case 'x': /* %xab hex 2 */
				1727	case 'u': /* %uabcd hex 4 */
				1728	case 'U': /* %U1234abcd hex 8 */
				1729	{
				1730	int i;
				1731
				1732	switch (c)
				1733	{
				1734	case 'd': i = getdecchrs(); break;
				1735	case 'o': i = getoctchrs(); break;
				1736	case 'x': i = gethexchrs(2); break;
				1737	case 'u': i = gethexchrs(4); break;
				1738	case 'U': i = gethexchrs(8); break;
				1739	default: i = -1; break;
				1740	}
				1741
				1742	if (i < 0)
				1743	EMSG_M_RET_NULL(
				1744	_("E678: Invalid character after %s%%[dxouU]"),
				1745	reg_magic == MAGIC_ALL);
				1746	ret = regnode(EXACTLY);
				1747	if (i == 0)
				1748	regc(0x0a);
				1749	else
				1750	#ifdef FEAT_MBYTE
				1751	regmbc(i);
				1752	#else
				1753	regc(i);
				1754	#endif
				1755	regc(NUL);
				1756	*flagp \|= HASWIDTH;
				1757	break;
				1758	}
				1759
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1760	default:
				1761	if (VIM_ISDIGIT(c) \|\| c == '<' \|\| c == '>')
				1762	{
				1763	long_u n = 0;
				1764	int cmp;
				1765
				1766	cmp = c;
				1767	if (cmp == '<' \|\| cmp == '>')
				1768	c = getchr();
				1769	while (VIM_ISDIGIT(c))
				1770	{
				1771	n = n * 10 + (c - '0');
				1772	c = getchr();
				1773	}
				1774	if (c == 'l' \|\| c == 'c' \|\| c == 'v')
				1775	{
				1776	if (c == 'l')
				1777	ret = regnode(RE_LNUM);
				1778	else if (c == 'c')
				1779	ret = regnode(RE_COL);
				1780	else
				1781	ret = regnode(RE_VCOL);
				1782	if (ret == JUST_CALC_SIZE)
				1783	regsize += 5;
				1784	else
				1785	{
				1786	/* put the number and the optional
				1787	* comparator after the opcode */
				1788	regcode = re_put_long(regcode, n);
				1789	*regcode++ = cmp;
				1790	}
				1791	break;
				1792	}
				1793	}
				1794
				1795	EMSG_M_RET_NULL(_("E71: Invalid character after %s%%"),
				1796	reg_magic == MAGIC_ALL);
				1797	}
				1798	}
				1799	break;
				1800
				1801	case Magic('['):
				1802	collection:
				1803	{
				1804	char_u *lp;
				1805
				1806	/*
				1807	* If there is no matching ']', we assume the '[' is a normal
				1808	* character. This makes 'incsearch' and ":help [" work.
				1809	*/
				1810	lp = skip_anyof(regparse);
				1811	if (lp == ']') / there is a matching ']' */
				1812	{
				1813	int startc = -1; /* > 0 when next '-' is a range */
				1814	int endc;
				1815
				1816	/*
				1817	* In a character class, different parsing rules apply.
				1818	* Not even \ is special anymore, nothing is.
				1819	*/
				1820	if (regparse == '^') / Complement of range. */
				1821	{
				1822	ret = regnode(ANYBUT + extra);
				1823	regparse++;
				1824	}
				1825	else
				1826	ret = regnode(ANYOF + extra);
				1827
				1828	/* At the start ']' and '-' mean the literal character. */
				1829	if (regparse == ']' \|\| regparse == '-')
				1830	regc(*regparse++);
				1831
				1832	while (regparse != NUL && regparse != ']')
				1833	{
				1834	if (*regparse == '-')
				1835	{
				1836	++regparse;
				1837	/* The '-' is not used for a range at the end and
				1838	* after or before a '\n'. */
				1839	if (regparse == ']' \|\| regparse == NUL
				1840	\|\| startc == -1
				1841	\|\| (regparse[0] == '\\' && regparse[1] == 'n'))
				1842	{
				1843	regc('-');
				1844	startc = '-'; /* [--x] is a range */
				1845	}
				1846	else
				1847	{
				1848	#ifdef FEAT_MBYTE
				1849	if (has_mbyte)
				1850	endc = mb_ptr2char_adv(&regparse);
				1851	else
				1852	#endif
				1853	endc = *regparse++;
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1854
				1855	/* Handle \o40, \x20 and \u20AC style sequences */
				1856	if (endc == '\\' && !cpo_lit)
				1857	endc = coll_get_char();
				1858
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1859	if (startc > endc)
				1860	EMSG_RET_NULL(_(e_invrange));
				1861	#ifdef FEAT_MBYTE
				1862	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1863	\|\| (*mb_char2len)(endc) > 1))
				1864	{
				1865	/* Limit to a range of 256 chars */
				1866	if (endc > startc + 256)
				1867	EMSG_RET_NULL(_(e_invrange));
				1868	while (++startc <= endc)
				1869	regmbc(startc);
				1870	}
				1871	else
				1872	#endif
				1873	{
				1874	#ifdef EBCDIC
				1875	int alpha_only = FALSE;
				1876
				1877	/* for alphabetical range skip the gaps
				1878	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1879	if (isalpha(startc) && isalpha(endc))
				1880	alpha_only = TRUE;
				1881	#endif
				1882	while (++startc <= endc)
				1883	#ifdef EBCDIC
				1884	if (!alpha_only \|\| isalpha(startc))
				1885	#endif
				1886	regc(startc);
				1887	}
				1888	startc = -1;
				1889	}
				1890	}
				1891	/*
				1892	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1893	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1894	* 'cpoptions' is not included.
				1895	*/
				1896	else if (*regparse == '\\'
				1897	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1898	\|\| (!cpo_lit
				1899	&& vim_strchr(REGEXP_ABBR,
				1900	regparse[1]) != NULL)))
				1901	{
				1902	regparse++;
				1903	if (*regparse == 'n')
				1904	{
				1905	/* '\n' in range: also match NL */
				1906	if (ret != JUST_CALC_SIZE)
				1907	{
				1908	if (*ret == ANYBUT)
				1909	*ret = ANYBUT + ADD_NL;
				1910	else if (*ret == ANYOF)
				1911	*ret = ANYOF + ADD_NL;
				1912	/* else: must have had a \n already */
				1913	}
				1914	*flagp \|= HASNL;
				1915	regparse++;
				1916	startc = -1;
				1917	}
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	1918	else if (*regparse == 'd'
				1919	\|\| *regparse == 'o'
				1920	\|\| *regparse == 'x'
				1921	\|\| *regparse == 'u'
				1922	\|\| *regparse == 'U')
				1923	{
				1924	startc = coll_get_char();
				1925	if (startc == 0)
				1926	regc(0x0a);
				1927	else
				1928	#ifdef FEAT_MBYTE
				1929	regmbc(startc);
				1930	#else
				1931	regc(startc);
				1932	#endif
				1933	}
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	1934	else
				1935	{
				1936	startc = backslash_trans(*regparse++);
				1937	regc(startc);
				1938	}
				1939	}
				1940	else if (*regparse == '[')
				1941	{
				1942	int c_class;
				1943	int cu;
				1944
				1945	c_class = skip_class_name(&regparse);
				1946	startc = -1;
				1947	/* Characters assumed to be 8 bits! */
				1948	switch (c_class)
				1949	{
				1950	case CLASS_NONE:
				1951	/* literal '[', allow [[-x] as a range */
				1952	startc = *regparse++;
				1953	regc(startc);
				1954	break;
				1955	case CLASS_ALNUM:
				1956	for (cu = 1; cu <= 255; cu++)
				1957	if (isalnum(cu))
				1958	regc(cu);
				1959	break;
				1960	case CLASS_ALPHA:
				1961	for (cu = 1; cu <= 255; cu++)
				1962	if (isalpha(cu))
				1963	regc(cu);
				1964	break;
				1965	case CLASS_BLANK:
				1966	regc(' ');
				1967	regc('\t');
				1968	break;
				1969	case CLASS_CNTRL:
				1970	for (cu = 1; cu <= 255; cu++)
				1971	if (iscntrl(cu))
				1972	regc(cu);
				1973	break;
				1974	case CLASS_DIGIT:
				1975	for (cu = 1; cu <= 255; cu++)
				1976	if (VIM_ISDIGIT(cu))
				1977	regc(cu);
				1978	break;
				1979	case CLASS_GRAPH:
				1980	for (cu = 1; cu <= 255; cu++)
				1981	if (isgraph(cu))
				1982	regc(cu);
				1983	break;
				1984	case CLASS_LOWER:
				1985	for (cu = 1; cu <= 255; cu++)
				1986	if (islower(cu))
				1987	regc(cu);
				1988	break;
				1989	case CLASS_PRINT:
				1990	for (cu = 1; cu <= 255; cu++)
				1991	if (vim_isprintc(cu))
				1992	regc(cu);
				1993	break;
				1994	case CLASS_PUNCT:
				1995	for (cu = 1; cu <= 255; cu++)
				1996	if (ispunct(cu))
				1997	regc(cu);
				1998	break;
				1999	case CLASS_SPACE:
				2000	for (cu = 9; cu <= 13; cu++)
				2001	regc(cu);
				2002	regc(' ');
				2003	break;
				2004	case CLASS_UPPER:
				2005	for (cu = 1; cu <= 255; cu++)
				2006	if (isupper(cu))
				2007	regc(cu);
				2008	break;
				2009	case CLASS_XDIGIT:
				2010	for (cu = 1; cu <= 255; cu++)
				2011	if (vim_isxdigit(cu))
				2012	regc(cu);
				2013	break;
				2014	case CLASS_TAB:
				2015	regc('\t');
				2016	break;
				2017	case CLASS_RETURN:
				2018	regc('\r');
				2019	break;
				2020	case CLASS_BACKSPACE:
				2021	regc('\b');
				2022	break;
				2023	case CLASS_ESCAPE:
				2024	regc('\033');
				2025	break;
				2026	}
				2027	}
				2028	else
				2029	{
				2030	#ifdef FEAT_MBYTE
				2031	if (has_mbyte)
				2032	{
				2033	int len;
				2034
				2035	/* produce a multibyte character, including any
				2036	* following composing characters */
				2037	startc = mb_ptr2char(regparse);
				2038	len = (*mb_ptr2len_check)(regparse);
				2039	if (enc_utf8 && utf_char2len(startc) != len)
				2040	startc = -1; /* composing chars */
				2041	while (--len >= 0)
				2042	regc(*regparse++);
				2043	}
				2044	else
				2045	#endif
				2046	{
				2047	startc = *regparse++;
				2048	regc(startc);
				2049	}
				2050	}
				2051	}
				2052	regc(NUL);
				2053	prevchr_len = 1; /* last char was the ']' */
				2054	if (*regparse != ']')
				2055	EMSG_RET_NULL(_(e_toomsbra)); /* Cannot happen? */
				2056	skipchr(); /* let's be friends with the lexer again */
				2057	*flagp \|= HASWIDTH \| SIMPLE;
				2058	break;
				2059	}
				2060	}
				2061	/* FALLTHROUGH */
				2062
				2063	default:
				2064	{
				2065	int len;
				2066
				2067	#ifdef FEAT_MBYTE
				2068	/* A multi-byte character is handled as a separate atom if it's
				2069	* before a multi. */
				2070	if (has_mbyte && (*mb_char2len)(c) > 1
				2071	&& re_multi_type(peekchr()) != NOT_MULTI)
				2072	{
				2073	ret = regnode(MULTIBYTECODE);
				2074	regmbc(c);
				2075	*flagp \|= HASWIDTH \| SIMPLE;
				2076	break;
				2077	}
				2078	#endif
				2079
				2080	ret = regnode(EXACTLY);
				2081
				2082	/*
				2083	* Append characters as long as:
				2084	* - there is no following multi, we then need the character in
				2085	* front of it as a single character operand
				2086	* - not running into a Magic character
				2087	* - "one_exactly" is not set
				2088	* But always emit at least one character. Might be a Multi,
				2089	* e.g., a "[" without matching "]".
				2090	*/
				2091	for (len = 0; c != NUL && (len == 0
				2092	\|\| (re_multi_type(peekchr()) == NOT_MULTI
				2093	&& !one_exactly
				2094	&& !is_Magic(c))); ++len)
				2095	{
				2096	c = no_Magic(c);
				2097	#ifdef FEAT_MBYTE
				2098	if (has_mbyte)
				2099	{
				2100	regmbc(c);
				2101	if (enc_utf8)
				2102	{
				2103	int off;
				2104	int l;
				2105
				2106	/* Need to get composing character too, directly
				2107	* access regparse for that, because skipchr() skips
				2108	* over composing chars. */
				2109	ungetchr();
				2110	if (*regparse == '\\' && regparse[1] != NUL)
				2111	off = 1;
				2112	else
				2113	off = 0;
				2114	for (;;)
				2115	{
				2116	l = utf_ptr2len_check(regparse + off);
				2117	if (!UTF_COMPOSINGLIKE(regparse + off,
				2118	regparse + off + l))
				2119	break;
				2120	off += l;
				2121	regmbc(utf_ptr2char(regparse + off));
				2122	}
				2123	skipchr();
				2124	}
				2125	}
				2126	else
				2127	#endif
				2128	regc(c);
				2129	c = getchr();
				2130	}
				2131	ungetchr();
				2132
				2133	regc(NUL);
				2134	*flagp \|= HASWIDTH;
				2135	if (len == 1)
				2136	*flagp \|= SIMPLE;
				2137	}
				2138	break;
				2139	}
				2140
				2141	return ret;
				2142	}
				2143
				2144	/*
				2145	* emit a node
				2146	* Return pointer to generated code.
				2147	*/
				2148	static char_u *
				2149	regnode(op)
				2150	int op;
				2151	{
				2152	char_u *ret;
				2153
				2154	ret = regcode;
				2155	if (ret == JUST_CALC_SIZE)
				2156	regsize += 3;
				2157	else
				2158	{
				2159	*regcode++ = op;
				2160	regcode++ = NUL; / Null "next" pointer. */
				2161	*regcode++ = NUL;
				2162	}
				2163	return ret;
				2164	}
				2165
				2166	/*
				2167	* Emit (if appropriate) a byte of code
				2168	*/
				2169	static void
				2170	regc(b)
				2171	int b;
				2172	{
				2173	if (regcode == JUST_CALC_SIZE)
				2174	regsize++;
				2175	else
				2176	*regcode++ = b;
				2177	}
				2178
				2179	#ifdef FEAT_MBYTE
				2180	/*
				2181	* Emit (if appropriate) a multi-byte character of code
				2182	*/
				2183	static void
				2184	regmbc(c)
				2185	int c;
				2186	{
				2187	if (regcode == JUST_CALC_SIZE)
				2188	regsize += (*mb_char2len)(c);
				2189	else
				2190	regcode += (*mb_char2bytes)(c, regcode);
				2191	}
				2192	#endif
				2193
				2194	/*
				2195	* reginsert - insert an operator in front of already-emitted operand
				2196	*
				2197	* Means relocating the operand.
				2198	*/
				2199	static void
				2200	reginsert(op, opnd)
				2201	int op;
				2202	char_u *opnd;
				2203	{
				2204	char_u *src;
				2205	char_u *dst;
				2206	char_u *place;
				2207
				2208	if (regcode == JUST_CALC_SIZE)
				2209	{
				2210	regsize += 3;
				2211	return;
				2212	}
				2213	src = regcode;
				2214	regcode += 3;
				2215	dst = regcode;
				2216	while (src > opnd)
				2217	--dst = --src;
				2218
				2219	place = opnd; /* Op node, where operand used to be. */
				2220	*place++ = op;
				2221	*place++ = NUL;
				2222	*place = NUL;
				2223	}
				2224
				2225	/*
				2226	* reginsert_limits - insert an operator in front of already-emitted operand.
				2227	* The operator has the given limit values as operands. Also set next pointer.
				2228	*
				2229	* Means relocating the operand.
				2230	*/
				2231	static void
				2232	reginsert_limits(op, minval, maxval, opnd)
				2233	int op;
				2234	long minval;
				2235	long maxval;
				2236	char_u *opnd;
				2237	{
				2238	char_u *src;
				2239	char_u *dst;
				2240	char_u *place;
				2241
				2242	if (regcode == JUST_CALC_SIZE)
				2243	{
				2244	regsize += 11;
				2245	return;
				2246	}
				2247	src = regcode;
				2248	regcode += 11;
				2249	dst = regcode;
				2250	while (src > opnd)
				2251	--dst = --src;
				2252
				2253	place = opnd; /* Op node, where operand used to be. */
				2254	*place++ = op;
				2255	*place++ = NUL;
				2256	*place++ = NUL;
				2257	place = re_put_long(place, (long_u)minval);
				2258	place = re_put_long(place, (long_u)maxval);
				2259	regtail(opnd, place);
				2260	}
				2261
				2262	/*
				2263	* Write a long as four bytes at "p" and return pointer to the next char.
				2264	*/
				2265	static char_u *
				2266	re_put_long(p, val)
				2267	char_u *p;
				2268	long_u val;
				2269	{
				2270	*p++ = (char_u) ((val >> 24) & 0377);
				2271	*p++ = (char_u) ((val >> 16) & 0377);
				2272	*p++ = (char_u) ((val >> 8) & 0377);
				2273	*p++ = (char_u) (val & 0377);
				2274	return p;
				2275	}
				2276
				2277	/*
				2278	* regtail - set the next-pointer at the end of a node chain
				2279	*/
				2280	static void
				2281	regtail(p, val)
				2282	char_u *p;
				2283	char_u *val;
				2284	{
				2285	char_u *scan;
				2286	char_u *temp;
				2287	int offset;
				2288
				2289	if (p == JUST_CALC_SIZE)
				2290	return;
				2291
				2292	/* Find last node. */
				2293	scan = p;
				2294	for (;;)
				2295	{
				2296	temp = regnext(scan);
				2297	if (temp == NULL)
				2298	break;
				2299	scan = temp;
				2300	}
				2301
				2302	if (OP(scan) == BACK)
				2303	offset = (int)(scan - val);
				2304	else
				2305	offset = (int)(val - scan);
				2306	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
				2307	*(scan + 2) = (char_u) (offset & 0377);
				2308	}
				2309
				2310	/*
				2311	* regoptail - regtail on item after a BRANCH; nop if none
				2312	*/
				2313	static void
				2314	regoptail(p, val)
				2315	char_u *p;
				2316	char_u *val;
				2317	{
				2318	/* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
				2319	if (p == NULL \|\| p == JUST_CALC_SIZE
				2320	\|\| (OP(p) != BRANCH
				2321	&& (OP(p) < BRACE_COMPLEX \|\| OP(p) > BRACE_COMPLEX + 9)))
				2322	return;
				2323	regtail(OPERAND(p), val);
				2324	}
				2325
				2326	/*
				2327	* getchr() - get the next character from the pattern. We know about
				2328	* magic and such, so therefore we need a lexical analyzer.
				2329	*/
				2330
				2331	/* static int curchr; */
				2332	static int prevprevchr;
				2333	static int prevchr;
				2334	static int nextchr; /* used for ungetchr() */
				2335	/*
				2336	* Note: prevchr is sometimes -1 when we are not at the start,
				2337	* eg in /[ ^I]^ the pattern was never found even if it existed, because ^ was
				2338	* taken to be magic -- webb
				2339	*/
				2340	static int at_start; /* True when on the first character */
				2341	static int prev_at_start; /* True when on the second character */
				2342
				2343	static void
				2344	initchr(str)
				2345	char_u *str;
				2346	{
				2347	regparse = str;
				2348	prevchr_len = 0;
				2349	curchr = prevprevchr = prevchr = nextchr = -1;
				2350	at_start = TRUE;
				2351	prev_at_start = FALSE;
				2352	}
				2353
				2354	static int
				2355	peekchr()
				2356	{
				2357	if (curchr == -1)
				2358	{
				2359	switch (curchr = regparse[0])
				2360	{
				2361	case '.':
				2362	case '[':
				2363	case '~':
				2364	/* magic when 'magic' is on */
				2365	if (reg_magic >= MAGIC_ON)
				2366	curchr = Magic(curchr);
				2367	break;
				2368	case '(':
				2369	case ')':
				2370	case '{':
				2371	case '%':
				2372	case '+':
				2373	case '=':
				2374	case '?':
				2375	case '@':
				2376	case '!':
				2377	case '&':
				2378	case '\|':
				2379	case '<':
				2380	case '>':
				2381	case '#': /* future ext. */
				2382	case '"': /* future ext. */
				2383	case '\'': /* future ext. */
				2384	case ',': /* future ext. */
				2385	case '-': /* future ext. */
				2386	case ':': /* future ext. */
				2387	case ';': /* future ext. */
				2388	case '`': /* future ext. */
				2389	case '/': /* Can't be used in / command */
				2390	/* magic only after "\v" */
				2391	if (reg_magic == MAGIC_ALL)
				2392	curchr = Magic(curchr);
				2393	break;
				2394	case '*':
				2395	/* * is not magic as the very first character, eg "?*ptr" and when
				2396	* after '^', eg "/^ptr" /
				2397	if (reg_magic >= MAGIC_ON && !at_start
				2398	&& !(prev_at_start && prevchr == Magic('^')))
				2399	curchr = Magic('*');
				2400	break;
				2401	case '^':
				2402	/* '^' is only magic as the very first character and if it's after
				2403	* "\(", "\\|", "\&' or "\n" */
				2404	if (reg_magic >= MAGIC_OFF
				2405	&& (at_start
				2406	\|\| reg_magic == MAGIC_ALL
				2407	\|\| prevchr == Magic('(')
				2408	\|\| prevchr == Magic('\|')
				2409	\|\| prevchr == Magic('&')
				2410	\|\| prevchr == Magic('n')
				2411	\|\| (no_Magic(prevchr) == '('
				2412	&& prevprevchr == Magic('%'))))
				2413	{
				2414	curchr = Magic('^');
				2415	at_start = TRUE;
				2416	prev_at_start = FALSE;
				2417	}
				2418	break;
				2419	case '$':
				2420	/* '$' is only magic as the very last char and if it's in front of
				2421	* either "\\|", "\)", "\&", or "\n" */
				2422	if (reg_magic >= MAGIC_OFF)
				2423	{
				2424	char_u *p = regparse + 1;
				2425
				2426	/* ignore \c \C \m and \M after '$' */
				2427	while (p[0] == '\\' && (p[1] == 'c' \|\| p[1] == 'C'
				2428	\|\| p[1] == 'm' \|\| p[1] == 'M' \|\| p[1] == 'Z'))
				2429	p += 2;
				2430	if (p[0] == NUL
				2431	\|\| (p[0] == '\\'
				2432	&& (p[1] == '\|' \|\| p[1] == '&' \|\| p[1] == ')'
				2433	\|\| p[1] == 'n'))
				2434	\|\| reg_magic == MAGIC_ALL)
				2435	curchr = Magic('$');
				2436	}
				2437	break;
				2438	case '\\':
				2439	{
				2440	int c = regparse[1];
				2441
				2442	if (c == NUL)
				2443	curchr = '\\'; /* trailing '\' */
				2444	else if (
				2445	#ifdef EBCDIC
				2446	vim_strchr(META, c)
				2447	#else
				2448	c <= '~' && META_flags[c]
				2449	#endif
				2450	)
				2451	{
				2452	/*
				2453	* META contains everything that may be magic sometimes,
				2454	* except ^ and $ ("\^" and "\$" are only magic after
				2455	* "\v"). We now fetch the next character and toggle its
				2456	* magicness. Therefore, \ is so meta-magic that it is
				2457	* not in META.
				2458	*/
				2459	curchr = -1;
				2460	prev_at_start = at_start;
				2461	at_start = FALSE; /* be able to say "/\ptr" /
				2462	++regparse;
				2463	peekchr();
				2464	--regparse;
				2465	curchr = toggle_Magic(curchr);
				2466	}
				2467	else if (vim_strchr(REGEXP_ABBR, c))
				2468	{
				2469	/*
				2470	* Handle abbreviations, like "\t" for TAB -- webb
				2471	*/
				2472	curchr = backslash_trans(c);
				2473	}
				2474	else if (reg_magic == MAGIC_NONE && (c == '$' \|\| c == '^'))
				2475	curchr = toggle_Magic(c);
				2476	else
				2477	{
				2478	/*
				2479	* Next character can never be (made) magic?
				2480	* Then backslashing it won't do anything.
				2481	*/
				2482	#ifdef FEAT_MBYTE
				2483	if (has_mbyte)
				2484	curchr = (*mb_ptr2char)(regparse + 1);
				2485	else
				2486	#endif
				2487	curchr = c;
				2488	}
				2489	break;
				2490	}
				2491
				2492	#ifdef FEAT_MBYTE
				2493	default:
				2494	if (has_mbyte)
				2495	curchr = (*mb_ptr2char)(regparse);
				2496	#endif
				2497	}
				2498	}
				2499
				2500	return curchr;
				2501	}
				2502
				2503	/*
				2504	* Eat one lexed character. Do this in a way that we can undo it.
				2505	*/
				2506	static void
				2507	skipchr()
				2508	{
				2509	/* peekchr() eats a backslash, do the same here */
				2510	if (*regparse == '\\')
				2511	prevchr_len = 1;
				2512	else
				2513	prevchr_len = 0;
				2514	if (regparse[prevchr_len] != NUL)
				2515	{
				2516	#ifdef FEAT_MBYTE
				2517	if (has_mbyte)
				2518	prevchr_len += (*mb_ptr2len_check)(regparse + prevchr_len);
				2519	else
				2520	#endif
				2521	++prevchr_len;
				2522	}
				2523	regparse += prevchr_len;
				2524	prev_at_start = at_start;
				2525	at_start = FALSE;
				2526	prevprevchr = prevchr;
				2527	prevchr = curchr;
				2528	curchr = nextchr; /* use previously unget char, or -1 */
				2529	nextchr = -1;
				2530	}
				2531
				2532	/*
				2533	* Skip a character while keeping the value of prev_at_start for at_start.
				2534	* prevchr and prevprevchr are also kept.
				2535	*/
				2536	static void
				2537	skipchr_keepstart()
				2538	{
				2539	int as = prev_at_start;
				2540	int pr = prevchr;
				2541	int prpr = prevprevchr;
				2542
				2543	skipchr();
				2544	at_start = as;
				2545	prevchr = pr;
				2546	prevprevchr = prpr;
				2547	}
				2548
				2549	static int
				2550	getchr()
				2551	{
				2552	int chr = peekchr();
				2553
				2554	skipchr();
				2555	return chr;
				2556	}
				2557
				2558	/*
				2559	* put character back. Works only once!
				2560	*/
				2561	static void
				2562	ungetchr()
				2563	{
				2564	nextchr = curchr;
				2565	curchr = prevchr;
				2566	prevchr = prevprevchr;
				2567	at_start = prev_at_start;
				2568	prev_at_start = FALSE;
				2569
				2570	/* Backup regparse, so that it's at the same position as before the
				2571	* getchr(). */
				2572	regparse -= prevchr_len;
				2573	}
				2574
				2575	/*
Bram Moolenaar	7b0294c	2004-10-11 10:16:09 +0000	[diff] [blame]	2576	* Get and return the value of the hex string at the current position.
				2577	* Return -1 if there is no valid hex number.
				2578	* The position is updated:
Bram Moolenaar	c0197e2	2004-09-13 20:26:32 +0000	[diff] [blame]	2579	* blahblah\%x20asdf
				2580	* before-^ ^-after
				2581	* The parameter controls the maximum number of input characters. This will be
				2582	* 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
				2583	*/
				2584	static int
				2585	gethexchrs(maxinputlen)
				2586	int maxinputlen;
				2587	{
				2588	int nr = 0;
				2589	int c;
				2590	int i;
				2591
				2592	for (i = 0; i < maxinputlen; ++i)
				2593	{
				2594	c = regparse[0];
				2595	if (!vim_isxdigit(c))
				2596	break;
				2597	nr <<= 4;
				2598	nr \|= hex2nr(c);
				2599	++regparse;
				2600	}
				2601
				2602	if (i == 0)
				2603	return -1;
				2604	return nr;
				2605	}
				2606
				2607	/*
				2608	* get and return the value of the decimal string immediately after the
				2609	* current position. Return -1 for invalid. Consumes all digits.
				2610	*/
				2611	static int
				2612	getdecchrs()
				2613	{
				2614	int nr = 0;
				2615	int c;
				2616	int i;
				2617
				2618	for (i = 0; ; ++i)
				2619	{
				2620	c = regparse[0];
				2621	if (c < '0' \|\| c > '9')
				2622	break;
				2623	nr *= 10;
				2624	nr += c - '0';
				2625	++regparse;
				2626	}
				2627
				2628	if (i == 0)
				2629	return -1;
				2630	return nr;
				2631	}
				2632
				2633	/*
				2634	* get and return the value of the octal string immediately after the current
				2635	* position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
				2636	* numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
				2637	* treat 8 or 9 as recognised characters. Position is updated:
				2638	* blahblah\%o210asdf
				2639	* before-^ ^-after
				2640	*/
				2641	static int
				2642	getoctchrs()
				2643	{
				2644	int nr = 0;
				2645	int c;
				2646	int i;
				2647
				2648	for (i = 0; i < 3 && nr < 040; ++i)
				2649	{
				2650	c = regparse[0];
				2651	if (c < '0' \|\| c > '7')
				2652	break;
				2653	nr <<= 3;
				2654	nr \|= hex2nr(c);
				2655	++regparse;
				2656	}
				2657
				2658	if (i == 0)
				2659	return -1;
				2660	return nr;
				2661	}
				2662
				2663	/*
				2664	* Get a number after a backslash that is inside [].
				2665	* When nothing is recognized return a backslash.
				2666	*/
				2667	static int
				2668	coll_get_char()
				2669	{
				2670	int nr = -1;
				2671
				2672	switch (*regparse++)
				2673	{
				2674	case 'd': nr = getdecchrs(); break;
				2675	case 'o': nr = getoctchrs(); break;
				2676	case 'x': nr = gethexchrs(2); break;
				2677	case 'u': nr = gethexchrs(4); break;
				2678	case 'U': nr = gethexchrs(8); break;
				2679	}
				2680	if (nr < 0)
				2681	{
				2682	/* If getting the number fails be backwards compatible: the character
				2683	* is a backslash. */
				2684	--regparse;
				2685	nr = '\\';
				2686	}
				2687	return nr;
				2688	}
				2689
				2690	/*
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	2691	* read_limits - Read two integers to be taken as a minimum and maximum.
				2692	* If the first character is '-', then the range is reversed.
				2693	* Should end with 'end'. If minval is missing, zero is default, if maxval is
				2694	* missing, a very big number is the default.
				2695	*/
				2696	static int
				2697	read_limits(minval, maxval)
				2698	long *minval;
				2699	long *maxval;
				2700	{
				2701	int reverse = FALSE;
				2702	char_u *first_char;
				2703	long tmp;
				2704
				2705	if (*regparse == '-')
				2706	{
				2707	/* Starts with '-', so reverse the range later */
				2708	regparse++;
				2709	reverse = TRUE;
				2710	}
				2711	first_char = regparse;
				2712	*minval = getdigits(&regparse);
				2713	if (regparse == ',') / There is a comma */
				2714	{
				2715	if (vim_isdigit(*++regparse))
				2716	*maxval = getdigits(&regparse);
				2717	else
				2718	*maxval = MAX_LIMIT;
				2719	}
				2720	else if (VIM_ISDIGIT(*first_char))
				2721	maxval = minval; /* It was \{n} or \{-n} */
				2722	else
				2723	maxval = MAX_LIMIT; / It was \{} or \{-} */
				2724	if (*regparse == '\\')
				2725	regparse++; /* Allow either \{...} or \{...\} */
				2726	if (regparse != '}' \|\| (maxval == 0 && *minval == 0))
				2727	{
				2728	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
				2729	reg_magic == MAGIC_ALL ? "" : "\\");
				2730	EMSG_RET_FAIL(IObuff);
				2731	}
				2732
				2733	/*
				2734	* Reverse the range if there was a '-', or make sure it is in the right
				2735	* order otherwise.
				2736	*/
				2737	if ((!reverse && minval > maxval) \|\| (reverse && minval < maxval))
				2738	{
				2739	tmp = *minval;
				2740	minval = maxval;
				2741	*maxval = tmp;
				2742	}
				2743	skipchr(); /* let's be friends with the lexer again */
				2744	return OK;
				2745	}
				2746
				2747	/*
				2748	* vim_regexec and friends
				2749	*/
				2750
				2751	/*
				2752	* Global work variables for vim_regexec().
				2753	*/
				2754
				2755	/* The current match-position is remembered with these variables: */
				2756	static linenr_T reglnum; /* line number, relative to first line */
				2757	static char_u regline; / start of current line */
				2758	static char_u reginput; / current input, points into "regline" */
				2759
				2760	static int need_clear_subexpr; /* subexpressions still need to be
				2761	* cleared */
				2762	#ifdef FEAT_SYN_HL
				2763	static int need_clear_zsubexpr = FALSE; /* extmatch subexpressions
				2764	* still need to be cleared */
				2765	#endif
				2766
				2767	static int out_of_stack; /* TRUE when ran out of stack space */
				2768
				2769	/*
				2770	* Structure used to save the current input state, when it needs to be
				2771	* restored after trying a match. Used by reg_save() and reg_restore().
				2772	*/
				2773	typedef struct
				2774	{
				2775	union
				2776	{
				2777	char_u ptr; / reginput pointer, for single-line regexp */
				2778	lpos_T pos; /* reginput pos, for multi-line regexp */
				2779	} rs_u;
				2780	} regsave_T;
				2781
				2782	/* struct to save start/end pointer/position in for */
				2783	typedef struct
				2784	{
				2785	union
				2786	{
				2787	char_u *ptr;
				2788	lpos_T pos;
				2789	} se_u;
				2790	} save_se_T;
				2791
				2792	static char_u *reg_getline __ARGS((linenr_T lnum));
				2793	static long vim_regexec_both __ARGS((char_u *line, colnr_T col));
				2794	static long regtry __ARGS((regprog_T *prog, colnr_T col));
				2795	static void cleanup_subexpr __ARGS((void));
				2796	#ifdef FEAT_SYN_HL
				2797	static void cleanup_zsubexpr __ARGS((void));
				2798	#endif
				2799	static void reg_nextline __ARGS((void));
				2800	static void reg_save __ARGS((regsave_T *save));
				2801	static void reg_restore __ARGS((regsave_T *save));
				2802	static int reg_save_equal __ARGS((regsave_T *save));
				2803	static void save_se_multi __ARGS((save_se_T savep, lpos_T posp));
				2804	static void save_se_one __ARGS((save_se_T savep, char_u *pp));
				2805
				2806	/* Save the sub-expressions before attempting a match. */
				2807	#define save_se(savep, posp, pp) \
				2808	REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
				2809
				2810	/* After a failed match restore the sub-expressions. */
				2811	#define restore_se(savep, posp, pp) { \
				2812	if (REG_MULTI) \
				2813	*(posp) = (savep)->se_u.pos; \
				2814	else \
				2815	*(pp) = (savep)->se_u.ptr; }
				2816
				2817	static int re_num_cmp __ARGS((long_u val, char_u *scan));
				2818	static int regmatch __ARGS((char_u *prog));
				2819	static int regrepeat __ARGS((char_u *p, long maxcount));
				2820
				2821	#ifdef DEBUG
				2822	int regnarrate = 0;
				2823	#endif
				2824
				2825	/*
				2826	* Internal copy of 'ignorecase'. It is set at each call to vim_regexec().
				2827	* Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
				2828	* contains '\c' or '\C' the value is overruled.
				2829	*/
				2830	static int ireg_ic;
				2831
				2832	#ifdef FEAT_MBYTE
				2833	/*
				2834	* Similar to ireg_ic, but only for 'combining' characters. Set with \Z flag
				2835	* in the regexp. Defaults to false, always.
				2836	*/
				2837	static int ireg_icombine;
				2838	#endif
				2839
				2840	/*
				2841	* Sometimes need to save a copy of a line. Since alloc()/free() is very
				2842	* slow, we keep one allocated piece of memory and only re-allocate it when
				2843	* it's too small. It's freed in vim_regexec_both() when finished.
				2844	*/
				2845	static char_u *reg_tofree;
				2846	static unsigned reg_tofreelen;
				2847
				2848	/*
				2849	* These variables are set when executing a regexp to speed up the execution.
				2850	* Which ones are set depends on whethere a single-line or multi-line match is
				2851	* done:
				2852	* single-line multi-line
				2853	* reg_match &regmatch_T NULL
				2854	* reg_mmatch NULL &regmmatch_T
				2855	* reg_startp reg_match->startp <invalid>
				2856	* reg_endp reg_match->endp <invalid>
				2857	* reg_startpos <invalid> reg_mmatch->startpos
				2858	* reg_endpos <invalid> reg_mmatch->endpos
				2859	* reg_win NULL window in which to search
				2860	* reg_buf <invalid> buffer in which to search
				2861	* reg_firstlnum <invalid> first line in which to search
				2862	* reg_maxline 0 last line nr
				2863	* reg_line_lbr FALSE or TRUE FALSE
				2864	*/
				2865	static regmatch_T *reg_match;
				2866	static regmmatch_T *reg_mmatch;
				2867	static char_u **reg_startp = NULL;
				2868	static char_u **reg_endp = NULL;
				2869	static lpos_T *reg_startpos = NULL;
				2870	static lpos_T *reg_endpos = NULL;
				2871	static win_T *reg_win;
				2872	static buf_T *reg_buf;
				2873	static linenr_T reg_firstlnum;
				2874	static linenr_T reg_maxline;
				2875	static int reg_line_lbr; /* "\n" in string is line break */
				2876
				2877	/*
				2878	* Get pointer to the line "lnum", which is relative to "reg_firstlnum".
				2879	*/
				2880	static char_u *
				2881	reg_getline(lnum)
				2882	linenr_T lnum;
				2883	{
				2884	/* when looking behind for a match/no-match lnum is negative. But we
				2885	* can't go before line 1 */
				2886	if (reg_firstlnum + lnum < 1)
				2887	return NULL;
				2888	return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
				2889	}
				2890
				2891	static regsave_T behind_pos;
				2892
				2893	#ifdef FEAT_SYN_HL
				2894	static char_u reg_startzp[NSUBEXP]; / Workspace to mark beginning */
				2895	static char_u reg_endzp[NSUBEXP]; / and end of \z(...\) matches */
				2896	static lpos_T reg_startzpos[NSUBEXP]; /* idem, beginning pos */
				2897	static lpos_T reg_endzpos[NSUBEXP]; /* idem, end pos */
				2898	#endif
				2899
				2900	/* TRUE if using multi-line regexp. */
				2901	#define REG_MULTI (reg_match == NULL)
				2902
				2903	/*
				2904	* Match a regexp against a string.
				2905	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2906	* Uses curbuf for line count and 'iskeyword'.
				2907	*
				2908	* Return TRUE if there is a match, FALSE if not.
				2909	*/
				2910	int
				2911	vim_regexec(rmp, line, col)
				2912	regmatch_T *rmp;
				2913	char_u line; / string to match against */
				2914	colnr_T col; /* column to start looking for match */
				2915	{
				2916	reg_match = rmp;
				2917	reg_mmatch = NULL;
				2918	reg_maxline = 0;
				2919	reg_line_lbr = FALSE;
				2920	reg_win = NULL;
				2921	ireg_ic = rmp->rm_ic;
				2922	#ifdef FEAT_MBYTE
				2923	ireg_icombine = FALSE;
				2924	#endif
				2925	return (vim_regexec_both(line, col) != 0);
				2926	}
				2927
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
        || defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Like vim_regexec(), but consider a "\n" in "line" to be a line break.
 *
 * Return TRUE if there is a match, FALSE if not.
 */
int
vim_regexec_nl(rmp, line, col)
    regmatch_T *rmp;
    char_u *line;       /* string to match against */
    colnr_T col;        /* column to start looking for match */
{
    reg_match = rmp;
    reg_mmatch = NULL;
    reg_maxline = 0;
    reg_line_lbr = TRUE;
    reg_win = NULL;
    ireg_ic = rmp->rm_ic;
#ifdef FEAT_MBYTE
    ireg_icombine = FALSE;
#endif
    return (vim_regexec_both(line, col) != 0);
}
#endif
				2951
				2952	/*
				2953	* Match a regexp against multiple lines.
				2954	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				2955	* Uses curbuf for line count and 'iskeyword'.
				2956	*
				2957	* Return zero if there is no match. Return number of lines contained in the
				2958	* match otherwise.
				2959	*/
				2960	long
				2961	vim_regexec_multi(rmp, win, buf, lnum, col)
				2962	regmmatch_T *rmp;
				2963	win_T win; / window in which to search or NULL */
				2964	buf_T buf; / buffer in which to search */
				2965	linenr_T lnum; /* nr of line to start looking for match */
				2966	colnr_T col; /* column to start looking for match */
				2967	{
				2968	long r;
				2969	buf_T *save_curbuf = curbuf;
				2970
				2971	reg_match = NULL;
				2972	reg_mmatch = rmp;
				2973	reg_buf = buf;
				2974	reg_win = win;
				2975	reg_firstlnum = lnum;
				2976	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				2977	reg_line_lbr = FALSE;
				2978	ireg_ic = rmp->rmm_ic;
				2979	#ifdef FEAT_MBYTE
				2980	ireg_icombine = FALSE;
				2981	#endif
				2982
				2983	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				2984	curbuf = buf;
				2985	r = vim_regexec_both(NULL, col);
				2986	curbuf = save_curbuf;
				2987
				2988	return r;
				2989	}
				2990
				2991	/*
				2992	* Match a regexp against a string ("line" points to the string) or multiple
				2993	* lines ("line" is NULL, use reg_getline()).
				2994	*/
				2995	#ifdef HAVE_SETJMP_H
				2996	static long
				2997	vim_regexec_both(line_arg, col_arg)
				2998	char_u *line_arg;
				2999	colnr_T col_arg; /* column to start looking for match */
				3000	#else
				3001	static long
				3002	vim_regexec_both(line, col)
				3003	char_u *line;
				3004	colnr_T col; /* column to start looking for match */
				3005	#endif
				3006	{
				3007	regprog_T *prog;
				3008	char_u *s;
				3009	long retval;
				3010	#ifdef HAVE_SETJMP_H
				3011	char_u *line;
				3012	colnr_T col;
				3013	#endif
				3014
				3015	reg_tofree = NULL;
				3016
				3017	#ifdef HAVE_TRY_EXCEPT
				3018	__try
				3019	{
				3020	#endif
				3021
				3022	#ifdef HAVE_SETJMP_H
				3023	/*
				3024	* Matching with a regexp may cause a very deep recursive call of
				3025	* regmatch(). Vim will crash when running out of stack space. Catch
				3026	* this here if the system supports it.
				3027	*/
				3028	mch_startjmp();
				3029	if (SETJMP(lc_jump_env) != 0)
				3030	{
				3031	mch_didjmp();
				3032	# ifdef SIGHASARG
				3033	if (lc_signal != SIGINT)
				3034	# endif
				3035	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3036	retval = 0L;
				3037	goto theend;
				3038	}
				3039
				3040	/* Trick to avoid "might be clobbered by `longjmp'" warning from gcc. */
				3041	line = line_arg;
				3042	col = col_arg;
				3043	#endif
				3044	retval = 0L;
				3045
				3046	if (REG_MULTI)
				3047	{
				3048	prog = reg_mmatch->regprog;
				3049	line = reg_getline((linenr_T)0);
				3050	reg_startpos = reg_mmatch->startpos;
				3051	reg_endpos = reg_mmatch->endpos;
				3052	}
				3053	else
				3054	{
				3055	prog = reg_match->regprog;
				3056	reg_startp = reg_match->startp;
				3057	reg_endp = reg_match->endp;
				3058	}
				3059
				3060	/* Be paranoid... */
				3061	if (prog == NULL \|\| line == NULL)
				3062	{
				3063	EMSG(_(e_null));
				3064	goto theend;
				3065	}
				3066
				3067	/* Check validity of program. */
				3068	if (prog_magic_wrong())
				3069	goto theend;
				3070
				3071	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3072	if (prog->regflags & RF_ICASE)
				3073	ireg_ic = TRUE;
				3074	else if (prog->regflags & RF_NOICASE)
				3075	ireg_ic = FALSE;
				3076
				3077	#ifdef FEAT_MBYTE
				3078	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3079	if (prog->regflags & RF_ICOMBINE)
				3080	ireg_icombine = TRUE;
				3081	#endif
				3082
				3083	/* If there is a "must appear" string, look for it. */
				3084	if (prog->regmust != NULL)
				3085	{
				3086	int c;
				3087
				3088	#ifdef FEAT_MBYTE
				3089	if (has_mbyte)
				3090	c = (*mb_ptr2char)(prog->regmust);
				3091	else
				3092	#endif
				3093	c = *prog->regmust;
				3094	s = line + col;
				3095	while ((s = cstrchr(s, c)) != NULL)
				3096	{
				3097	if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
				3098	break; /* Found it. */
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3099	mb_ptr_adv(s);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3100	}
				3101	if (s == NULL) /* Not present. */
				3102	goto theend;
				3103	}
				3104
				3105	regline = line;
				3106	reglnum = 0;
				3107	out_of_stack = FALSE;
				3108
				3109	/* Simplest case: Anchored match need be tried only once. */
				3110	if (prog->reganch)
				3111	{
				3112	int c;
				3113
				3114	#ifdef FEAT_MBYTE
				3115	if (has_mbyte)
				3116	c = (*mb_ptr2char)(regline + col);
				3117	else
				3118	#endif
				3119	c = regline[col];
				3120	if (prog->regstart == NUL
				3121	\|\| prog->regstart == c
				3122	\|\| (ireg_ic && ((
				3123	#ifdef FEAT_MBYTE
				3124	(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
				3125	\|\| (c < 255 && prog->regstart < 255 &&
				3126	#endif
				3127	TOLOWER_LOC(prog->regstart) == TOLOWER_LOC(c)))))
				3128	retval = regtry(prog, col);
				3129	else
				3130	retval = 0;
				3131	}
				3132	else
				3133	{
				3134	/* Messy cases: unanchored match. */
				3135	while (!got_int && !out_of_stack)
				3136	{
				3137	if (prog->regstart != NUL)
				3138	{
				3139	/* Skip until the char we know it must start with. */
				3140	s = cstrchr(regline + col, prog->regstart);
				3141	if (s == NULL)
				3142	{
				3143	retval = 0;
				3144	break;
				3145	}
				3146	col = (int)(s - regline);
				3147	}
				3148
				3149	retval = regtry(prog, col);
				3150	if (retval > 0)
				3151	break;
				3152
				3153	/* if not currently on the first line, get it again */
				3154	if (reglnum != 0)
				3155	{
				3156	regline = reg_getline((linenr_T)0);
				3157	reglnum = 0;
				3158	}
				3159	if (regline[col] == NUL)
				3160	break;
				3161	#ifdef FEAT_MBYTE
				3162	if (has_mbyte)
				3163	col += (*mb_ptr2len_check)(regline + col);
				3164	else
				3165	#endif
				3166	++col;
				3167	}
				3168	}
				3169
				3170	if (out_of_stack)
				3171	EMSG(_("E363: pattern caused out-of-stack error"));
				3172
				3173	#ifdef HAVE_TRY_EXCEPT
				3174	}
				3175	__except(EXCEPTION_EXECUTE_HANDLER)
				3176	{
				3177	if (GetExceptionCode() == EXCEPTION_STACK_OVERFLOW)
				3178	{
				3179	RESETSTKOFLW();
				3180	EMSG(_("E363: pattern caused out-of-stack error"));
				3181	}
				3182	else
				3183	EMSG(_("E361: Crash intercepted; regexp too complex?"));
				3184	retval = 0L;
				3185	}
				3186	#endif
				3187
				3188	theend:
				3189	/* Didn't find a match. */
				3190	vim_free(reg_tofree);
				3191	#ifdef HAVE_SETJMP_H
				3192	mch_endjmp();
				3193	#endif
				3194	return retval;
				3195	}
				3196
				3197	#ifdef FEAT_SYN_HL
				3198	static reg_extmatch_T *make_extmatch __ARGS((void));
				3199
				3200	/*
				3201	* Create a new extmatch and mark it as referenced once.
				3202	*/
				3203	static reg_extmatch_T *
				3204	make_extmatch()
				3205	{
				3206	reg_extmatch_T *em;
				3207
				3208	em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
				3209	if (em != NULL)
				3210	em->refcnt = 1;
				3211	return em;
				3212	}
				3213
				3214	/*
				3215	* Add a reference to an extmatch.
				3216	*/
				3217	reg_extmatch_T *
				3218	ref_extmatch(em)
				3219	reg_extmatch_T *em;
				3220	{
				3221	if (em != NULL)
				3222	em->refcnt++;
				3223	return em;
				3224	}
				3225
				3226	/*
				3227	* Remove a reference to an extmatch. If there are no references left, free
				3228	* the info.
				3229	*/
				3230	void
				3231	unref_extmatch(em)
				3232	reg_extmatch_T *em;
				3233	{
				3234	int i;
				3235
				3236	if (em != NULL && --em->refcnt <= 0)
				3237	{
				3238	for (i = 0; i < NSUBEXP; ++i)
				3239	vim_free(em->matches[i]);
				3240	vim_free(em);
				3241	}
				3242	}
				3243	#endif
				3244
				3245	/*
				3246	* regtry - try match of "prog" with at regline["col"].
				3247	* Returns 0 for failure, number of lines contained in the match otherwise.
				3248	*/
				3249	static long
				3250	regtry(prog, col)
				3251	regprog_T *prog;
				3252	colnr_T col;
				3253	{
				3254	reginput = regline + col;
				3255	need_clear_subexpr = TRUE;
				3256	#ifdef FEAT_SYN_HL
				3257	/* Clear the external match subpointers if necessary. */
				3258	if (prog->reghasz == REX_SET)
				3259	need_clear_zsubexpr = TRUE;
				3260	#endif
				3261
				3262	if (regmatch(prog->program + 1))
				3263	{
				3264	cleanup_subexpr();
				3265	if (REG_MULTI)
				3266	{
				3267	if (reg_startpos[0].lnum < 0)
				3268	{
				3269	reg_startpos[0].lnum = 0;
				3270	reg_startpos[0].col = col;
				3271	}
				3272	if (reg_endpos[0].lnum < 0)
				3273	{
				3274	reg_endpos[0].lnum = reglnum;
				3275	reg_endpos[0].col = (int)(reginput - regline);
				3276	}
				3277	else
				3278	/* Use line number of "\ze". */
				3279	reglnum = reg_endpos[0].lnum;
				3280	}
				3281	else
				3282	{
				3283	if (reg_startp[0] == NULL)
				3284	reg_startp[0] = regline + col;
				3285	if (reg_endp[0] == NULL)
				3286	reg_endp[0] = reginput;
				3287	}
				3288	#ifdef FEAT_SYN_HL
				3289	/* Package any found \z(...\) matches for export. Default is none. */
				3290	unref_extmatch(re_extmatch_out);
				3291	re_extmatch_out = NULL;
				3292
				3293	if (prog->reghasz == REX_SET)
				3294	{
				3295	int i;
				3296
				3297	cleanup_zsubexpr();
				3298	re_extmatch_out = make_extmatch();
				3299	for (i = 0; i < NSUBEXP; i++)
				3300	{
				3301	if (REG_MULTI)
				3302	{
				3303	/* Only accept single line matches. */
				3304	if (reg_startzpos[i].lnum >= 0
				3305	&& reg_endzpos[i].lnum == reg_startzpos[i].lnum)
				3306	re_extmatch_out->matches[i] =
				3307	vim_strnsave(reg_getline(reg_startzpos[i].lnum)
				3308	+ reg_startzpos[i].col,
				3309	reg_endzpos[i].col - reg_startzpos[i].col);
				3310	}
				3311	else
				3312	{
				3313	if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
				3314	re_extmatch_out->matches[i] =
				3315	vim_strnsave(reg_startzp[i],
				3316	(int)(reg_endzp[i] - reg_startzp[i]));
				3317	}
				3318	}
				3319	}
				3320	#endif
				3321	return 1 + reglnum;
				3322	}
				3323	return 0;
				3324	}
				3325
				3326	#ifdef FEAT_MBYTE
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3327	static int reg_prev_class __ARGS((void));
				3328
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3329	/*
				3330	* Get class of previous character.
				3331	*/
				3332	static int
				3333	reg_prev_class()
				3334	{
				3335	if (reginput > regline)
				3336	return mb_get_class(reginput - 1
				3337	- (*mb_head_off)(regline, reginput - 1));
				3338	return -1;
				3339	}
				3340
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3341	#endif
/* Advance the global "reginput" by applying mb_ptr_adv() to it.
 * NOTE(review): mb_ptr_adv() is defined elsewhere; presumably it steps over
 * one (possibly multi-byte) character -- confirm. */
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	3342	#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	3343
				3344	/*
				3345	* The arguments from BRACE_LIMITS are stored here. They are actually local
				3346	* to regmatch(), but they are here to reduce the amount of stack space used
				3347	* (it can be called recursively many times).
				3348	*/
				/* Minimum repeat count: set from OPERAND_MIN() of a BRACE_LIMITS node
				 * when a BRACE_SIMPLE node follows, read back when matching BRACE_SIMPLE. */
				3349	static long bl_minval;
				/* Maximum repeat count: set from OPERAND_MAX() in the same way. */
				3350	static long bl_maxval;
				3351
				3352	/*
				3353	* regmatch - main matching routine
				3354	*
				3355	* Conceptually the strategy is simple: Check to see whether the current
				3356	* node matches, call self recursively to see whether the rest matches,
				3357	* and then act accordingly. In practice we make some effort to avoid
				3358	* recursion, in particular by going through "ordinary" nodes (that don't
				3359	* need to know whether the rest of the match failed) by a loop instead of
				3360	* by recursion.
				3361	*
				3362	* Returns TRUE when there is a match. Leaves reginput and reglnum just after
				3363	* the last matched character.
				3364	* Returns FALSE when there is no match. Leaves reginput and reglnum in an
				3365	* undefined state!
				3366	*/
				3367	static int
				3368	regmatch(scan)
				3369	char_u scan; / Current node. */
				3370	{
				3371	char_u next; / Next node. */
				3372	int op;
				3373	int c;
				3374
				3375	#ifdef HAVE_GETRLIMIT
				3376	/* Check if we are running out of stack space. Could be caused by
				3377	* recursively calling ourselves. */
				3378	if (out_of_stack \|\| mch_stackcheck((char *)&op) == FAIL)
				3379	{
				3380	out_of_stack = TRUE;
				3381	return FALSE;
				3382	}
				3383	#endif
				3384
				3385	/* Some patterns may cause a long time to match, even though they are not
				3386	* illegal. E.g., "$[a-z]\+$\+Q". Allow breaking them with CTRL-C. */
				3387	fast_breakcheck();
				3388
				3389	#ifdef DEBUG
				3390	if (scan != NULL && regnarrate)
				3391	{
				3392	mch_errmsg(regprop(scan));
				3393	mch_errmsg("(\n");
				3394	}
				3395	#endif
				3396	while (scan != NULL)
				3397	{
				3398	if (got_int \|\| out_of_stack)
				3399	return FALSE;
				3400	#ifdef DEBUG
				3401	if (regnarrate)
				3402	{
				3403	mch_errmsg(regprop(scan));
				3404	mch_errmsg("...\n");
				3405	# ifdef FEAT_SYN_HL
				3406	if (re_extmatch_in != NULL)
				3407	{
				3408	int i;
				3409
				3410	mch_errmsg(_("External submatches:\n"));
				3411	for (i = 0; i < NSUBEXP; i++)
				3412	{
				3413	mch_errmsg(" \"");
				3414	if (re_extmatch_in->matches[i] != NULL)
				3415	mch_errmsg(re_extmatch_in->matches[i]);
				3416	mch_errmsg("\"\n");
				3417	}
				3418	}
				3419	# endif
				3420	}
				3421	#endif
				3422	next = regnext(scan);
				3423
				3424	op = OP(scan);
				3425	/* Check for character class with NL added. */
				3426	if (WITH_NL(op) && *reginput == NUL && reglnum < reg_maxline)
				3427	{
				3428	reg_nextline();
				3429	}
				3430	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
				3431	{
				3432	ADVANCE_REGINPUT();
				3433	}
				3434	else
				3435	{
				3436	if (WITH_NL(op))
				3437	op -= ADD_NL;
				3438	#ifdef FEAT_MBYTE
				3439	if (has_mbyte)
				3440	c = (*mb_ptr2char)(reginput);
				3441	else
				3442	#endif
				3443	c = *reginput;
				3444	switch (op)
				3445	{
				3446	case BOL:
				3447	if (reginput != regline)
				3448	return FALSE;
				3449	break;
				3450
				3451	case EOL:
				3452	if (c != NUL)
				3453	return FALSE;
				3454	break;
				3455
				3456	case RE_BOF:
				3457	/* Passing -1 to the getline() function provided for the search
				3458	* should always return NULL if the current line is the first
				3459	* line of the file. */
				3460	if (reglnum != 0 \|\| reginput != regline
				3461	\|\| (REG_MULTI && reg_getline((linenr_T)-1) != NULL))
				3462	return FALSE;
				3463	break;
				3464
				3465	case RE_EOF:
				3466	if (reglnum != reg_maxline \|\| c != NUL)
				3467	return FALSE;
				3468	break;
				3469
				3470	case CURSOR:
				3471	/* Check if the buffer is in a window and compare the
				3472	* reg_win->w_cursor position to the match position. */
				3473	if (reg_win == NULL
				3474	\|\| (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
				3475	\|\| ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
				3476	return FALSE;
				3477	break;
				3478
				3479	case RE_LNUM:
				3480	if (!REG_MULTI \|\| !re_num_cmp((long_u)(reglnum + reg_firstlnum),
				3481	scan))
				3482	return FALSE;
				3483	break;
				3484
				3485	case RE_COL:
				3486	if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
				3487	return FALSE;
				3488	break;
				3489
				3490	case RE_VCOL:
				3491	if (!re_num_cmp((long_u)win_linetabsize(
				3492	reg_win == NULL ? curwin : reg_win,
				3493	regline, (colnr_T)(reginput - regline)) + 1, scan))
				3494	return FALSE;
				3495	break;
				3496
				3497	case BOW: /* \<word; reginput points to w */
				3498	if (c == NUL) /* Can't match at end of line */
				3499	return FALSE;
				3500	#ifdef FEAT_MBYTE
				3501	if (has_mbyte)
				3502	{
				3503	int this_class;
				3504
				3505	/* Get class of current and previous char (if it exists). */
				3506	this_class = mb_get_class(reginput);
				3507	if (this_class <= 1)
				3508	return FALSE; /* not on a word at all */
				3509	if (reg_prev_class() == this_class)
				3510	return FALSE; /* previous char is in same word */
				3511	}
				3512	#endif
				3513	else
				3514	{
				3515	if (!vim_iswordc(c)
				3516	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3517	return FALSE;
				3518	}
				3519	break;
				3520
				3521	case EOW: /* word\>; reginput points after d */
				3522	if (reginput == regline) /* Can't match at start of line */
				3523	return FALSE;
				3524	#ifdef FEAT_MBYTE
				3525	if (has_mbyte)
				3526	{
				3527	int this_class, prev_class;
				3528
				3529	/* Get class of current and previous char (if it exists). */
				3530	this_class = mb_get_class(reginput);
				3531	prev_class = reg_prev_class();
				3532	if (this_class == prev_class)
				3533	return FALSE;
				3534	if (prev_class == 0 \|\| prev_class == 1)
				3535	return FALSE;
				3536	}
				3537	else
				3538	#endif
				3539	{
				3540	if (!vim_iswordc(reginput[-1]))
				3541	return FALSE;
				3542	if (reginput[0] != NUL && vim_iswordc(c))
				3543	return FALSE;
				3544	}
				3545	break; /* Matched with EOW */
				3546
				3547	case ANY:
				3548	if (c == NUL)
				3549	return FALSE;
				3550	ADVANCE_REGINPUT();
				3551	break;
				3552
				3553	case IDENT:
				3554	if (!vim_isIDc(c))
				3555	return FALSE;
				3556	ADVANCE_REGINPUT();
				3557	break;
				3558
				3559	case SIDENT:
				3560	if (VIM_ISDIGIT(*reginput) \|\| !vim_isIDc(c))
				3561	return FALSE;
				3562	ADVANCE_REGINPUT();
				3563	break;
				3564
				3565	case KWORD:
				3566	if (!vim_iswordp(reginput))
				3567	return FALSE;
				3568	ADVANCE_REGINPUT();
				3569	break;
				3570
				3571	case SKWORD:
				3572	if (VIM_ISDIGIT(*reginput) \|\| !vim_iswordp(reginput))
				3573	return FALSE;
				3574	ADVANCE_REGINPUT();
				3575	break;
				3576
				3577	case FNAME:
				3578	if (!vim_isfilec(c))
				3579	return FALSE;
				3580	ADVANCE_REGINPUT();
				3581	break;
				3582
				3583	case SFNAME:
				3584	if (VIM_ISDIGIT(*reginput) \|\| !vim_isfilec(c))
				3585	return FALSE;
				3586	ADVANCE_REGINPUT();
				3587	break;
				3588
				3589	case PRINT:
				3590	if (ptr2cells(reginput) != 1)
				3591	return FALSE;
				3592	ADVANCE_REGINPUT();
				3593	break;
				3594
				3595	case SPRINT:
				3596	if (VIM_ISDIGIT(*reginput) \|\| ptr2cells(reginput) != 1)
				3597	return FALSE;
				3598	ADVANCE_REGINPUT();
				3599	break;
				3600
				3601	case WHITE:
				3602	if (!vim_iswhite(c))
				3603	return FALSE;
				3604	ADVANCE_REGINPUT();
				3605	break;
				3606
				3607	case NWHITE:
				3608	if (c == NUL \|\| vim_iswhite(c))
				3609	return FALSE;
				3610	ADVANCE_REGINPUT();
				3611	break;
				3612
				3613	case DIGIT:
				3614	if (!ri_digit(c))
				3615	return FALSE;
				3616	ADVANCE_REGINPUT();
				3617	break;
				3618
				3619	case NDIGIT:
				3620	if (c == NUL \|\| ri_digit(c))
				3621	return FALSE;
				3622	ADVANCE_REGINPUT();
				3623	break;
				3624
				3625	case HEX:
				3626	if (!ri_hex(c))
				3627	return FALSE;
				3628	ADVANCE_REGINPUT();
				3629	break;
				3630
				3631	case NHEX:
				3632	if (c == NUL \|\| ri_hex(c))
				3633	return FALSE;
				3634	ADVANCE_REGINPUT();
				3635	break;
				3636
				3637	case OCTAL:
				3638	if (!ri_octal(c))
				3639	return FALSE;
				3640	ADVANCE_REGINPUT();
				3641	break;
				3642
				3643	case NOCTAL:
				3644	if (c == NUL \|\| ri_octal(c))
				3645	return FALSE;
				3646	ADVANCE_REGINPUT();
				3647	break;
				3648
				3649	case WORD:
				3650	if (!ri_word(c))
				3651	return FALSE;
				3652	ADVANCE_REGINPUT();
				3653	break;
				3654
				3655	case NWORD:
				3656	if (c == NUL \|\| ri_word(c))
				3657	return FALSE;
				3658	ADVANCE_REGINPUT();
				3659	break;
				3660
				3661	case HEAD:
				3662	if (!ri_head(c))
				3663	return FALSE;
				3664	ADVANCE_REGINPUT();
				3665	break;
				3666
				3667	case NHEAD:
				3668	if (c == NUL \|\| ri_head(c))
				3669	return FALSE;
				3670	ADVANCE_REGINPUT();
				3671	break;
				3672
				3673	case ALPHA:
				3674	if (!ri_alpha(c))
				3675	return FALSE;
				3676	ADVANCE_REGINPUT();
				3677	break;
				3678
				3679	case NALPHA:
				3680	if (c == NUL \|\| ri_alpha(c))
				3681	return FALSE;
				3682	ADVANCE_REGINPUT();
				3683	break;
				3684
				3685	case LOWER:
				3686	if (!ri_lower(c))
				3687	return FALSE;
				3688	ADVANCE_REGINPUT();
				3689	break;
				3690
				3691	case NLOWER:
				3692	if (c == NUL \|\| ri_lower(c))
				3693	return FALSE;
				3694	ADVANCE_REGINPUT();
				3695	break;
				3696
				3697	case UPPER:
				3698	if (!ri_upper(c))
				3699	return FALSE;
				3700	ADVANCE_REGINPUT();
				3701	break;
				3702
				3703	case NUPPER:
				3704	if (c == NUL \|\| ri_upper(c))
				3705	return FALSE;
				3706	ADVANCE_REGINPUT();
				3707	break;
				3708
				3709	case EXACTLY:
				3710	{
				3711	int len;
				3712	char_u *opnd;
				3713
				3714	opnd = OPERAND(scan);
				3715	/* Inline the first byte, for speed. */
				3716	if (opnd != reginput
				3717	&& (!ireg_ic \|\| (
				3718	#ifdef FEAT_MBYTE
				3719	!enc_utf8 &&
				3720	#endif
				3721	TOLOWER_LOC(opnd) != TOLOWER_LOC(reginput))))
				3722	return FALSE;
				3723	if (*opnd == NUL)
				3724	{
				3725	/* match empty string always works; happens when "~" is
				3726	* empty. */
				3727	}
				3728	else if (opnd[1] == NUL
				3729	#ifdef FEAT_MBYTE
				3730	&& !(enc_utf8 && ireg_ic)
				3731	#endif
				3732	)
				3733	++reginput; /* matched a single char */
				3734	else
				3735	{
				3736	len = (int)STRLEN(opnd);
				3737	/* Need to match first byte again for multi-byte. */
				3738	if (cstrncmp(opnd, reginput, &len) != 0)
				3739	return FALSE;
				3740	#ifdef FEAT_MBYTE
				3741	/* Check for following composing character. */
				3742	if (enc_utf8 && UTF_COMPOSINGLIKE(reginput, reginput + len))
				3743	{
				3744	/* raaron: This code makes a composing character get
				3745	* ignored, which is the correct behavior (sometimes)
				3746	* for voweled Hebrew texts. */
				3747	if (!ireg_icombine)
				3748	return FALSE;
				3749	}
				3750	else
				3751	#endif
				3752	reginput += len;
				3753	}
				3754	}
				3755	break;
				3756
				3757	case ANYOF:
				3758	case ANYBUT:
				3759	if (c == NUL)
				3760	return FALSE;
				3761	if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
				3762	return FALSE;
				3763	ADVANCE_REGINPUT();
				3764	break;
				3765
				3766	#ifdef FEAT_MBYTE
				3767	case MULTIBYTECODE:
				3768	if (has_mbyte)
				3769	{
				3770	int i, len;
				3771	char_u *opnd;
				3772
				3773	opnd = OPERAND(scan);
				3774	/* Safety check (just in case 'encoding' was changed since
				3775	* compiling the program). */
				3776	if ((len = (*mb_ptr2len_check)(opnd)) < 2)
				3777	return FALSE;
				3778	for (i = 0; i < len; ++i)
				3779	if (opnd[i] != reginput[i])
				3780	return FALSE;
				3781	reginput += len;
				3782	}
				3783	else
				3784	return FALSE;
				3785	break;
				3786	#endif
				3787
				3788	case NOTHING:
				3789	break;
				3790
				3791	case BACK:
				3792	break;
				3793
				3794	case MOPEN + 0: /* Match start: \zs */
				3795	case MOPEN + 1: /* \( */
				3796	case MOPEN + 2:
				3797	case MOPEN + 3:
				3798	case MOPEN + 4:
				3799	case MOPEN + 5:
				3800	case MOPEN + 6:
				3801	case MOPEN + 7:
				3802	case MOPEN + 8:
				3803	case MOPEN + 9:
				3804	{
				3805	int no;
				3806	save_se_T save;
				3807
				3808	no = op - MOPEN;
				3809	cleanup_subexpr();
				3810	save_se(&save, &reg_startpos[no], &reg_startp[no]);
				3811
				3812	if (regmatch(next))
				3813	return TRUE;
				3814
				3815	restore_se(&save, &reg_startpos[no], &reg_startp[no]);
				3816	return FALSE;
				3817	}
				3818	/* break; Not Reached */
				3819
				3820	case NOPEN: /* \%( */
				3821	case NCLOSE: /* \) after \%( */
				3822	if (regmatch(next))
				3823	return TRUE;
				3824	return FALSE;
				3825	/* break; Not Reached */
				3826
				3827	#ifdef FEAT_SYN_HL
				3828	case ZOPEN + 1:
				3829	case ZOPEN + 2:
				3830	case ZOPEN + 3:
				3831	case ZOPEN + 4:
				3832	case ZOPEN + 5:
				3833	case ZOPEN + 6:
				3834	case ZOPEN + 7:
				3835	case ZOPEN + 8:
				3836	case ZOPEN + 9:
				3837	{
				3838	int no;
				3839	save_se_T save;
				3840
				3841	no = op - ZOPEN;
				3842	cleanup_zsubexpr();
				3843	save_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3844
				3845	if (regmatch(next))
				3846	return TRUE;
				3847
				3848	restore_se(&save, &reg_startzpos[no], &reg_startzp[no]);
				3849	return FALSE;
				3850	}
				3851	/* break; Not Reached */
				3852	#endif
				3853
				3854	case MCLOSE + 0: /* Match end: \ze */
				3855	case MCLOSE + 1: /* \) */
				3856	case MCLOSE + 2:
				3857	case MCLOSE + 3:
				3858	case MCLOSE + 4:
				3859	case MCLOSE + 5:
				3860	case MCLOSE + 6:
				3861	case MCLOSE + 7:
				3862	case MCLOSE + 8:
				3863	case MCLOSE + 9:
				3864	{
				3865	int no;
				3866	save_se_T save;
				3867
				3868	no = op - MCLOSE;
				3869	cleanup_subexpr();
				3870	save_se(&save, &reg_endpos[no], &reg_endp[no]);
				3871
				3872	if (regmatch(next))
				3873	return TRUE;
				3874
				3875	restore_se(&save, &reg_endpos[no], &reg_endp[no]);
				3876	return FALSE;
				3877	}
				3878	/* break; Not Reached */
				3879
				3880	#ifdef FEAT_SYN_HL
				3881	case ZCLOSE + 1: /* \) after \z( */
				3882	case ZCLOSE + 2:
				3883	case ZCLOSE + 3:
				3884	case ZCLOSE + 4:
				3885	case ZCLOSE + 5:
				3886	case ZCLOSE + 6:
				3887	case ZCLOSE + 7:
				3888	case ZCLOSE + 8:
				3889	case ZCLOSE + 9:
				3890	{
				3891	int no;
				3892	save_se_T save;
				3893
				3894	no = op - ZCLOSE;
				3895	cleanup_zsubexpr();
				3896	save_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3897
				3898	if (regmatch(next))
				3899	return TRUE;
				3900
				3901	restore_se(&save, &reg_endzpos[no], &reg_endzp[no]);
				3902	return FALSE;
				3903	}
				3904	/* break; Not Reached */
				3905	#endif
				3906
				3907	case BACKREF + 1:
				3908	case BACKREF + 2:
				3909	case BACKREF + 3:
				3910	case BACKREF + 4:
				3911	case BACKREF + 5:
				3912	case BACKREF + 6:
				3913	case BACKREF + 7:
				3914	case BACKREF + 8:
				3915	case BACKREF + 9:
				3916	{
				3917	int no;
				3918	int len;
				3919	linenr_T clnum;
				3920	colnr_T ccol;
				3921	char_u *p;
				3922
				3923	no = op - BACKREF;
				3924	cleanup_subexpr();
				3925	if (!REG_MULTI) /* Single-line regexp */
				3926	{
				3927	if (reg_endp[no] == NULL)
				3928	{
				3929	/* Backref was not set: Match an empty string. */
				3930	len = 0;
				3931	}
				3932	else
				3933	{
				3934	/* Compare current input with back-ref in the same
				3935	* line. */
				3936	len = (int)(reg_endp[no] - reg_startp[no]);
				3937	if (cstrncmp(reg_startp[no], reginput, &len) != 0)
				3938	return FALSE;
				3939	}
				3940	}
				3941	else /* Multi-line regexp */
				3942	{
				3943	if (reg_endpos[no].lnum < 0)
				3944	{
				3945	/* Backref was not set: Match an empty string. */
				3946	len = 0;
				3947	}
				3948	else
				3949	{
				3950	if (reg_startpos[no].lnum == reglnum
				3951	&& reg_endpos[no].lnum == reglnum)
				3952	{
				3953	/* Compare back-ref within the current line. */
				3954	len = reg_endpos[no].col - reg_startpos[no].col;
				3955	if (cstrncmp(regline + reg_startpos[no].col,
				3956	reginput, &len) != 0)
				3957	return FALSE;
				3958	}
				3959	else
				3960	{
				3961	/* Messy situation: Need to compare between two
				3962	* lines. */
				3963	ccol = reg_startpos[no].col;
				3964	clnum = reg_startpos[no].lnum;
				3965	for (;;)
				3966	{
				3967	/* Since getting one line may invalidate
				3968	* the other, need to make copy. Slow! */
				3969	if (regline != reg_tofree)
				3970	{
				3971	len = (int)STRLEN(regline);
				3972	if (reg_tofree == NULL
				3973	\|\| len >= (int)reg_tofreelen)
				3974	{
				3975	len += 50; /* get some extra */
				3976	vim_free(reg_tofree);
				3977	reg_tofree = alloc(len);
				3978	if (reg_tofree == NULL)
				3979	return FALSE; /* out of memory! */
				3980	reg_tofreelen = len;
				3981	}
				3982	STRCPY(reg_tofree, regline);
				3983	reginput = reg_tofree
				3984	+ (reginput - regline);
				3985	regline = reg_tofree;
				3986	}
				3987
				3988	/* Get the line to compare with. */
				3989	p = reg_getline(clnum);
				3990	if (clnum == reg_endpos[no].lnum)
				3991	len = reg_endpos[no].col - ccol;
				3992	else
				3993	len = (int)STRLEN(p + ccol);
				3994
				3995	if (cstrncmp(p + ccol, reginput, &len) != 0)
				3996	return FALSE; /* doesn't match */
				3997	if (clnum == reg_endpos[no].lnum)
				3998	break; /* match and at end! */
				3999	if (reglnum == reg_maxline)
				4000	return FALSE; /* text too short */
				4001
				4002	/* Advance to next line. */
				4003	reg_nextline();
				4004	++clnum;
				4005	ccol = 0;
				4006	if (got_int \|\| out_of_stack)
				4007	return FALSE;
				4008	}
				4009
				4010	/* found a match! Note that regline may now point
				4011	* to a copy of the line, that should not matter. */
				4012	}
				4013	}
				4014	}
				4015
				4016	/* Matched the backref, skip over it. */
				4017	reginput += len;
				4018	}
				4019	break;
				4020
				4021	#ifdef FEAT_SYN_HL
				4022	case ZREF + 1:
				4023	case ZREF + 2:
				4024	case ZREF + 3:
				4025	case ZREF + 4:
				4026	case ZREF + 5:
				4027	case ZREF + 6:
				4028	case ZREF + 7:
				4029	case ZREF + 8:
				4030	case ZREF + 9:
				4031	{
				4032	int no;
				4033	int len;
				4034
				4035	cleanup_zsubexpr();
				4036	no = op - ZREF;
				4037	if (re_extmatch_in != NULL
				4038	&& re_extmatch_in->matches[no] != NULL)
				4039	{
				4040	len = (int)STRLEN(re_extmatch_in->matches[no]);
				4041	if (cstrncmp(re_extmatch_in->matches[no],
				4042	reginput, &len) != 0)
				4043	return FALSE;
				4044	reginput += len;
				4045	}
				4046	else
				4047	{
				4048	/* Backref was not set: Match an empty string. */
				4049	}
				4050	}
				4051	break;
				4052	#endif
				4053
				4054	case BRANCH:
				4055	{
				4056	if (OP(next) != BRANCH) /* No choice. */
				4057	next = OPERAND(scan); /* Avoid recursion. */
				4058	else
				4059	{
				4060	regsave_T save;
				4061
				4062	do
				4063	{
				4064	reg_save(&save);
				4065	if (regmatch(OPERAND(scan)))
				4066	return TRUE;
				4067	reg_restore(&save);
				4068	scan = regnext(scan);
				4069	} while (scan != NULL && OP(scan) == BRANCH);
				4070	return FALSE;
				4071	/* NOTREACHED */
				4072	}
				4073	}
				4074	break;
				4075
				4076	case BRACE_LIMITS:
				4077	{
				4078	int no;
				4079
				4080	if (OP(next) == BRACE_SIMPLE)
				4081	{
				4082	bl_minval = OPERAND_MIN(scan);
				4083	bl_maxval = OPERAND_MAX(scan);
				4084	}
				4085	else if (OP(next) >= BRACE_COMPLEX
				4086	&& OP(next) < BRACE_COMPLEX + 10)
				4087	{
				4088	no = OP(next) - BRACE_COMPLEX;
				4089	brace_min[no] = OPERAND_MIN(scan);
				4090	brace_max[no] = OPERAND_MAX(scan);
				4091	brace_count[no] = 0;
				4092	}
				4093	else
				4094	{
				4095	EMSG(_(e_internal)); /* Shouldn't happen */
				4096	return FALSE;
				4097	}
				4098	}
				4099	break;
				4100
				4101	case BRACE_COMPLEX + 0:
				4102	case BRACE_COMPLEX + 1:
				4103	case BRACE_COMPLEX + 2:
				4104	case BRACE_COMPLEX + 3:
				4105	case BRACE_COMPLEX + 4:
				4106	case BRACE_COMPLEX + 5:
				4107	case BRACE_COMPLEX + 6:
				4108	case BRACE_COMPLEX + 7:
				4109	case BRACE_COMPLEX + 8:
				4110	case BRACE_COMPLEX + 9:
				4111	{
				4112	int no;
				4113	regsave_T save;
				4114
				4115	no = op - BRACE_COMPLEX;
				4116	++brace_count[no];
				4117
				4118	/* If not matched enough times yet, try one more */
				4119	if (brace_count[no] <= (brace_min[no] <= brace_max[no]
				4120	? brace_min[no] : brace_max[no]))
				4121	{
				4122	reg_save(&save);
				4123	if (regmatch(OPERAND(scan)))
				4124	return TRUE;
				4125	reg_restore(&save);
				4126	--brace_count[no]; /* failed, decrement match count */
				4127	return FALSE;
				4128	}
				4129
				4130	/* If matched enough times, may try matching some more */
				4131	if (brace_min[no] <= brace_max[no])
				4132	{
				4133	/* Range is the normal way around, use longest match */
				4134	if (brace_count[no] <= brace_max[no])
				4135	{
				4136	reg_save(&save);
				4137	if (regmatch(OPERAND(scan)))
				4138	return TRUE; /* matched some more times */
				4139	reg_restore(&save);
				4140	--brace_count[no]; /* matched just enough times */
				4141	/* continue with the items after \{} */
				4142	}
				4143	}
				4144	else
				4145	{
				4146	/* Range is backwards, use shortest match first */
				4147	if (brace_count[no] <= brace_min[no])
				4148	{
				4149	reg_save(&save);
				4150	if (regmatch(next))
				4151	return TRUE;
				4152	reg_restore(&save);
				4153	next = OPERAND(scan);
				4154	/* must try to match one more item */
				4155	}
				4156	}
				4157	}
				4158	break;
				4159
				4160	case BRACE_SIMPLE:
				4161	case STAR:
				4162	case PLUS:
				4163	{
				4164	int nextb; /* next byte */
				4165	int nextb_ic; /* next byte reverse case */
				4166	long count;
				4167	regsave_T save;
				4168	long minval;
				4169	long maxval;
				4170
				4171	/*
				4172	* Lookahead to avoid useless match attempts when we know
				4173	* what character comes next.
				4174	*/
				4175	if (OP(next) == EXACTLY)
				4176	{
				4177	nextb = *OPERAND(next);
				4178	if (ireg_ic)
				4179	{
				4180	if (isupper(nextb))
				4181	nextb_ic = TOLOWER_LOC(nextb);
				4182	else
				4183	nextb_ic = TOUPPER_LOC(nextb);
				4184	}
				4185	else
				4186	nextb_ic = nextb;
				4187	}
				4188	else
				4189	{
				4190	nextb = NUL;
				4191	nextb_ic = NUL;
				4192	}
				4193	if (op != BRACE_SIMPLE)
				4194	{
				4195	minval = (op == STAR) ? 0 : 1;
				4196	maxval = MAX_LIMIT;
				4197	}
				4198	else
				4199	{
				4200	minval = bl_minval;
				4201	maxval = bl_maxval;
				4202	}
				4203
				4204	/*
				4205	* When maxval > minval, try matching as much as possible, up
				4206	* to maxval. When maxval < minval, try matching at least the
				4207	* minimal number (since the range is backwards, that's also
				4208	* maxval!).
				4209	*/
				4210	count = regrepeat(OPERAND(scan), maxval);
				4211	if (got_int)
				4212	return FALSE;
				4213	if (minval <= maxval)
				4214	{
				4215	/* Range is the normal way around, use longest match */
				4216	while (count >= minval)
				4217	{
				4218	/* If it could match, try it. */
				4219	if (nextb == NUL \|\| *reginput == nextb
				4220	\|\| *reginput == nextb_ic)
				4221	{
				4222	reg_save(&save);
				4223	if (regmatch(next))
				4224	return TRUE;
				4225	reg_restore(&save);
				4226	}
				4227	/* Couldn't or didn't match -- back up one char. */
				4228	if (--count < minval)
				4229	break;
				4230	if (reginput == regline)
				4231	{
				4232	/* backup to last char of previous line */
				4233	--reglnum;
				4234	regline = reg_getline(reglnum);
				4235	/* Just in case regrepeat() didn't count right. */
				4236	if (regline == NULL)
				4237	return FALSE;
				4238	reginput = regline + STRLEN(regline);
				4239	fast_breakcheck();
				4240	if (got_int \|\| out_of_stack)
				4241	return FALSE;
				4242	}
				4243	else
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4244	mb_ptr_back(regline, reginput);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4245	}
				4246	}
				4247	else
				4248	{
				4249	/* Range is backwards, use shortest match first.
				4250	* Careful: maxval and minval are exchanged! */
				4251	if (count < maxval)
				4252	return FALSE;
				4253	for (;;)
				4254	{
				4255	/* If it could work, try it. */
				4256	if (nextb == NUL \|\| *reginput == nextb
				4257	\|\| *reginput == nextb_ic)
				4258	{
				4259	reg_save(&save);
				4260	if (regmatch(next))
				4261	return TRUE;
				4262	reg_restore(&save);
				4263	}
				4264	/* Couldn't or didn't match: try advancing one char. */
				4265	if (count == minval
				4266	\|\| regrepeat(OPERAND(scan), 1L) == 0)
				4267	break;
				4268	++count;
				4269	if (got_int \|\| out_of_stack)
				4270	return FALSE;
				4271	}
				4272	}
				4273	return FALSE;
				4274	}
				4275	/* break; Not Reached */
				4276
				4277	case NOMATCH:
				4278	{
				4279	regsave_T save;
				4280
				4281	/* If the operand matches, we fail. Otherwise backup and
				4282	* continue with the next item. */
				4283	reg_save(&save);
				4284	if (regmatch(OPERAND(scan)))
				4285	return FALSE;
				4286	reg_restore(&save);
				4287	}
				4288	break;
				4289
				4290	case MATCH:
				4291	case SUBPAT:
				4292	{
				4293	regsave_T save;
				4294
				4295	/* If the operand doesn't match, we fail. Otherwise backup
				4296	* and continue with the next item. */
				4297	reg_save(&save);
				4298	if (!regmatch(OPERAND(scan)))
				4299	return FALSE;
				4300	if (op == MATCH) /* zero-width */
				4301	reg_restore(&save);
				4302	}
				4303	break;
				4304
				4305	case BEHIND:
				4306	case NOBEHIND:
				4307	{
				4308	regsave_T save_after, save_start;
				4309	regsave_T save_behind_pos;
				4310	int needmatch = (op == BEHIND);
				4311
				4312	/*
				4313	* Look back in the input of the operand matches or not. This
				4314	* must be done at every position in the input and checking if
				4315	* the match ends at the current position.
				4316	* First check if the next item matches, that's probably
				4317	* faster.
				4318	*/
				4319	reg_save(&save_start);
				4320	if (regmatch(next))
				4321	{
				4322	/* save the position after the found match for next */
				4323	reg_save(&save_after);
				4324
				4325	/* start looking for a match with operand at the current
				4326	* postion. Go back one character until we find the
				4327	* result, hitting the start of the line or the previous
				4328	* line (for multi-line matching).
				4329	* Set behind_pos to where the match should end, BHPOS
				4330	* will match it. */
				4331	save_behind_pos = behind_pos;
				4332	behind_pos = save_start;
				4333	for (;;)
				4334	{
				4335	reg_restore(&save_start);
				4336	if (regmatch(OPERAND(scan))
				4337	&& reg_save_equal(&behind_pos))
				4338	{
				4339	behind_pos = save_behind_pos;
				4340	/* found a match that ends where "next" started */
				4341	if (needmatch)
				4342	{
				4343	reg_restore(&save_after);
				4344	return TRUE;
				4345	}
				4346	return FALSE;
				4347	}
				4348	/*
				4349	* No match: Go back one character. May go to
				4350	* previous line once.
				4351	*/
				4352	if (REG_MULTI)
				4353	{
				4354	if (save_start.rs_u.pos.col == 0)
				4355	{
				4356	if (save_start.rs_u.pos.lnum
				4357	< behind_pos.rs_u.pos.lnum
				4358	\|\| reg_getline(
				4359	--save_start.rs_u.pos.lnum) == NULL)
				4360	break;
				4361	reg_restore(&save_start);
				4362	save_start.rs_u.pos.col =
				4363	(colnr_T)STRLEN(regline);
				4364	}
				4365	else
				4366	--save_start.rs_u.pos.col;
				4367	}
				4368	else
				4369	{
				4370	if (save_start.rs_u.ptr == regline)
				4371	break;
				4372	--save_start.rs_u.ptr;
				4373	}
				4374	}
				4375
				4376	/* NOBEHIND succeeds when no match was found */
				4377	behind_pos = save_behind_pos;
				4378	if (!needmatch)
				4379	{
				4380	reg_restore(&save_after);
				4381	return TRUE;
				4382	}
				4383	}
				4384	return FALSE;
				4385	}
				4386
				4387	case BHPOS:
				4388	if (REG_MULTI)
				4389	{
				4390	if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
				4391	\|\| behind_pos.rs_u.pos.lnum != reglnum)
				4392	return FALSE;
				4393	}
				4394	else if (behind_pos.rs_u.ptr != reginput)
				4395	return FALSE;
				4396	break;
				4397
				4398	case NEWL:
				4399	if ((c != NUL \|\| reglnum == reg_maxline)
				4400	&& (c != '\n' \|\| !reg_line_lbr))
				4401	return FALSE;
				4402	if (reg_line_lbr)
				4403	ADVANCE_REGINPUT();
				4404	else
				4405	reg_nextline();
				4406	break;
				4407
				4408	case END:
				4409	return TRUE; /* Success! */
				4410
				4411	default:
				4412	EMSG(_(e_re_corr));
				4413	#ifdef DEBUG
				4414	printf("Illegal op code %d\n", op);
				4415	#endif
				4416	return FALSE;
				4417	}
				4418	}
				4419
				4420	scan = next;
				4421	}
				4422
				4423	/*
				4424	* We get here only if there's trouble -- normally "case END" is the
				4425	* terminating point.
				4426	*/
				4427	EMSG(_(e_re_corr));
				4428	#ifdef DEBUG
				4429	printf("Premature EOL\n");
				4430	#endif
				4431	return FALSE;
				4432	}
				4433
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4434	/*
				4435	* regrepeat - repeatedly match something simple, return how many.
				4436	* Advances reginput (and reglnum) to just after the matched chars.
				4437	*/
				4438	static int
				4439	regrepeat(p, maxcount)
				4440	char_u *p;
				4441	long maxcount; /* maximum number of matches allowed */
				4442	{
				4443	long count = 0;
				4444	char_u *scan;
				4445	char_u *opnd;
				4446	int mask;
				4447	int testval = 0;
				4448
				4449	scan = reginput; /* Make local copy of reginput for speed. */
				4450	opnd = OPERAND(p);
				4451	switch (OP(p))
				4452	{
				4453	case ANY:
				4454	case ANY + ADD_NL:
				4455	while (count < maxcount)
				4456	{
				4457	/* Matching anything means we continue until end-of-line (or
				4458	* end-of-file for ANY + ADD_NL), only limited by maxcount. */
				4459	while (*scan != NUL && count < maxcount)
				4460	{
				4461	++count;
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4462	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4463	}
				4464	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline \|\| count == maxcount)
				4465	break;
				4466	++count; /* count the line-break */
				4467	reg_nextline();
				4468	scan = reginput;
				4469	if (got_int)
				4470	break;
				4471	}
				4472	break;
				4473
				4474	case IDENT:
				4475	case IDENT + ADD_NL:
				4476	testval = TRUE;
				4477	/FALLTHROUGH/
				4478	case SIDENT:
				4479	case SIDENT + ADD_NL:
				4480	while (count < maxcount)
				4481	{
				4482	if (vim_isIDc(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4483	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4484	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4485	}
				4486	else if (*scan == NUL)
				4487	{
				4488	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4489	break;
				4490	reg_nextline();
				4491	scan = reginput;
				4492	if (got_int)
				4493	break;
				4494	}
				4495	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4496	++scan;
				4497	else
				4498	break;
				4499	++count;
				4500	}
				4501	break;
				4502
				4503	case KWORD:
				4504	case KWORD + ADD_NL:
				4505	testval = TRUE;
				4506	/FALLTHROUGH/
				4507	case SKWORD:
				4508	case SKWORD + ADD_NL:
				4509	while (count < maxcount)
				4510	{
				4511	if (vim_iswordp(scan) && (testval \|\| !VIM_ISDIGIT(*scan)))
				4512	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4513	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4514	}
				4515	else if (*scan == NUL)
				4516	{
				4517	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4518	break;
				4519	reg_nextline();
				4520	scan = reginput;
				4521	if (got_int)
				4522	break;
				4523	}
				4524	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4525	++scan;
				4526	else
				4527	break;
				4528	++count;
				4529	}
				4530	break;
				4531
				4532	case FNAME:
				4533	case FNAME + ADD_NL:
				4534	testval = TRUE;
				4535	/FALLTHROUGH/
				4536	case SFNAME:
				4537	case SFNAME + ADD_NL:
				4538	while (count < maxcount)
				4539	{
				4540	if (vim_isfilec(scan) && (testval \|\| !VIM_ISDIGIT(scan)))
				4541	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4542	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4543	}
				4544	else if (*scan == NUL)
				4545	{
				4546	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4547	break;
				4548	reg_nextline();
				4549	scan = reginput;
				4550	if (got_int)
				4551	break;
				4552	}
				4553	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4554	++scan;
				4555	else
				4556	break;
				4557	++count;
				4558	}
				4559	break;
				4560
				4561	case PRINT:
				4562	case PRINT + ADD_NL:
				4563	testval = TRUE;
				4564	/FALLTHROUGH/
				4565	case SPRINT:
				4566	case SPRINT + ADD_NL:
				4567	while (count < maxcount)
				4568	{
				4569	if (*scan == NUL)
				4570	{
				4571	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4572	break;
				4573	reg_nextline();
				4574	scan = reginput;
				4575	if (got_int)
				4576	break;
				4577	}
				4578	else if (ptr2cells(scan) == 1 && (testval \|\| !VIM_ISDIGIT(*scan)))
				4579	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	4580	mb_ptr_adv(scan);
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	4581	}
				4582	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4583	++scan;
				4584	else
				4585	break;
				4586	++count;
				4587	}
				4588	break;
				4589
				4590	case WHITE:
				4591	case WHITE + ADD_NL:
				4592	testval = mask = RI_WHITE;
				4593	do_class:
				4594	while (count < maxcount)
				4595	{
				4596	#ifdef FEAT_MBYTE
				4597	int l;
				4598	#endif
				4599	if (*scan == NUL)
				4600	{
				4601	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4602	break;
				4603	reg_nextline();
				4604	scan = reginput;
				4605	if (got_int)
				4606	break;
				4607	}
				4608	#ifdef FEAT_MBYTE
				4609	else if (has_mbyte && (l = (*mb_ptr2len_check)(scan)) > 1)
				4610	{
				4611	if (testval != 0)
				4612	break;
				4613	scan += l;
				4614	}
				4615	#endif
				4616	else if ((class_tab[*scan] & mask) == testval)
				4617	++scan;
				4618	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4619	++scan;
				4620	else
				4621	break;
				4622	++count;
				4623	}
				4624	break;
				4625
				4626	case NWHITE:
				4627	case NWHITE + ADD_NL:
				4628	mask = RI_WHITE;
				4629	goto do_class;
				4630	case DIGIT:
				4631	case DIGIT + ADD_NL:
				4632	testval = mask = RI_DIGIT;
				4633	goto do_class;
				4634	case NDIGIT:
				4635	case NDIGIT + ADD_NL:
				4636	mask = RI_DIGIT;
				4637	goto do_class;
				4638	case HEX:
				4639	case HEX + ADD_NL:
				4640	testval = mask = RI_HEX;
				4641	goto do_class;
				4642	case NHEX:
				4643	case NHEX + ADD_NL:
				4644	mask = RI_HEX;
				4645	goto do_class;
				4646	case OCTAL:
				4647	case OCTAL + ADD_NL:
				4648	testval = mask = RI_OCTAL;
				4649	goto do_class;
				4650	case NOCTAL:
				4651	case NOCTAL + ADD_NL:
				4652	mask = RI_OCTAL;
				4653	goto do_class;
				4654	case WORD:
				4655	case WORD + ADD_NL:
				4656	testval = mask = RI_WORD;
				4657	goto do_class;
				4658	case NWORD:
				4659	case NWORD + ADD_NL:
				4660	mask = RI_WORD;
				4661	goto do_class;
				4662	case HEAD:
				4663	case HEAD + ADD_NL:
				4664	testval = mask = RI_HEAD;
				4665	goto do_class;
				4666	case NHEAD:
				4667	case NHEAD + ADD_NL:
				4668	mask = RI_HEAD;
				4669	goto do_class;
				4670	case ALPHA:
				4671	case ALPHA + ADD_NL:
				4672	testval = mask = RI_ALPHA;
				4673	goto do_class;
				4674	case NALPHA:
				4675	case NALPHA + ADD_NL:
				4676	mask = RI_ALPHA;
				4677	goto do_class;
				4678	case LOWER:
				4679	case LOWER + ADD_NL:
				4680	testval = mask = RI_LOWER;
				4681	goto do_class;
				4682	case NLOWER:
				4683	case NLOWER + ADD_NL:
				4684	mask = RI_LOWER;
				4685	goto do_class;
				4686	case UPPER:
				4687	case UPPER + ADD_NL:
				4688	testval = mask = RI_UPPER;
				4689	goto do_class;
				4690	case NUPPER:
				4691	case NUPPER + ADD_NL:
				4692	mask = RI_UPPER;
				4693	goto do_class;
				4694
				4695	case EXACTLY:
				4696	{
				4697	int cu, cl;
				4698
				4699	/* This doesn't do a multi-byte character, because a MULTIBYTECODE
				4700	* would have been used for it. */
				4701	if (ireg_ic)
				4702	{
				4703	cu = TOUPPER_LOC(*opnd);
				4704	cl = TOLOWER_LOC(*opnd);
				4705	while (count < maxcount && (scan == cu \|\| scan == cl))
				4706	{
				4707	count++;
				4708	scan++;
				4709	}
				4710	}
				4711	else
				4712	{
				4713	cu = *opnd;
				4714	while (count < maxcount && *scan == cu)
				4715	{
				4716	count++;
				4717	scan++;
				4718	}
				4719	}
				4720	break;
				4721	}
				4722
				4723	#ifdef FEAT_MBYTE
				4724	case MULTIBYTECODE:
				4725	{
				4726	int i, len, cf = 0;
				4727
				4728	/* Safety check (just in case 'encoding' was changed since
				4729	* compiling the program). */
				4730	if ((len = (*mb_ptr2len_check)(opnd)) > 1)
				4731	{
				4732	if (ireg_ic && enc_utf8)
				4733	cf = utf_fold(utf_ptr2char(opnd));
				4734	while (count < maxcount)
				4735	{
				4736	for (i = 0; i < len; ++i)
				4737	if (opnd[i] != scan[i])
				4738	break;
				4739	if (i < len && (!ireg_ic \|\| !enc_utf8
				4740	\|\| utf_fold(utf_ptr2char(scan)) != cf))
				4741	break;
				4742	scan += len;
				4743	++count;
				4744	}
				4745	}
				4746	}
				4747	break;
				4748	#endif
				4749
				4750	case ANYOF:
				4751	case ANYOF + ADD_NL:
				4752	testval = TRUE;
				4753	/FALLTHROUGH/
				4754
				4755	case ANYBUT:
				4756	case ANYBUT + ADD_NL:
				4757	while (count < maxcount)
				4758	{
				4759	#ifdef FEAT_MBYTE
				4760	int len;
				4761	#endif
				4762	if (*scan == NUL)
				4763	{
				4764	if (!WITH_NL(OP(p)) \|\| reglnum == reg_maxline)
				4765	break;
				4766	reg_nextline();
				4767	scan = reginput;
				4768	if (got_int)
				4769	break;
				4770	}
				4771	else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
				4772	++scan;
				4773	#ifdef FEAT_MBYTE
				4774	else if (has_mbyte && (len = (*mb_ptr2len_check)(scan)) > 1)
				4775	{
				4776	if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
				4777	break;
				4778	scan += len;
				4779	}
				4780	#endif
				4781	else
				4782	{
				4783	if ((cstrchr(opnd, *scan) == NULL) == testval)
				4784	break;
				4785	++scan;
				4786	}
				4787	++count;
				4788	}
				4789	break;
				4790
				4791	case NEWL:
				4792	while (count < maxcount
				4793	&& ((*scan == NUL && reglnum < reg_maxline)
				4794	\|\| (*scan == '\n' && reg_line_lbr)))
				4795	{
				4796	count++;
				4797	if (reg_line_lbr)
				4798	ADVANCE_REGINPUT();
				4799	else
				4800	reg_nextline();
				4801	scan = reginput;
				4802	if (got_int)
				4803	break;
				4804	}
				4805	break;
				4806
				4807	default: /* Oh dear. Called inappropriately. */
				4808	EMSG(_(e_re_corr));
				4809	#ifdef DEBUG
				4810	printf("Called regrepeat with op code %d\n", OP(p));
				4811	#endif
				4812	break;
				4813	}
				4814
				4815	reginput = scan;
				4816
				4817	return (int)count;
				4818	}
				4819
				4820	/*
				4821	* regnext - dig the "next" pointer out of a node
				4822	*/
				4823	static char_u *
				4824	regnext(p)
				4825	char_u *p;
				4826	{
				4827	int offset;
				4828
				4829	if (p == JUST_CALC_SIZE)
				4830	return NULL;
				4831
				4832	offset = NEXT(p);
				4833	if (offset == 0)
				4834	return NULL;
				4835
				4836	if (OP(p) == BACK)
				4837	return p - offset;
				4838	else
				4839	return p + offset;
				4840	}
				4841
				4842	/*
				4843	* Check the regexp program for its magic number.
				4844	* Return TRUE if it's wrong.
				4845	*/
				4846	static int
				4847	prog_magic_wrong()
				4848	{
				4849	if (UCHARAT(REG_MULTI
				4850	? reg_mmatch->regprog->program
				4851	: reg_match->regprog->program) != REGMAGIC)
				4852	{
				4853	EMSG(_(e_re_corr));
				4854	return TRUE;
				4855	}
				4856	return FALSE;
				4857	}
				4858
				4859	/*
				4860	* Cleanup the subexpressions, if this wasn't done yet.
				4861	* This construction is used to clear the subexpressions only when they are
				4862	* used (to increase speed).
				4863	*/
				4864	static void
				4865	cleanup_subexpr()
				4866	{
				4867	if (need_clear_subexpr)
				4868	{
				4869	if (REG_MULTI)
				4870	{
				4871	/* Use 0xff to set lnum to -1 */
				4872	vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4873	vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4874	}
				4875	else
				4876	{
				4877	vim_memset(reg_startp, 0, sizeof(char_u ) NSUBEXP);
				4878	vim_memset(reg_endp, 0, sizeof(char_u ) NSUBEXP);
				4879	}
				4880	need_clear_subexpr = FALSE;
				4881	}
				4882	}
				4883
				4884	#ifdef FEAT_SYN_HL
				4885	static void
				4886	cleanup_zsubexpr()
				4887	{
				4888	if (need_clear_zsubexpr)
				4889	{
				4890	if (REG_MULTI)
				4891	{
				4892	/* Use 0xff to set lnum to -1 */
				4893	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4894	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				4895	}
				4896	else
				4897	{
				4898	vim_memset(reg_startzp, 0, sizeof(char_u ) NSUBEXP);
				4899	vim_memset(reg_endzp, 0, sizeof(char_u ) NSUBEXP);
				4900	}
				4901	need_clear_zsubexpr = FALSE;
				4902	}
				4903	}
				4904	#endif
				4905
				4906	/*
				4907	* Advance reglnum, regline and reginput to the next line.
				4908	*/
				4909	static void
				4910	reg_nextline()
				4911	{
				4912	regline = reg_getline(++reglnum);
				4913	reginput = regline;
				4914	fast_breakcheck();
				4915	}
				4916
				4917	/*
				4918	* Save the input line and position in a regsave_T.
				4919	*/
				4920	static void
				4921	reg_save(save)
				4922	regsave_T *save;
				4923	{
				4924	if (REG_MULTI)
				4925	{
				4926	save->rs_u.pos.col = (colnr_T)(reginput - regline);
				4927	save->rs_u.pos.lnum = reglnum;
				4928	}
				4929	else
				4930	save->rs_u.ptr = reginput;
				4931	}
				4932
				4933	/*
				4934	* Restore the input line and position from a regsave_T.
				4935	*/
				4936	static void
				4937	reg_restore(save)
				4938	regsave_T *save;
				4939	{
				4940	if (REG_MULTI)
				4941	{
				4942	if (reglnum != save->rs_u.pos.lnum)
				4943	{
				4944	/* only call reg_getline() when the line number changed to save
				4945	* a bit of time */
				4946	reglnum = save->rs_u.pos.lnum;
				4947	regline = reg_getline(reglnum);
				4948	}
				4949	reginput = regline + save->rs_u.pos.col;
				4950	}
				4951	else
				4952	reginput = save->rs_u.ptr;
				4953	}
				4954
				4955	/*
				4956	* Return TRUE if current position is equal to saved position.
				4957	*/
				4958	static int
				4959	reg_save_equal(save)
				4960	regsave_T *save;
				4961	{
				4962	if (REG_MULTI)
				4963	return reglnum == save->rs_u.pos.lnum
				4964	&& reginput == regline + save->rs_u.pos.col;
				4965	return reginput == save->rs_u.ptr;
				4966	}
				4967
				4968	/*
				4969	* Tentatively set the sub-expression start to the current position (after
				4970	* calling regmatch() they will have changed). Need to save the existing
				4971	* values for when there is no match.
				4972	* Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
				4973	* depending on REG_MULTI.
				4974	*/
				4975	static void
				4976	save_se_multi(savep, posp)
				4977	save_se_T *savep;
				4978	lpos_T *posp;
				4979	{
				4980	savep->se_u.pos = *posp;
				4981	posp->lnum = reglnum;
				4982	posp->col = (colnr_T)(reginput - regline);
				4983	}
				4984
				4985	static void
				4986	save_se_one(savep, pp)
				4987	save_se_T *savep;
				4988	char_u **pp;
				4989	{
				4990	savep->se_u.ptr = *pp;
				4991	*pp = reginput;
				4992	}
				4993
				4994	/*
				4995	* Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
				4996	*/
				4997	static int
				4998	re_num_cmp(val, scan)
				4999	long_u val;
				5000	char_u *scan;
				5001	{
				5002	long_u n = OPERAND_MIN(scan);
				5003
				5004	if (OPERAND_CMP(scan) == '>')
				5005	return val > n;
				5006	if (OPERAND_CMP(scan) == '<')
				5007	return val < n;
				5008	return val == n;
				5009	}
				5010
				5011
				5012	#ifdef DEBUG
				5013
				5014	/*
				5015	* regdump - dump a regexp onto stdout in vaguely comprehensible form
				5016	*/
				5017	static void
				5018	regdump(pattern, r)
				5019	char_u *pattern;
				5020	regprog_T *r;
				5021	{
				5022	char_u *s;
				5023	int op = EXACTLY; /* Arbitrary non-END op. */
				5024	char_u *next;
				5025	char_u *end = NULL;
				5026
				5027	printf("\r\nregcomp(%s):\r\n", pattern);
				5028
				5029	s = r->program + 1;
				5030	/*
				5031	* Loop until we find the END that isn't before a referred next (an END
				5032	* can also appear in a NOMATCH operand).
				5033	*/
				5034	while (op != END \|\| s <= end)
				5035	{
				5036	op = OP(s);
				5037	printf("%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
				5038	next = regnext(s);
				5039	if (next == NULL) /* Next ptr. */
				5040	printf("(0)");
				5041	else
				5042	printf("(%d)", (int)((s - r->program) + (next - s)));
				5043	if (end < next)
				5044	end = next;
				5045	if (op == BRACE_LIMITS)
				5046	{
				5047	/* Two short ints */
				5048	printf(" minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
				5049	s += 8;
				5050	}
				5051	s += 3;
				5052	if (op == ANYOF \|\| op == ANYOF + ADD_NL
				5053	\|\| op == ANYBUT \|\| op == ANYBUT + ADD_NL
				5054	\|\| op == EXACTLY)
				5055	{
				5056	/* Literal string, where present. */
				5057	while (*s != NUL)
				5058	printf("%c", *s++);
				5059	s++;
				5060	}
				5061	printf("\r\n");
				5062	}
				5063
				5064	/* Header fields of interest. */
				5065	if (r->regstart != NUL)
				5066	printf("start `%s' 0x%x; ", r->regstart < 256
				5067	? (char *)transchar(r->regstart)
				5068	: "multibyte", r->regstart);
				5069	if (r->reganch)
				5070	printf("anchored; ");
				5071	if (r->regmust != NULL)
				5072	printf("must have \"%s\"", r->regmust);
				5073	printf("\r\n");
				5074	}
				5075
				5076	/*
				5077	* regprop - printable representation of opcode
				5078	*/
				5079	static char_u *
				5080	regprop(op)
				5081	char_u *op;
				5082	{
				5083	char_u *p;
				5084	static char_u buf[50];
				5085
				5086	(void) strcpy(buf, ":");
				5087
				5088	switch (OP(op))
				5089	{
				5090	case BOL:
				5091	p = "BOL";
				5092	break;
				5093	case EOL:
				5094	p = "EOL";
				5095	break;
				5096	case RE_BOF:
				5097	p = "BOF";
				5098	break;
				5099	case RE_EOF:
				5100	p = "EOF";
				5101	break;
				5102	case CURSOR:
				5103	p = "CURSOR";
				5104	break;
				5105	case RE_LNUM:
				5106	p = "RE_LNUM";
				5107	break;
				5108	case RE_COL:
				5109	p = "RE_COL";
				5110	break;
				5111	case RE_VCOL:
				5112	p = "RE_VCOL";
				5113	break;
				5114	case BOW:
				5115	p = "BOW";
				5116	break;
				5117	case EOW:
				5118	p = "EOW";
				5119	break;
				5120	case ANY:
				5121	p = "ANY";
				5122	break;
				5123	case ANY + ADD_NL:
				5124	p = "ANY+NL";
				5125	break;
				5126	case ANYOF:
				5127	p = "ANYOF";
				5128	break;
				5129	case ANYOF + ADD_NL:
				5130	p = "ANYOF+NL";
				5131	break;
				5132	case ANYBUT:
				5133	p = "ANYBUT";
				5134	break;
				5135	case ANYBUT + ADD_NL:
				5136	p = "ANYBUT+NL";
				5137	break;
				5138	case IDENT:
				5139	p = "IDENT";
				5140	break;
				5141	case IDENT + ADD_NL:
				5142	p = "IDENT+NL";
				5143	break;
				5144	case SIDENT:
				5145	p = "SIDENT";
				5146	break;
				5147	case SIDENT + ADD_NL:
				5148	p = "SIDENT+NL";
				5149	break;
				5150	case KWORD:
				5151	p = "KWORD";
				5152	break;
				5153	case KWORD + ADD_NL:
				5154	p = "KWORD+NL";
				5155	break;
				5156	case SKWORD:
				5157	p = "SKWORD";
				5158	break;
				5159	case SKWORD + ADD_NL:
				5160	p = "SKWORD+NL";
				5161	break;
				5162	case FNAME:
				5163	p = "FNAME";
				5164	break;
				5165	case FNAME + ADD_NL:
				5166	p = "FNAME+NL";
				5167	break;
				5168	case SFNAME:
				5169	p = "SFNAME";
				5170	break;
				5171	case SFNAME + ADD_NL:
				5172	p = "SFNAME+NL";
				5173	break;
				5174	case PRINT:
				5175	p = "PRINT";
				5176	break;
				5177	case PRINT + ADD_NL:
				5178	p = "PRINT+NL";
				5179	break;
				5180	case SPRINT:
				5181	p = "SPRINT";
				5182	break;
				5183	case SPRINT + ADD_NL:
				5184	p = "SPRINT+NL";
				5185	break;
				5186	case WHITE:
				5187	p = "WHITE";
				5188	break;
				5189	case WHITE + ADD_NL:
				5190	p = "WHITE+NL";
				5191	break;
				5192	case NWHITE:
				5193	p = "NWHITE";
				5194	break;
				5195	case NWHITE + ADD_NL:
				5196	p = "NWHITE+NL";
				5197	break;
				5198	case DIGIT:
				5199	p = "DIGIT";
				5200	break;
				5201	case DIGIT + ADD_NL:
				5202	p = "DIGIT+NL";
				5203	break;
				5204	case NDIGIT:
				5205	p = "NDIGIT";
				5206	break;
				5207	case NDIGIT + ADD_NL:
				5208	p = "NDIGIT+NL";
				5209	break;
				5210	case HEX:
				5211	p = "HEX";
				5212	break;
				5213	case HEX + ADD_NL:
				5214	p = "HEX+NL";
				5215	break;
				5216	case NHEX:
				5217	p = "NHEX";
				5218	break;
				5219	case NHEX + ADD_NL:
				5220	p = "NHEX+NL";
				5221	break;
				5222	case OCTAL:
				5223	p = "OCTAL";
				5224	break;
				5225	case OCTAL + ADD_NL:
				5226	p = "OCTAL+NL";
				5227	break;
				5228	case NOCTAL:
				5229	p = "NOCTAL";
				5230	break;
				5231	case NOCTAL + ADD_NL:
				5232	p = "NOCTAL+NL";
				5233	break;
				5234	case WORD:
				5235	p = "WORD";
				5236	break;
				5237	case WORD + ADD_NL:
				5238	p = "WORD+NL";
				5239	break;
				5240	case NWORD:
				5241	p = "NWORD";
				5242	break;
				5243	case NWORD + ADD_NL:
				5244	p = "NWORD+NL";
				5245	break;
				5246	case HEAD:
				5247	p = "HEAD";
				5248	break;
				5249	case HEAD + ADD_NL:
				5250	p = "HEAD+NL";
				5251	break;
				5252	case NHEAD:
				5253	p = "NHEAD";
				5254	break;
				5255	case NHEAD + ADD_NL:
				5256	p = "NHEAD+NL";
				5257	break;
				5258	case ALPHA:
				5259	p = "ALPHA";
				5260	break;
				5261	case ALPHA + ADD_NL:
				5262	p = "ALPHA+NL";
				5263	break;
				5264	case NALPHA:
				5265	p = "NALPHA";
				5266	break;
				5267	case NALPHA + ADD_NL:
				5268	p = "NALPHA+NL";
				5269	break;
				5270	case LOWER:
				5271	p = "LOWER";
				5272	break;
				5273	case LOWER + ADD_NL:
				5274	p = "LOWER+NL";
				5275	break;
				5276	case NLOWER:
				5277	p = "NLOWER";
				5278	break;
				5279	case NLOWER + ADD_NL:
				5280	p = "NLOWER+NL";
				5281	break;
				5282	case UPPER:
				5283	p = "UPPER";
				5284	break;
				5285	case UPPER + ADD_NL:
				5286	p = "UPPER+NL";
				5287	break;
				5288	case NUPPER:
				5289	p = "NUPPER";
				5290	break;
				5291	case NUPPER + ADD_NL:
				5292	p = "NUPPER+NL";
				5293	break;
				5294	case BRANCH:
				5295	p = "BRANCH";
				5296	break;
				5297	case EXACTLY:
				5298	p = "EXACTLY";
				5299	break;
				5300	case NOTHING:
				5301	p = "NOTHING";
				5302	break;
				5303	case BACK:
				5304	p = "BACK";
				5305	break;
				5306	case END:
				5307	p = "END";
				5308	break;
				5309	case MOPEN + 0:
				5310	p = "MATCH START";
				5311	break;
				5312	case MOPEN + 1:
				5313	case MOPEN + 2:
				5314	case MOPEN + 3:
				5315	case MOPEN + 4:
				5316	case MOPEN + 5:
				5317	case MOPEN + 6:
				5318	case MOPEN + 7:
				5319	case MOPEN + 8:
				5320	case MOPEN + 9:
				5321	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
				5322	p = NULL;
				5323	break;
				5324	case MCLOSE + 0:
				5325	p = "MATCH END";
				5326	break;
				5327	case MCLOSE + 1:
				5328	case MCLOSE + 2:
				5329	case MCLOSE + 3:
				5330	case MCLOSE + 4:
				5331	case MCLOSE + 5:
				5332	case MCLOSE + 6:
				5333	case MCLOSE + 7:
				5334	case MCLOSE + 8:
				5335	case MCLOSE + 9:
				5336	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
				5337	p = NULL;
				5338	break;
				5339	case BACKREF + 1:
				5340	case BACKREF + 2:
				5341	case BACKREF + 3:
				5342	case BACKREF + 4:
				5343	case BACKREF + 5:
				5344	case BACKREF + 6:
				5345	case BACKREF + 7:
				5346	case BACKREF + 8:
				5347	case BACKREF + 9:
				5348	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
				5349	p = NULL;
				5350	break;
				5351	case NOPEN:
				5352	p = "NOPEN";
				5353	break;
				5354	case NCLOSE:
				5355	p = "NCLOSE";
				5356	break;
				5357	#ifdef FEAT_SYN_HL
				5358	case ZOPEN + 1:
				5359	case ZOPEN + 2:
				5360	case ZOPEN + 3:
				5361	case ZOPEN + 4:
				5362	case ZOPEN + 5:
				5363	case ZOPEN + 6:
				5364	case ZOPEN + 7:
				5365	case ZOPEN + 8:
				5366	case ZOPEN + 9:
				5367	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
				5368	p = NULL;
				5369	break;
				5370	case ZCLOSE + 1:
				5371	case ZCLOSE + 2:
				5372	case ZCLOSE + 3:
				5373	case ZCLOSE + 4:
				5374	case ZCLOSE + 5:
				5375	case ZCLOSE + 6:
				5376	case ZCLOSE + 7:
				5377	case ZCLOSE + 8:
				5378	case ZCLOSE + 9:
				5379	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
				5380	p = NULL;
				5381	break;
				5382	case ZREF + 1:
				5383	case ZREF + 2:
				5384	case ZREF + 3:
				5385	case ZREF + 4:
				5386	case ZREF + 5:
				5387	case ZREF + 6:
				5388	case ZREF + 7:
				5389	case ZREF + 8:
				5390	case ZREF + 9:
				5391	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
				5392	p = NULL;
				5393	break;
				5394	#endif
				5395	case STAR:
				5396	p = "STAR";
				5397	break;
				5398	case PLUS:
				5399	p = "PLUS";
				5400	break;
				5401	case NOMATCH:
				5402	p = "NOMATCH";
				5403	break;
				5404	case MATCH:
				5405	p = "MATCH";
				5406	break;
				5407	case BEHIND:
				5408	p = "BEHIND";
				5409	break;
				5410	case NOBEHIND:
				5411	p = "NOBEHIND";
				5412	break;
				5413	case SUBPAT:
				5414	p = "SUBPAT";
				5415	break;
				5416	case BRACE_LIMITS:
				5417	p = "BRACE_LIMITS";
				5418	break;
				5419	case BRACE_SIMPLE:
				5420	p = "BRACE_SIMPLE";
				5421	break;
				5422	case BRACE_COMPLEX + 0:
				5423	case BRACE_COMPLEX + 1:
				5424	case BRACE_COMPLEX + 2:
				5425	case BRACE_COMPLEX + 3:
				5426	case BRACE_COMPLEX + 4:
				5427	case BRACE_COMPLEX + 5:
				5428	case BRACE_COMPLEX + 6:
				5429	case BRACE_COMPLEX + 7:
				5430	case BRACE_COMPLEX + 8:
				5431	case BRACE_COMPLEX + 9:
				5432	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
				5433	p = NULL;
				5434	break;
				5435	#ifdef FEAT_MBYTE
				5436	case MULTIBYTECODE:
				5437	p = "MULTIBYTECODE";
				5438	break;
				5439	#endif
				5440	case NEWL:
				5441	p = "NEWL";
				5442	break;
				5443	default:
				5444	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
				5445	p = NULL;
				5446	break;
				5447	}
				5448	if (p != NULL)
				5449	(void) strcat(buf, p);
				5450	return buf;
				5451	}
				5452	#endif
				5453
				5454	#ifdef FEAT_MBYTE
				5455	static void mb_decompose __ARGS((int c, int c1, int c2, int *c3));
				5456
				/*
				 * One decomposition entry: up to three code points, unused slots are 0.
				 */
				typedef struct
				{
				    int a, b, c;
				} decomp_T;


				/*
				 * Decomposition table for 0xfb20 - 0xfb4f, indexed by (c - 0xfb20).
				 * Entries marked UNUSED map the code point to itself.
				 */
				decomp_T decomp_table[0xfb4f-0xfb20+1] =
				{
				    {0x5e2,0,0},		/* 0xfb20	alt ayin */
				    {0x5d0,0,0},		/* 0xfb21	alt alef */
				    {0x5d3,0,0},		/* 0xfb22	alt dalet */
				    {0x5d4,0,0},		/* 0xfb23	alt he */
				    {0x5db,0,0},		/* 0xfb24	alt kaf */
				    {0x5dc,0,0},		/* 0xfb25	alt lamed */
				    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
				    {0x5e8,0,0},		/* 0xfb27	alt resh */
				    {0x5ea,0,0},		/* 0xfb28	alt tav */
				    {'+', 0, 0},		/* 0xfb29	alt plus */
				    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
				    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
				    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
				    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
				    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
				    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
				    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
				    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
				    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
				    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
				    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
				    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
				    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
				    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
				    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
				    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
				    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
				    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
				    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
				    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
				    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
				    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
				    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
				    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
				    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
				    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
				    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
				    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
				    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
				    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
				    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
				    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
				    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
				    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
				    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
				    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
				    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
				    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
				};

				/*
				 * Decompose character "c" into up to three characters, stored in "*c1",
				 * "*c2" and "*c3".
				 * Only 0xfb20 - 0xfb4f (the range covered by decomp_table) is decomposed;
				 * any other character is returned unchanged in "*c1" with "*c2" and "*c3"
				 * set to zero.
				 */
				static void
				mb_decompose(c, c1, c2, c3)
				    int c, *c1, *c2, *c3;
				{
				    decomp_T d;

				    /* The lower bound must equal the table base, otherwise the index
				     * below goes negative and reads outside decomp_table. */
				    if (c >= 0xfb20 && c <= 0xfb4f)
				    {
				        d = decomp_table[c - 0xfb20];
				        *c1 = d.a;
				        *c2 = d.b;
				        *c3 = d.c;
				    }
				    else
				    {
				        *c1 = c;
				        *c2 = *c3 = 0;
				    }
				}
				5535	#endif
				5536
				5537	/*
				5538	* Compare two strings, ignore case if ireg_ic set.
				5539	* Return 0 if strings match, non-zero otherwise.
				5540	* Correct the length "*n" when composing characters are ignored.
				5541	*/
				5542	static int
				5543	cstrncmp(s1, s2, n)
				5544	char_u s1, s2;
				5545	int *n;
				5546	{
				5547	int result;
				5548
				5549	if (!ireg_ic)
				5550	result = STRNCMP(s1, s2, *n);
				5551	else
				5552	result = MB_STRNICMP(s1, s2, *n);
				5553
				5554	#ifdef FEAT_MBYTE
				5555	/* if it failed and it's utf8 and we want to combineignore: */
				5556	if (result != 0 && enc_utf8 && ireg_icombine)
				5557	{
				5558	char_u str1, str2;
				5559	int c1, c2, c11, c12;
				5560	int ix;
				5561	int junk;
				5562
				5563	/* we have to handle the strcmp ourselves, since it is necessary to
				5564	* deal with the composing characters by ignoring them: */
				5565	str1 = s1;
				5566	str2 = s2;
				5567	c1 = c2 = 0;
				5568	for (ix = 0; ix < *n; )
				5569	{
				5570	c1 = mb_ptr2char_adv(&str1);
				5571	c2 = mb_ptr2char_adv(&str2);
				5572	ix += utf_char2len(c1);
				5573
				5574	/* decompose the character if necessary, into 'base' characters
				5575	* because I don't care about Arabic, I will hard-code the Hebrew
				5576	* which I do care about! So sue me... */
				5577	if (c1 != c2 && (!ireg_ic \|\| utf_fold(c1) != utf_fold(c2)))
				5578	{
				5579	/* decomposition necessary? */
				5580	mb_decompose(c1, &c11, &junk, &junk);
				5581	mb_decompose(c2, &c12, &junk, &junk);
				5582	c1 = c11;
				5583	c2 = c12;
				5584	if (c11 != c12 && (!ireg_ic \|\| utf_fold(c11) != utf_fold(c12)))
				5585	break;
				5586	}
				5587	}
				5588	result = c2 - c1;
				5589	if (result == 0)
				5590	*n = (int)(str2 - s2);
				5591	}
				5592	#endif
				5593
				5594	return result;
				5595	}
				5596
				5597	/*
				5598	* cstrchr: This function is used a lot for simple searches, keep it fast!
				5599	*/
				5600	static char_u *
				5601	cstrchr(s, c)
				5602	char_u *s;
				5603	int c;
				5604	{
				5605	char_u *p;
				5606	int cc;
				5607
				5608	if (!ireg_ic
				5609	#ifdef FEAT_MBYTE
				5610	\|\| (!enc_utf8 && mb_char2len(c) > 1)
				5611	#endif
				5612	)
				5613	return vim_strchr(s, c);
				5614
				5615	/* tolower() and toupper() can be slow, comparing twice should be a lot
				5616	* faster (esp. when using MS Visual C++!).
				5617	* For UTF-8 need to use folded case. */
				5618	#ifdef FEAT_MBYTE
				5619	if (enc_utf8 && c > 0x80)
				5620	cc = utf_fold(c);
				5621	else
				5622	#endif
				5623	if (isupper(c))
				5624	cc = TOLOWER_LOC(c);
				5625	else if (islower(c))
				5626	cc = TOUPPER_LOC(c);
				5627	else
				5628	return vim_strchr(s, c);
				5629
				5630	#ifdef FEAT_MBYTE
				5631	if (has_mbyte)
				5632	{
				5633	for (p = s; p != NUL; p += (mb_ptr2len_check)(p))
				5634	{
				5635	if (enc_utf8 && c > 0x80)
				5636	{
				5637	if (utf_fold(utf_ptr2char(p)) == cc)
				5638	return p;
				5639	}
				5640	else if (p == c \|\| p == cc)
				5641	return p;
				5642	}
				5643	}
				5644	else
				5645	#endif
				5646	/* Faster version for when there are no multi-byte characters. */
				5647	for (p = s; *p != NUL; ++p)
				5648	if (p == c \|\| p == cc)
				5649	return p;
				5650
				5651	return NULL;
				5652	}
				5653
				5654	/***************************************************************
				5655	* regsub stuff *
				5656	***************************************************************/
				5657
				5658	/* This stuff below really confuses cc on an SGI -- webb */
				5659	#ifdef __sgi
				5660	# undef __ARGS
				5661	# define __ARGS(x) ()
				5662	#endif
				5663
				5664	/*
				5665	* We should define ftpr as a pointer to a function returning a pointer to
				5666	* a function returning a pointer to a function ...
				5667	* This is impossible, so we declare a pointer to a function returning a
				5668	* pointer to a function returning void. This should work for all compilers.
				5669	*/
				5670	typedef void ((fptr) __ARGS((char_u *, int)))();
				5671
				5672	static fptr do_upper __ARGS((char_u *, int));
				5673	static fptr do_Upper __ARGS((char_u *, int));
				5674	static fptr do_lower __ARGS((char_u *, int));
				5675	static fptr do_Lower __ARGS((char_u *, int));
				5676
				5677	static int vim_regsub_both __ARGS((char_u source, char_u dest, int copy, int magic, int backslash));
				5678
				5679	static fptr
				5680	do_upper(d, c)
				5681	char_u *d;
				5682	int c;
				5683	{
				5684	*d = TOUPPER_LOC(c);
				5685
				5686	return (fptr)NULL;
				5687	}
				5688
				5689	static fptr
				5690	do_Upper(d, c)
				5691	char_u *d;
				5692	int c;
				5693	{
				5694	*d = TOUPPER_LOC(c);
				5695
				5696	return (fptr)do_Upper;
				5697	}
				5698
				5699	static fptr
				5700	do_lower(d, c)
				5701	char_u *d;
				5702	int c;
				5703	{
				5704	*d = TOLOWER_LOC(c);
				5705
				5706	return (fptr)NULL;
				5707	}
				5708
				5709	static fptr
				5710	do_Lower(d, c)
				5711	char_u *d;
				5712	int c;
				5713	{
				5714	*d = TOLOWER_LOC(c);
				5715
				5716	return (fptr)do_Lower;
				5717	}
				5718
				5719	/*
				5720	* regtilde(): Replace tildes in the pattern by the old pattern.
				5721	*
				5722	* Short explanation of the tilde: It stands for the previous replacement
				5723	* pattern. If that previous pattern also contains a ~ we should go back a
				5724	* step further... But we insert the previous pattern into the current one
				5725	* and remember that.
				5726	* This still does not handle the case where "magic" changes. TODO?
				5727	*
				5728	* The tildes are parsed once before the first call to vim_regsub().
				5729	*/
				5730	char_u *
				5731	regtilde(source, magic)
				5732	char_u *source;
				5733	int magic;
				5734	{
				5735	char_u *newsub = source;
				5736	char_u *tmpsub;
				5737	char_u *p;
				5738	int len;
				5739	int prevlen;
				5740
				5741	for (p = newsub; *p; ++p)
				5742	{
				5743	if ((p == '~' && magic) \|\| (p == '\\' && *(p + 1) == '~' && !magic))
				5744	{
				5745	if (reg_prev_sub != NULL)
				5746	{
				5747	/* length = len(newsub) - 1 + len(prev_sub) + 1 */
				5748	prevlen = (int)STRLEN(reg_prev_sub);
				5749	tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
				5750	if (tmpsub != NULL)
				5751	{
				5752	/* copy prefix */
				5753	len = (int)(p - newsub); /* not including ~ */
				5754	mch_memmove(tmpsub, newsub, (size_t)len);
				5755	/* interpretate tilde */
				5756	mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
				5757	/* copy postfix */
				5758	if (!magic)
				5759	++p; /* back off \ */
				5760	STRCPY(tmpsub + len + prevlen, p + 1);
				5761
				5762	if (newsub != source) /* already allocated newsub */
				5763	vim_free(newsub);
				5764	newsub = tmpsub;
				5765	p = newsub + len + prevlen;
				5766	}
				5767	}
				5768	else if (magic)
				5769	STRCPY(p, p + 1); /* remove '~' */
				5770	else
				5771	STRCPY(p, p + 2); /* remove '\~' */
				5772	--p;
				5773	}
				5774	else
				5775	{
				5776	if (p == '\\' && p[1]) / skip escaped characters */
				5777	++p;
				5778	#ifdef FEAT_MBYTE
				5779	if (has_mbyte)
				5780	p += (*mb_ptr2len_check)(p) - 1;
				5781	#endif
				5782	}
				5783	}
				5784
				5785	vim_free(reg_prev_sub);
				5786	if (newsub != source) /* newsub was allocated, just keep it */
				5787	reg_prev_sub = newsub;
				5788	else /* no ~ found, need to save newsub */
				5789	reg_prev_sub = vim_strsave(newsub);
				5790	return newsub;
				5791	}
				5792
				#ifdef FEAT_EVAL
				/* TRUE while submatch() can be used: vim_regsub_both() sets it around the
				 * evaluation of a "\=" expression and reg_submatch() refuses to work when
				 * it is FALSE. */
				static int can_f_submatch = FALSE;

				/* reg_submatch() reads these instead of reg_match and reg_mmatch.  This is
				 * needed when the substitution string is an expression that contains a call
				 * to substitute() and submatch(). */
				static regmatch_T *submatch_match;
				static regmmatch_T *submatch_mmatch;
				#endif
				5802
				#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
				/*
				 * vim_regsub() - perform substitutions after a vim_regexec() or
				 * vim_regexec_multi() match.
				 *
				 * If "copy" is TRUE really copy into "dest".
				 * If "copy" is FALSE nothing is copied, this is just to find out the length
				 * of the result.
				 *
				 * If "backslash" is TRUE, a backslash will be removed later, need to double
				 * them to keep them, and insert a backslash before a CR to avoid it being
				 * replaced with a line break later.
				 *
				 * Note: The matched text must not change between the call of
				 * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
				 * references invalid!
				 *
				 * Returns the size of the replacement, including terminating NUL.
				 */
				int
				vim_regsub(rmp, source, dest, copy, magic, backslash)
				    regmatch_T *rmp;
				    char_u *source;
				    char_u *dest;
				    int copy;
				    int magic;
				    int backslash;
				{
				    /* Single-line match: use "rmp" and clear the multi-line state. */
				    reg_match = rmp;
				    reg_mmatch = NULL;
				    reg_maxline = 0;
				    return vim_regsub_both(source, dest, copy, magic, backslash);
				}
				#endif
				5837
				5838	int
				5839	vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
				5840	regmmatch_T *rmp;
				5841	linenr_T lnum;
				5842	char_u *source;
				5843	char_u *dest;
				5844	int copy;
				5845	int magic;
				5846	int backslash;
				5847	{
				5848	reg_match = NULL;
				5849	reg_mmatch = rmp;
				5850	reg_buf = curbuf; /* always works on the current buffer! */
				5851	reg_firstlnum = lnum;
				5852	reg_maxline = curbuf->b_ml.ml_line_count - lnum;
				5853	return vim_regsub_both(source, dest, copy, magic, backslash);
				5854	}
				5855
				5856	static int
				5857	vim_regsub_both(source, dest, copy, magic, backslash)
				5858	char_u *source;
				5859	char_u *dest;
				5860	int copy;
				5861	int magic;
				5862	int backslash;
				5863	{
				5864	char_u *src;
				5865	char_u *dst;
				5866	char_u *s;
				5867	int c;
				5868	int no = -1;
				5869	fptr func = (fptr)NULL;
				5870	linenr_T clnum = 0; /* init for GCC */
				5871	int len = 0; /* init for GCC */
				5872	#ifdef FEAT_EVAL
				5873	static char_u *eval_result = NULL;
				5874	#endif
				5875	#ifdef FEAT_MBYTE
				5876	int l;
				5877	#endif
				5878
				5879
				5880	/* Be paranoid... */
				5881	if (source == NULL \|\| dest == NULL)
				5882	{
				5883	EMSG(_(e_null));
				5884	return 0;
				5885	}
				5886	if (prog_magic_wrong())
				5887	return 0;
				5888	src = source;
				5889	dst = dest;
				5890
				5891	/*
				5892	* When the substitute part starts with "\=" evaluate it as an expression.
				5893	*/
				5894	if (source[0] == '\\' && source[1] == '='
				5895	#ifdef FEAT_EVAL
				5896	&& !can_f_submatch /* can't do this recursively */
				5897	#endif
				5898	)
				5899	{
				5900	#ifdef FEAT_EVAL
				5901	/* To make sure that the length doesn't change between checking the
				5902	* length and copying the string, and to speed up things, the
				5903	* resulting string is saved from the call with "copy" == FALSE to the
				5904	* call with "copy" == TRUE. */
				5905	if (copy)
				5906	{
				5907	if (eval_result != NULL)
				5908	{
				5909	STRCPY(dest, eval_result);
				5910	dst += STRLEN(eval_result);
				5911	vim_free(eval_result);
				5912	eval_result = NULL;
				5913	}
				5914	}
				5915	else
				5916	{
				5917	linenr_T save_reg_maxline;
				5918	win_T *save_reg_win;
				5919	int save_ireg_ic;
				5920
				5921	vim_free(eval_result);
				5922
				5923	/* The expression may contain substitute(), which calls us
				5924	* recursively. Make sure submatch() gets the text from the first
				5925	* level. Don't need to save "reg_buf", because
				5926	* vim_regexec_multi() can't be called recursively. */
				5927	submatch_match = reg_match;
				5928	submatch_mmatch = reg_mmatch;
				5929	save_reg_maxline = reg_maxline;
				5930	save_reg_win = reg_win;
				5931	save_ireg_ic = ireg_ic;
				5932	can_f_submatch = TRUE;
				5933
				5934	eval_result = eval_to_string(source + 2, NULL);
				5935	if (eval_result != NULL)
				5936	{
Bram Moolenaar	1cd871b	2004-12-19 22:46:22 +0000	[diff] [blame]	5937	for (s = eval_result; *s != NUL; mb_ptr_adv(s))
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5938	{
				5939	/* Change NL to CR, so that it becomes a line break.
				5940	* Skip over a backslashed character. */
				5941	if (*s == NL)
				5942	*s = CAR;
				5943	else if (*s == '\\' && s[1] != NUL)
				5944	++s;
Bram Moolenaar	071d427	2004-06-13 20:20:40 +0000	[diff] [blame]	5945	}
				5946
				5947	dst += STRLEN(eval_result);
				5948	}
				5949
				5950	reg_match = submatch_match;
				5951	reg_mmatch = submatch_mmatch;
				5952	reg_maxline = save_reg_maxline;
				5953	reg_win = save_reg_win;
				5954	ireg_ic = save_ireg_ic;
				5955	can_f_submatch = FALSE;
				5956	}
				5957	#endif
				5958	}
				5959	else
				5960	while ((c = *src++) != NUL)
				5961	{
				5962	if (c == '&' && magic)
				5963	no = 0;
				5964	else if (c == '\\' && *src != NUL)
				5965	{
				5966	if (*src == '&' && !magic)
				5967	{
				5968	++src;
				5969	no = 0;
				5970	}
				5971	else if ('0' <= src && src <= '9')
				5972	{
				5973	no = *src++ - '0';
				5974	}
				5975	else if (vim_strchr((char_u )"uUlLeE", src))
				5976	{
				5977	switch (*src++)
				5978	{
				5979	case 'u': func = (fptr)do_upper;
				5980	continue;
				5981	case 'U': func = (fptr)do_Upper;
				5982	continue;
				5983	case 'l': func = (fptr)do_lower;
				5984	continue;
				5985	case 'L': func = (fptr)do_Lower;
				5986	continue;
				5987	case 'e':
				5988	case 'E': func = (fptr)NULL;
				5989	continue;
				5990	}
				5991	}
				5992	}
				5993	if (no < 0) /* Ordinary character. */
				5994	{
				5995	if (c == '\\' && *src != NUL)
				5996	{
				5997	/* Check for abbreviations -- webb */
				5998	switch (*src)
				5999	{
				6000	case 'r': c = CAR; ++src; break;
				6001	case 'n': c = NL; ++src; break;
				6002	case 't': c = TAB; ++src; break;
				6003	/* Oh no! \e already has meaning in subst pat :-( */
				6004	/* case 'e': c = ESC; ++src; break; */
				6005	case 'b': c = Ctrl_H; ++src; break;
				6006
				6007	/* If "backslash" is TRUE the backslash will be removed
				6008	* later. Used to insert a literal CR. */
				6009	default: if (backslash)
				6010	{
				6011	if (copy)
				6012	*dst = '\\';
				6013	++dst;
				6014	}
				6015	c = *src++;
				6016	}
				6017	}
				6018
				6019	/* Write to buffer, if copy is set. */
				6020	#ifdef FEAT_MBYTE
				6021	if (has_mbyte && (l = (*mb_ptr2len_check)(src - 1)) > 1)
				6022	{
				6023	/* TODO: should use "func" here. */
				6024	if (copy)
				6025	mch_memmove(dst, src - 1, l);
				6026	dst += l - 1;
				6027	src += l - 1;
				6028	}
				6029	else
				6030	{
				6031	#endif
				6032	if (copy)
				6033	{
				6034	if (func == (fptr)NULL) /* just copy */
				6035	*dst = c;
				6036	else /* change case */
				6037	func = (fptr)(func(dst, c));
				6038	/* Turbo C complains without the typecast */
				6039	}
				6040	#ifdef FEAT_MBYTE
				6041	}
				6042	#endif
				6043	dst++;
				6044	}
				6045	else
				6046	{
				6047	if (REG_MULTI)
				6048	{
				6049	clnum = reg_mmatch->startpos[no].lnum;
				6050	if (clnum < 0 \|\| reg_mmatch->endpos[no].lnum < 0)
				6051	s = NULL;
				6052	else
				6053	{
				6054	s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
				6055	if (reg_mmatch->endpos[no].lnum == clnum)
				6056	len = reg_mmatch->endpos[no].col
				6057	- reg_mmatch->startpos[no].col;
				6058	else
				6059	len = (int)STRLEN(s);
				6060	}
				6061	}
				6062	else
				6063	{
				6064	s = reg_match->startp[no];
				6065	if (reg_match->endp[no] == NULL)
				6066	s = NULL;
				6067	else
				6068	len = (int)(reg_match->endp[no] - s);
				6069	}
				6070	if (s != NULL)
				6071	{
				6072	for (;;)
				6073	{
				6074	if (len == 0)
				6075	{
				6076	if (REG_MULTI)
				6077	{
				6078	if (reg_mmatch->endpos[no].lnum == clnum)
				6079	break;
				6080	if (copy)
				6081	*dst = CAR;
				6082	++dst;
				6083	s = reg_getline(++clnum);
				6084	if (reg_mmatch->endpos[no].lnum == clnum)
				6085	len = reg_mmatch->endpos[no].col;
				6086	else
				6087	len = (int)STRLEN(s);
				6088	}
				6089	else
				6090	break;
				6091	}
				6092	else if (s == NUL) / we hit NUL. */
				6093	{
				6094	if (copy)
				6095	EMSG(_(e_re_damg));
				6096	goto exit;
				6097	}
				6098	else
				6099	{
				6100	if (backslash && (s == CAR \|\| s == '\\'))
				6101	{
				6102	/*
				6103	* Insert a backslash in front of a CR, otherwise
				6104	* it will be replaced by a line break.
				6105	* Number of backslashes will be halved later,
				6106	* double them here.
				6107	*/
				6108	if (copy)
				6109	{
				6110	dst[0] = '\\';
				6111	dst[1] = *s;
				6112	}
				6113	dst += 2;
				6114	}
				6115	#ifdef FEAT_MBYTE
				6116	else if (has_mbyte && (l = (*mb_ptr2len_check)(s)) > 1)
				6117	{
				6118	/* TODO: should use "func" here. */
				6119	if (copy)
				6120	mch_memmove(dst, s, l);
				6121	dst += l;
				6122	s += l - 1;
				6123	len -= l - 1;
				6124	}
				6125	#endif
				6126	else
				6127	{
				6128	if (copy)
				6129	{
				6130	if (func == (fptr)NULL) /* just copy */
				6131	dst = s;
				6132	else /* change case */
				6133	func = (fptr)(func(dst, *s));
				6134	/* Turbo C complains without the typecast */
				6135	}
				6136	++dst;
				6137	}
				6138	++s;
				6139	--len;
				6140	}
				6141	}
				6142	}
				6143	no = -1;
				6144	}
				6145	}
				6146	if (copy)
				6147	*dst = NUL;
				6148
				6149	exit:
				6150	return (int)((dst - dest) + 1);
				6151	}
				6152
				6153	#ifdef FEAT_EVAL
				6154	/*
				6155	* Used for the submatch() function: get the string from tne n'th submatch in
				6156	* allocated memory.
				6157	* Returns NULL when not in a ":s" command and for a non-existing submatch.
				6158	*/
				6159	char_u *
				6160	reg_submatch(no)
				6161	int no;
				6162	{
				6163	char_u *retval = NULL;
				6164	char_u *s;
				6165	int len;
				6166	int round;
				6167	linenr_T lnum;
				6168
				6169	if (!can_f_submatch)
				6170	return NULL;
				6171
				6172	if (submatch_match == NULL)
				6173	{
				6174	/*
				6175	* First round: compute the length and allocate memory.
				6176	* Second round: copy the text.
				6177	*/
				6178	for (round = 1; round <= 2; ++round)
				6179	{
				6180	lnum = submatch_mmatch->startpos[no].lnum;
				6181	if (lnum < 0 \|\| submatch_mmatch->endpos[no].lnum < 0)
				6182	return NULL;
				6183
				6184	s = reg_getline(lnum) + submatch_mmatch->startpos[no].col;
				6185	if (s == NULL) /* anti-crash check, cannot happen? */
				6186	break;
				6187	if (submatch_mmatch->endpos[no].lnum == lnum)
				6188	{
				6189	/* Within one line: take form start to end col. */
				6190	len = submatch_mmatch->endpos[no].col
				6191	- submatch_mmatch->startpos[no].col;
				6192	if (round == 2)
				6193	{
				6194	STRNCPY(retval, s, len);
				6195	retval[len] = NUL;
				6196	}
				6197	++len;
				6198	}
				6199	else
				6200	{
				6201	/* Multiple lines: take start line from start col, middle
				6202	* lines completely and end line up to end col. */
				6203	len = (int)STRLEN(s);
				6204	if (round == 2)
				6205	{
				6206	STRCPY(retval, s);
				6207	retval[len] = '\n';
				6208	}
				6209	++len;
				6210	++lnum;
				6211	while (lnum < submatch_mmatch->endpos[no].lnum)
				6212	{
				6213	s = reg_getline(lnum++);
				6214	if (round == 2)
				6215	STRCPY(retval + len, s);
				6216	len += (int)STRLEN(s);
				6217	if (round == 2)
				6218	retval[len] = '\n';
				6219	++len;
				6220	}
				6221	if (round == 2)
				6222	STRNCPY(retval + len, reg_getline(lnum),
				6223	submatch_mmatch->endpos[no].col);
				6224	len += submatch_mmatch->endpos[no].col;
				6225	if (round == 2)
				6226	retval[len] = NUL;
				6227	++len;
				6228	}
				6229
				6230	if (round == 1)
				6231	{
				6232	retval = lalloc((long_u)len, TRUE);
				6233	if (s == NULL)
				6234	return NULL;
				6235	}
				6236	}
				6237	}
				6238	else
				6239	{
				6240	if (submatch_match->endp[no] == NULL)
				6241	retval = NULL;
				6242	else
				6243	{
				6244	s = submatch_match->startp[no];
				6245	retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
				6246	}
				6247	}
				6248
				6249	return retval;
				6250	}
				6251	#endif