Blame - src/regexp_nfa.c - android_external_vim

blob: 51e355a5fd716b046cf723082e601e1670cf1c1c [file] [log] [blame]

Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* NFA regular expression implementation.
				4	*
				5	* This file is included in "regexp.c".
				6	*/
				7
				8	#ifdef DEBUG
				9	/* Comment this out to disable log files. They can get pretty big */
				10	# define ENABLE_LOG
				11	# define LOG_NAME "log_nfarun.log"
				12	#endif
				13
				14	/* Upper limit allowed for {m,n} repetitions handled by NFA */
				15	#define NFA_BRACES_MAXLIMIT 10
				16	/* For allocating space for the postfix representation */
				17	#define NFA_POSTFIX_MULTIPLIER (NFA_BRACES_MAXLIMIT + 2)*2
				18	/* Size of stack, used when converting the postfix regexp into NFA */
				19	#define NFA_STACK_SIZE 1024
				20
				21	enum
				22	{
				23	NFA_SPLIT = -1024,
				24	NFA_MATCH,
				25	NFA_SKIP_CHAR, /* matches a 0-length char */
				26	NFA_END_NEG_RANGE, /* Used when expanding [^ab] */
				27
				28	NFA_CONCAT,
				29	NFA_OR,
				30	NFA_STAR,
				31	NFA_PLUS,
				32	NFA_QUEST,
				33	NFA_QUEST_NONGREEDY, /* Non-greedy version of \? */
				34	NFA_NOT, /* used for [^ab] negated char ranges */
				35
				36	NFA_BOL, /* ^ Begin line */
				37	NFA_EOL, /* $ End line */
				38	NFA_BOW, /* \< Begin word */
				39	NFA_EOW, /* \> End word */
				40	NFA_BOF, /* \%^ Begin file */
				41	NFA_EOF, /* \%$ End file */
				42	NFA_NEWL,
				43	NFA_ZSTART, /* Used for \zs */
				44	NFA_ZEND, /* Used for \ze */
				45	NFA_NOPEN, /* Start of subexpression marked with \%( */
				46	NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */
				47	NFA_START_INVISIBLE,
				48	NFA_END_INVISIBLE,
				49	NFA_MULTIBYTE, /* Next nodes in NFA are part of the same
				50	multibyte char */
				51	NFA_END_MULTIBYTE, /* End of multibyte char in the NFA */
				52	NFA_COMPOSING, /* Next nodes in NFA are part of the
				53	composing multibyte char */
				54	NFA_END_COMPOSING, /* End of a composing char in the NFA */
				55
				56	/* The following are used only in the postfix form, not in the NFA */
				57	NFA_PREV_ATOM_NO_WIDTH, /* Used for \@= */
				58	NFA_PREV_ATOM_NO_WIDTH_NEG, /* Used for \@! */
				59	NFA_PREV_ATOM_JUST_BEFORE, /* Used for \@<= */
				60	NFA_PREV_ATOM_JUST_BEFORE_NEG, /* Used for \@<! */
				61	NFA_PREV_ATOM_LIKE_PATTERN, /* Used for \@> */
				62
				63	NFA_MOPEN,
				64	NFA_MCLOSE = NFA_MOPEN + NSUBEXP,
				65
				66	/* NFA_FIRST_NL */
				67	NFA_ANY = NFA_MCLOSE + NSUBEXP, /* Match any one character. */
				68	NFA_ANYOF, /* Match any character in this string. */
				69	NFA_ANYBUT, /* Match any character not in this string. */
				70	NFA_IDENT, /* Match identifier char */
				71	NFA_SIDENT, /* Match identifier char but no digit */
				72	NFA_KWORD, /* Match keyword char */
				73	NFA_SKWORD, /* Match word char but no digit */
				74	NFA_FNAME, /* Match file name char */
				75	NFA_SFNAME, /* Match file name char but no digit */
				76	NFA_PRINT, /* Match printable char */
				77	NFA_SPRINT, /* Match printable char but no digit */
				78	NFA_WHITE, /* Match whitespace char */
				79	NFA_NWHITE, /* Match non-whitespace char */
				80	NFA_DIGIT, /* Match digit char */
				81	NFA_NDIGIT, /* Match non-digit char */
				82	NFA_HEX, /* Match hex char */
				83	NFA_NHEX, /* Match non-hex char */
				84	NFA_OCTAL, /* Match octal char */
				85	NFA_NOCTAL, /* Match non-octal char */
				86	NFA_WORD, /* Match word char */
				87	NFA_NWORD, /* Match non-word char */
				88	NFA_HEAD, /* Match head char */
				89	NFA_NHEAD, /* Match non-head char */
				90	NFA_ALPHA, /* Match alpha char */
				91	NFA_NALPHA, /* Match non-alpha char */
				92	NFA_LOWER, /* Match lowercase char */
				93	NFA_NLOWER, /* Match non-lowercase char */
				94	NFA_UPPER, /* Match uppercase char */
				95	NFA_NUPPER, /* Match non-uppercase char */
				96	NFA_FIRST_NL = NFA_ANY + ADD_NL,
				97	NFA_LAST_NL = NFA_NUPPER + ADD_NL,
				98
				99	/* Character classes [:alnum:] etc */
				100	NFA_CLASS_ALNUM,
				101	NFA_CLASS_ALPHA,
				102	NFA_CLASS_BLANK,
				103	NFA_CLASS_CNTRL,
				104	NFA_CLASS_DIGIT,
				105	NFA_CLASS_GRAPH,
				106	NFA_CLASS_LOWER,
				107	NFA_CLASS_PRINT,
				108	NFA_CLASS_PUNCT,
				109	NFA_CLASS_SPACE,
				110	NFA_CLASS_UPPER,
				111	NFA_CLASS_XDIGIT,
				112	NFA_CLASS_TAB,
				113	NFA_CLASS_RETURN,
				114	NFA_CLASS_BACKSPACE,
				115	NFA_CLASS_ESCAPE
				116	};
				117
				118	/* Keep in sync with classchars. */
				119	static int nfa_classcodes[] = {
				120	NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD,NFA_SKWORD,
				121	NFA_FNAME, NFA_SFNAME, NFA_PRINT, NFA_SPRINT,
				122	NFA_WHITE, NFA_NWHITE, NFA_DIGIT, NFA_NDIGIT,
				123	NFA_HEX, NFA_NHEX, NFA_OCTAL, NFA_NOCTAL,
				124	NFA_WORD, NFA_NWORD, NFA_HEAD, NFA_NHEAD,
				125	NFA_ALPHA, NFA_NALPHA, NFA_LOWER, NFA_NLOWER,
				126	NFA_UPPER, NFA_NUPPER
				127	};
				128
				129	static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c");
				130
				131	/*
				132	* NFA errors can be of 3 types:
				133	* *** NFA runtime errors, when something unknown goes wrong. The NFA fails
				134	* silently and revert the to backtracking engine.
				135	* syntax_error = FALSE;
				136	* *** Regexp syntax errors, when the input regexp is not syntactically correct.
				137	* The NFA engine displays an error message, and nothing else happens.
				138	* syntax_error = TRUE
				139	* *** Unsupported features, when the input regexp uses an operator that is not
				140	* implemented in the NFA. The NFA engine fails silently, and reverts to the
				141	* old backtracking engine.
				142	* syntax_error = FALSE
				143	* "The NFA fails" means that "compiling the regexp with the NFA fails":
				144	* nfa_regcomp() returns FAIL.
				145	*/
				146	static int syntax_error = FALSE;
				147
				148	/* NFA regexp \ze operator encountered. */
				149	static int nfa_has_zend = FALSE;
				150
				151	static int post_start; / holds the postfix form of r.e. */
				152	static int *post_end;
				153	static int *post_ptr;
				154
				155	static int nstate; /* Number of states in the NFA. */
				156	static int istate; /* Index in the state vector, used in new_state() */
				157	static int nstate_max; /* Upper bound of estimated number of states. */
				158
				159
				160	static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
				161	static int nfa_recognize_char_class __ARGS((char_u start, char_u end, int extra_newl));
				162	static int nfa_emit_equi_class __ARGS((int c, int neg));
				163	static void nfa_inc __ARGS((char_u **p));
				164	static void nfa_dec __ARGS((char_u **p));
				165	static int nfa_regatom __ARGS((void));
				166	static int nfa_regpiece __ARGS((void));
				167	static int nfa_regconcat __ARGS((void));
				168	static int nfa_regbranch __ARGS((void));
				169	static int nfa_reg __ARGS((int paren));
				170	#ifdef DEBUG
				171	static void nfa_set_code __ARGS((int c));
				172	static void nfa_postfix_dump __ARGS((char_u *expr, int retval));
				173	static void nfa_print_state __ARGS((FILE debugf, nfa_state_T state, int ident));
				174	static void nfa_dump __ARGS((nfa_regprog_T *prog));
				175	#endif
				176	static int *re2post __ARGS((void));
				177	static nfa_state_T new_state __ARGS((int c, nfa_state_T out, nfa_state_T *out1));
				178	static nfa_state_T post2nfa __ARGS((int postfix, int *end, int nfa_calc_size));
				179	static int check_char_class __ARGS((int class, int c));
				180	static void st_error __ARGS((int postfix, int end, int *p));
				181	static void nfa_save_listids __ARGS((nfa_state_T start, int list));
				182	static void nfa_restore_listids __ARGS((nfa_state_T start, int list));
				183	static void nfa_set_null_listids __ARGS((nfa_state_T *start));
				184	static void nfa_set_neg_listids __ARGS((nfa_state_T *start));
				185	static long nfa_regtry __ARGS((nfa_state_T *start, colnr_T col));
				186	static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
				187	static regprog_T nfa_regcomp __ARGS((char_u expr, int re_flags));
				188	static int nfa_regexec __ARGS((regmatch_T rmp, char_u line, colnr_T col));
				189	static long nfa_regexec_multi __ARGS((regmmatch_T rmp, win_T win, buf_T buf, linenr_T lnum, colnr_T col, proftime_T tm));
				190
				191	/* helper functions used when doing re2post() ... regatom() parsing */
				192	#define EMIT(c) do { \
				193	if (post_ptr >= post_end) \
				194	return FAIL; \
				195	*post_ptr++ = c; \
				196	} while (0)
				197
				198	#define EMIT_MBYTE(c) \
				199	len = (*mb_char2bytes)(c, buf); \
				200	EMIT(buf[0]); \
				201	for (i = 1; i < len; i++) \
				202	{ \
				203	EMIT(buf[i]); \
				204	EMIT(NFA_CONCAT); \
				205	} \
				206	EMIT(NFA_MULTIBYTE);
				207
				208	#define EMIT_COMPOSING_UTF(input) \
				209	len = utfc_ptr2len(input); \
				210	EMIT(input[0]); \
				211	for (i = 1; i < len; i++) \
				212	{ \
				213	EMIT(input[i]); \
				214	EMIT(NFA_CONCAT); \
				215	} \
				216	EMIT(NFA_COMPOSING);
				217
				218	/*
				219	* Initialize internal variables before NFA compilation.
				220	* Return OK on success, FAIL otherwise.
				221	*/
				222	static int
				223	nfa_regcomp_start(expr, re_flags)
				224	char_u *expr;
				225	int re_flags; /* see vim_regcomp() */
				226	{
				227	int postfix_size;
				228
				229	nstate = 0;
				230	istate = 0;
				231	/* A reasonable estimation for size */
				232	nstate_max = (STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
				233
				234	/* Size for postfix representation of expr */
				235	postfix_size = sizeof(post_start) nstate_max;
				236	post_start = (int *)lalloc(postfix_size, TRUE);
				237	if (post_start == NULL)
				238	return FAIL;
				239	vim_memset(post_start, 0, postfix_size);
				240	post_ptr = post_start;
				241	post_end = post_start + postfix_size;
				242	nfa_has_zend = FALSE;
				243
				244	regcomp_start(expr, re_flags);
				245
				246	return OK;
				247	}
				248
				249	/*
				250	* Search between "start" and "end" and try to recognize a
				251	* character class in expanded form. For example [0-9].
				252	* On success, return the id the character class to be emitted.
				253	* On failure, return 0 (=FAIL)
				254	* Start points to the first char of the range, while end should point
				255	* to the closing brace.
				256	*/
				257	static int
				258	nfa_recognize_char_class(start, end, extra_newl)
				259	char_u *start;
				260	char_u *end;
				261	int extra_newl;
				262	{
				263	int i;
				264	/* Each of these variables takes up a char in "config[]",
				265	* in the order they are here. */
				266	int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE,
				267	o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE;
				268	char_u *p;
				269	#define NCONFIGS 16
				270	int classid[NCONFIGS] = {
				271	NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX,
				272	NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD,
				273	NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA,
				274	NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER
				275	};
				276	char_u myconfig[9];
				277	char_u config[NCONFIGS][9] = {
				278	"000000100", /* digit */
				279	"100000100", /* non digit */
				280	"011000100", /* hex-digit */
				281	"111000100", /* non hex-digit */
				282	"000001000", /* octal-digit */
				283	"100001000", /* [^0-7] */
				284	"000110110", /* [0-9A-Za-z_] */
				285	"100110110", /* [^0-9A-Za-z_] */
				286	"000110010", /* head of word */
				287	"100110010", /* not head of word */
				288	"000110000", /* alphabetic char a-z */
				289	"100110000", /* non alphabetic char */
				290	"000100000", /* lowercase letter */
				291	"100100000", /* non lowercase */
				292	"000010000", /* uppercase */
				293	"100010000" /* non uppercase */
				294	};
				295
				296	if (extra_newl == TRUE)
				297	newl = TRUE;
				298
				299	if (*end != ']')
				300	return FAIL;
				301	p = start;
				302	if (*p == '^')
				303	{
				304	not = TRUE;
				305	p ++;
				306	}
				307
				308	while (p < end)
				309	{
				310	if (p + 2 < end && *(p + 1) == '-')
				311	{
				312	switch (*p)
				313	{
				314	case '0':
				315	if (*(p + 2) == '9')
				316	{
				317	o9 = TRUE;
				318	break;
				319	}
				320	else
				321	if (*(p + 2) == '7')
				322	{
				323	o7 = TRUE;
				324	break;
				325	}
				326	case 'a':
				327	if (*(p + 2) == 'z')
				328	{
				329	az = TRUE;
				330	break;
				331	}
				332	else
				333	if (*(p + 2) == 'f')
				334	{
				335	af = TRUE;
				336	break;
				337	}
				338	case 'A':
				339	if (*(p + 2) == 'Z')
				340	{
				341	AZ = TRUE;
				342	break;
				343	}
				344	else
				345	if (*(p + 2) == 'F')
				346	{
				347	AF = TRUE;
				348	break;
				349	}
				350	/* FALLTHROUGH */
				351	default:
				352	return FAIL;
				353	}
				354	p += 3;
				355	}
				356	else if (p + 1 < end && p == '\\' && (p + 1) == 'n')
				357	{
				358	newl = TRUE;
				359	p += 2;
				360	}
				361	else if (*p == '_')
				362	{
				363	underscore = TRUE;
				364	p ++;
				365	}
				366	else if (*p == '\n')
				367	{
				368	newl = TRUE;
				369	p ++;
				370	}
				371	else
				372	return FAIL;
				373	} /* while (p < end) */
				374
				375	if (p != end)
				376	return FAIL;
				377
				378	/* build the config that represents the ranges we gathered */
				379	STRCPY(myconfig, "000000000");
				380	if (not == TRUE)
				381	myconfig[0] = '1';
				382	if (af == TRUE)
				383	myconfig[1] = '1';
				384	if (AF == TRUE)
				385	myconfig[2] = '1';
				386	if (az == TRUE)
				387	myconfig[3] = '1';
				388	if (AZ == TRUE)
				389	myconfig[4] = '1';
				390	if (o7 == TRUE)
				391	myconfig[5] = '1';
				392	if (o9 == TRUE)
				393	myconfig[6] = '1';
				394	if (underscore == TRUE)
				395	myconfig[7] = '1';
				396	if (newl == TRUE)
				397	{
				398	myconfig[8] = '1';
				399	extra_newl = ADD_NL;
				400	}
				401	/* try to recognize character classes */
				402	for (i = 0; i < NCONFIGS; i++)
				403	if (STRNCMP(myconfig, config[i],8) == 0)
				404	return classid[i] + extra_newl;
				405
				406	/* fallthrough => no success so far */
				407	return FAIL;
				408
				409	#undef NCONFIGS
				410	}
				411
				412	/*
				413	* Produce the bytes for equivalence class "c".
				414	* Currently only handles latin1, latin9 and utf-8.
				415	* Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
				416	* equivalent to 'a OR b OR c'
				417	*
				418	* NOTE! When changing this function, also update reg_equi_class()
				419	*/
				420	static int
				421	nfa_emit_equi_class(c, neg)
				422	int c;
				423	int neg;
				424	{
				425	int first = TRUE;
				426	int glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
				427	#define EMIT2(c) \
				428	EMIT(c); \
				429	if (neg == TRUE) { \
				430	EMIT(NFA_NOT); \
				431	} \
				432	if (first == FALSE) \
				433	EMIT(glue); \
				434	else \
				435	first = FALSE; \
				436
				437	#ifdef FEAT_MBYTE
				438	if (enc_utf8 \|\| STRCMP(p_enc, "latin1") == 0
				439	\|\| STRCMP(p_enc, "iso-8859-15") == 0)
				440	#endif
				441	{
				442	switch (c)
				443	{
				444	case 'A': case '\300': case '\301': case '\302':
				445	case '\303': case '\304': case '\305':
				446	EMIT2('A'); EMIT2('\300'); EMIT2('\301');
				447	EMIT2('\302'); EMIT2('\303'); EMIT2('\304');
				448	EMIT2('\305');
				449	return OK;
				450
				451	case 'C': case '\307':
				452	EMIT2('C'); EMIT2('\307');
				453	return OK;
				454
				455	case 'E': case '\310': case '\311': case '\312': case '\313':
				456	EMIT2('E'); EMIT2('\310'); EMIT2('\311');
				457	EMIT2('\312'); EMIT2('\313');
				458	return OK;
				459
				460	case 'I': case '\314': case '\315': case '\316': case '\317':
				461	EMIT2('I'); EMIT2('\314'); EMIT2('\315');
				462	EMIT2('\316'); EMIT2('\317');
				463	return OK;
				464
				465	case 'N': case '\321':
				466	EMIT2('N'); EMIT2('\321');
				467	return OK;
				468
				469	case 'O': case '\322': case '\323': case '\324': case '\325':
				470	case '\326':
				471	EMIT2('O'); EMIT2('\322'); EMIT2('\323');
				472	EMIT2('\324'); EMIT2('\325'); EMIT2('\326');
				473	return OK;
				474
				475	case 'U': case '\331': case '\332': case '\333': case '\334':
				476	EMIT2('U'); EMIT2('\331'); EMIT2('\332');
				477	EMIT2('\333'); EMIT2('\334');
				478	return OK;
				479
				480	case 'Y': case '\335':
				481	EMIT2('Y'); EMIT2('\335');
				482	return OK;
				483
				484	case 'a': case '\340': case '\341': case '\342':
				485	case '\343': case '\344': case '\345':
				486	EMIT2('a'); EMIT2('\340'); EMIT2('\341');
				487	EMIT2('\342'); EMIT2('\343'); EMIT2('\344');
				488	EMIT2('\345');
				489	return OK;
				490
				491	case 'c': case '\347':
				492	EMIT2('c'); EMIT2('\347');
				493	return OK;
				494
				495	case 'e': case '\350': case '\351': case '\352': case '\353':
				496	EMIT2('e'); EMIT2('\350'); EMIT2('\351');
				497	EMIT2('\352'); EMIT2('\353');
				498	return OK;
				499
				500	case 'i': case '\354': case '\355': case '\356': case '\357':
				501	EMIT2('i'); EMIT2('\354'); EMIT2('\355');
				502	EMIT2('\356'); EMIT2('\357');
				503	return OK;
				504
				505	case 'n': case '\361':
				506	EMIT2('n'); EMIT2('\361');
				507	return OK;
				508
				509	case 'o': case '\362': case '\363': case '\364': case '\365':
				510	case '\366':
				511	EMIT2('o'); EMIT2('\362'); EMIT2('\363');
				512	EMIT2('\364'); EMIT2('\365'); EMIT2('\366');
				513	return OK;
				514
				515	case 'u': case '\371': case '\372': case '\373': case '\374':
				516	EMIT2('u'); EMIT2('\371'); EMIT2('\372');
				517	EMIT2('\373'); EMIT2('\374');
				518	return OK;
				519
				520	case 'y': case '\375': case '\377':
				521	EMIT2('y'); EMIT2('\375'); EMIT2('\377');
				522	return OK;
				523
				524	default:
				525	return FAIL;
				526	}
				527	}
				528
				529	EMIT(c);
				530	return OK;
				531	#undef EMIT2
				532	}
				533
				534	/*
				535	* Code to parse regular expression.
				536	*
				537	* We try to reuse parsing functions in regexp.c to
				538	* minimize surprise and keep the syntax consistent.
				539	*/
				540
				541	/*
				542	* Increments the pointer "p" by one (multi-byte) character.
				543	*/
				544	static void
				545	nfa_inc(p)
				546	char_u **p;
				547	{
				548	#ifdef FEAT_MBYTE
				549	if (has_mbyte)
				550	mb_ptr2char_adv(p);
				551	else
				552	#endif
				553	p = p + 1;
				554	}
				555
				556	/*
				557	* Decrements the pointer "p" by one (multi-byte) character.
				558	*/
				559	static void
				560	nfa_dec(p)
				561	char_u **p;
				562	{
				563	#ifdef FEAT_MBYTE
				564	char_u p2, oldp;
				565
				566	if (has_mbyte)
				567	{
				568	oldp = *p;
				569	/* Try to find the multibyte char that advances to the current
				570	* position. */
				571	do
				572	{
				573	p = p - 1;
				574	p2 = *p;
				575	mb_ptr2char_adv(&p2);
				576	} while (p2 != oldp);
				577	}
				578	#else
				579	p = p - 1;
				580	#endif
				581	}
				582
				583	/*
				584	* Parse the lowest level.
				585	*
				586	* An atom can be one of a long list of items. Many atoms match one character
				587	* in the text. It is often an ordinary character or a character class.
				588	* Braces can be used to make a pattern into an atom. The "\z(\)" construct
				589	* is only for syntax highlighting.
				590	*
				591	* atom ::= ordinary-atom
				592	* or $ pattern $
				593	* or \%( pattern \)
				594	* or \z( pattern \)
				595	*/
				596	static int
				597	nfa_regatom()
				598	{
				599	int c;
				600	int charclass;
				601	int equiclass;
				602	int collclass;
				603	int got_coll_char;
				604	char_u *p;
				605	char_u *endp;
				606	#ifdef FEAT_MBYTE
				607	char_u *old_regparse = regparse;
				608	int clen;
				609	int len;
				610	static char_u buf[30];
				611	int i;
				612	#endif
				613	int extra = 0;
				614	int first;
				615	int emit_range;
				616	int negated;
				617	int result;
				618	int startc = -1;
				619	int endc = -1;
				620	int oldstartc = -1;
				621	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				622	int cpo_bsl; /* 'cpoptions' contains '\' flag */
				623	int glue; /* ID that will "glue" nodes together */
				624
				625	cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
				626	cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
				627
				628	c = getchr();
				629
				630	#ifdef FEAT_MBYTE
				631	/* clen has the length of the current char, without composing chars */
				632	clen = (*mb_char2len)(c);
				633	if (has_mbyte && clen > 1)
				634	goto nfa_do_multibyte;
				635	#endif
				636	switch (c)
				637	{
				638	case Magic('^'):
				639	EMIT(NFA_BOL);
				640	break;
				641
				642	case Magic('$'):
				643	EMIT(NFA_EOL);
				644	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				645	had_eol = TRUE;
				646	#endif
				647	break;
				648
				649	case Magic('<'):
				650	EMIT(NFA_BOW);
				651	break;
				652
				653	case Magic('>'):
				654	EMIT(NFA_EOW);
				655	break;
				656
				657	case Magic('_'):
				658	c = no_Magic(getchr());
				659	if (c == '^') /* "\_^" is start-of-line */
				660	{
				661	EMIT(NFA_BOL);
				662	break;
				663	}
				664	if (c == '$') /* "\_$" is end-of-line */
				665	{
				666	EMIT(NFA_EOL);
				667	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				668	had_eol = TRUE;
				669	#endif
				670	break;
				671	}
				672
				673	extra = ADD_NL;
				674
				675	/* "\_[" is collection plus newline */
				676	if (c == '[')
				677	/* TODO: make this work
				678	* goto collection; */
				679	return FAIL;
				680
				681	/* "\_x" is character class plus newline */
				682	/FALLTHROUGH/
				683
				684	/*
				685	* Character classes.
				686	*/
				687	case Magic('.'):
				688	case Magic('i'):
				689	case Magic('I'):
				690	case Magic('k'):
				691	case Magic('K'):
				692	case Magic('f'):
				693	case Magic('F'):
				694	case Magic('p'):
				695	case Magic('P'):
				696	case Magic('s'):
				697	case Magic('S'):
				698	case Magic('d'):
				699	case Magic('D'):
				700	case Magic('x'):
				701	case Magic('X'):
				702	case Magic('o'):
				703	case Magic('O'):
				704	case Magic('w'):
				705	case Magic('W'):
				706	case Magic('h'):
				707	case Magic('H'):
				708	case Magic('a'):
				709	case Magic('A'):
				710	case Magic('l'):
				711	case Magic('L'):
				712	case Magic('u'):
				713	case Magic('U'):
				714	p = vim_strchr(classchars, no_Magic(c));
				715	if (p == NULL)
				716	{
				717	return FAIL; /* runtime error */
				718	}
				719	#ifdef FEAT_MBYTE
				720	/* When '.' is followed by a composing char ignore the dot, so that
				721	* the composing char is matched here. */
				722	if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
				723	{
				724	c = getchr();
				725	goto nfa_do_multibyte;
				726	}
				727	#endif
				728	EMIT(nfa_classcodes[p - classchars]);
				729	if (extra == ADD_NL)
				730	{
				731	EMIT(NFA_NEWL);
				732	EMIT(NFA_OR);
				733	regflags \|= RF_HASNL;
				734	}
				735	break;
				736
				737	case Magic('n'):
				738	if (reg_string)
				739	/* In a string "\n" matches a newline character. */
				740	EMIT(NL);
				741	else
				742	{
				743	/* In buffer text "\n" matches the end of a line. */
				744	EMIT(NFA_NEWL);
				745	regflags \|= RF_HASNL;
				746	}
				747	break;
				748
				749	case Magic('('):
				750	if (nfa_reg(REG_PAREN) == FAIL)
				751	return FAIL; /* cascaded error */
				752	break;
				753
				754	case NUL:
				755	syntax_error = TRUE;
				756	EMSG_RET_FAIL(_("E865: (NFA) Regexp end encountered prematurely"));
				757
				758	case Magic('\|'):
				759	case Magic('&'):
				760	case Magic(')'):
				761	syntax_error = TRUE;
				762	EMSG2(_(e_misplaced), no_Magic(c));
				763	return FAIL;
				764
				765	case Magic('='):
				766	case Magic('?'):
				767	case Magic('+'):
				768	case Magic('@'):
				769	case Magic('*'):
				770	case Magic('{'):
				771	/* these should follow an atom, not form an atom */
				772	syntax_error = TRUE;
				773	EMSG2(_(e_misplaced), no_Magic(c));
				774	return FAIL;
				775
				776	case Magic('~'): /* previous substitute pattern */
				777	/* Not supported yet */
				778	return FAIL;
				779
				780	case Magic('1'):
				781	case Magic('2'):
				782	case Magic('3'):
				783	case Magic('4'):
				784	case Magic('5'):
				785	case Magic('6'):
				786	case Magic('7'):
				787	case Magic('8'):
				788	case Magic('9'):
				789	/* not supported yet */
				790	return FAIL;
				791
				792	case Magic('z'):
				793	c = no_Magic(getchr());
				794	switch (c)
				795	{
				796	case 's':
				797	EMIT(NFA_ZSTART);
				798	break;
				799	case 'e':
				800	EMIT(NFA_ZEND);
				801	nfa_has_zend = TRUE;
				802	/* TODO: Currently \ze does not work properly. */
				803	return FAIL;
				804	/* break; */
				805	case '1':
				806	case '2':
				807	case '3':
				808	case '4':
				809	case '5':
				810	case '6':
				811	case '7':
				812	case '8':
				813	case '9':
				814	case '(':
				815	/* \z1...\z9 and \z( not yet supported */
				816	return FAIL;
				817	default:
				818	syntax_error = TRUE;
				819	EMSG2(_("E867: (NFA) Unknown operator '\\z%c'"),
				820	no_Magic(c));
				821	return FAIL;
				822	}
				823	break;
				824
				825	case Magic('%'):
				826	c = no_Magic(getchr());
				827	switch (c)
				828	{
				829	/* () without a back reference */
				830	case '(':
				831	if (nfa_reg(REG_NPAREN) == FAIL)
				832	return FAIL;
				833	EMIT(NFA_NOPEN);
				834	break;
				835
				836	case 'd': /* %d123 decimal */
				837	case 'o': /* %o123 octal */
				838	case 'x': /* %xab hex 2 */
				839	case 'u': /* %uabcd hex 4 */
				840	case 'U': /* %U1234abcd hex 8 */
				841	/* Not yet supported */
				842	return FAIL;
				843
				844	c = coll_get_char();
				845	#ifdef FEAT_MBYTE
				846	if ((*mb_char2len)(c) > 1)
				847	{
				848	EMIT_MBYTE(c);
				849	}
				850	else
				851	#endif
				852	EMIT(c);
				853	break;
				854
				855	/* Catch \%^ and \%$ regardless of where they appear in the
				856	* pattern -- regardless of whether or not it makes sense. */
				857	case '^':
				858	EMIT(NFA_BOF);
				859	/* Not yet supported */
				860	return FAIL;
				861	break;
				862
				863	case '$':
				864	EMIT(NFA_EOF);
				865	/* Not yet supported */
				866	return FAIL;
				867	break;
				868
				869	case '#':
				870	/* not supported yet */
				871	return FAIL;
				872	break;
				873
				874	case 'V':
				875	/* not supported yet */
				876	return FAIL;
				877	break;
				878
				879	case '[':
				880	/* \%[abc] not supported yet */
				881	return FAIL;
				882
				883	default:
				884	/* not supported yet */
				885	return FAIL;
				886	}
				887	break;
				888
				889	/* collection: */
				890	case Magic('['):
				891	/*
				892	* Glue is emitted between several atoms from the [].
				893	* It is either NFA_OR, or NFA_CONCAT.
				894	*
				895	* [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
				896	* [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
				897	* NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
				898	* notation)
				899	*
				900	*/
				901
				902
				903	/* Emit negation atoms, if needed.
				904	* The CONCAT below merges the NOT with the previous node. */
				905	#define TRY_NEG() \
				906	if (negated == TRUE) \
				907	{ \
				908	EMIT(NFA_NOT); \
				909	}
				910
				911	/* Emit glue between important nodes : CONCAT or OR. */
				912	#define EMIT_GLUE() \
				913	if (first == FALSE) \
				914	EMIT(glue); \
				915	else \
				916	first = FALSE;
				917
				918	p = regparse;
				919	endp = skip_anyof(p);
				920	if (*endp == ']')
				921	{
				922	/*
				923	* Try to reverse engineer character classes. For example,
				924	* recognize that [0-9] stands for \d and [A-Za-z_] with \h,
				925	* and perform the necessary substitutions in the NFA.
				926	*/
				927	result = nfa_recognize_char_class(regparse, endp,
				928	extra == ADD_NL);
				929	if (result != FAIL)
				930	{
				931	if (result >= NFA_DIGIT && result <= NFA_NUPPER)
				932	EMIT(result);
				933	else /* must be char class + newline */
				934	{
				935	EMIT(result - ADD_NL);
				936	EMIT(NFA_NEWL);
				937	EMIT(NFA_OR);
				938	}
				939	regparse = endp;
				940	nfa_inc(&regparse);
				941	return OK;
				942	}
				943	/*
				944	* Failed to recognize a character class. Use the simple
				945	* version that turns [abc] into 'a' OR 'b' OR 'c'
				946	*/
				947	startc = endc = oldstartc = -1;
				948	first = TRUE; /* Emitting first atom in this sequence? */
				949	negated = FALSE;
				950	glue = NFA_OR;
				951	if (regparse == '^') / negated range */
				952	{
				953	negated = TRUE;
				954	glue = NFA_CONCAT;
				955	nfa_inc(&regparse);
				956	}
				957	if (*regparse == '-')
				958	{
				959	startc = '-';
				960	EMIT(startc);
				961	TRY_NEG();
				962	EMIT_GLUE();
				963	nfa_inc(&regparse);
				964	}
				965	/* Emit the OR branches for each character in the [] */
				966	emit_range = FALSE;
				967	while (regparse < endp)
				968	{
				969	oldstartc = startc;
				970	startc = -1;
				971	got_coll_char = FALSE;
				972	if (*regparse == '[')
				973	{
				974	/* Check for [: :], [= =], [. .] */
				975	equiclass = collclass = 0;
				976	charclass = get_char_class(&regparse);
				977	if (charclass == CLASS_NONE)
				978	{
				979	equiclass = get_equi_class(&regparse);
				980	if (equiclass == 0)
				981	collclass = get_coll_element(&regparse);
				982	}
				983
				984	/* Character class like [:alpha:] */
				985	if (charclass != CLASS_NONE)
				986	{
				987	switch (charclass)
				988	{
				989	case CLASS_ALNUM:
				990	EMIT(NFA_CLASS_ALNUM);
				991	break;
				992	case CLASS_ALPHA:
				993	EMIT(NFA_CLASS_ALPHA);
				994	break;
				995	case CLASS_BLANK:
				996	EMIT(NFA_CLASS_BLANK);
				997	break;
				998	case CLASS_CNTRL:
				999	EMIT(NFA_CLASS_CNTRL);
				1000	break;
				1001	case CLASS_DIGIT:
				1002	EMIT(NFA_CLASS_DIGIT);
				1003	break;
				1004	case CLASS_GRAPH:
				1005	EMIT(NFA_CLASS_GRAPH);
				1006	break;
				1007	case CLASS_LOWER:
				1008	EMIT(NFA_CLASS_LOWER);
				1009	break;
				1010	case CLASS_PRINT:
				1011	EMIT(NFA_CLASS_PRINT);
				1012	break;
				1013	case CLASS_PUNCT:
				1014	EMIT(NFA_CLASS_PUNCT);
				1015	break;
				1016	case CLASS_SPACE:
				1017	EMIT(NFA_CLASS_SPACE);
				1018	break;
				1019	case CLASS_UPPER:
				1020	EMIT(NFA_CLASS_UPPER);
				1021	break;
				1022	case CLASS_XDIGIT:
				1023	EMIT(NFA_CLASS_XDIGIT);
				1024	break;
				1025	case CLASS_TAB:
				1026	EMIT(NFA_CLASS_TAB);
				1027	break;
				1028	case CLASS_RETURN:
				1029	EMIT(NFA_CLASS_RETURN);
				1030	break;
				1031	case CLASS_BACKSPACE:
				1032	EMIT(NFA_CLASS_BACKSPACE);
				1033	break;
				1034	case CLASS_ESCAPE:
				1035	EMIT(NFA_CLASS_ESCAPE);
				1036	break;
				1037	}
				1038	TRY_NEG();
				1039	EMIT_GLUE();
				1040	continue;
				1041	}
				1042	/* Try equivalence class [=a=] and the like */
				1043	if (equiclass != 0)
				1044	{
				1045	result = nfa_emit_equi_class(equiclass, negated);
				1046	if (result == FAIL)
				1047	{
				1048	/* should never happen */
				1049	EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
				1050	}
				1051	EMIT_GLUE();
				1052	continue;
				1053	}
				1054	/* Try collating class like [. .] */
				1055	if (collclass != 0)
				1056	{
				1057	startc = collclass; /* allow [.a.]-x as a range */
				1058	/* Will emit the proper atom at the end of the
				1059	* while loop. */
				1060	}
				1061	}
				1062	/* Try a range like 'a-x' or '\t-z' */
				1063	if (*regparse == '-')
				1064	{
				1065	emit_range = TRUE;
				1066	startc = oldstartc;
				1067	nfa_inc(&regparse);
				1068	continue; /* reading the end of the range */
				1069	}
				1070
				1071	/* Now handle simple and escaped characters.
				1072	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1073	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1074	* 'cpoptions' is not included.
				1075	* Posix doesn't recognize backslash at all.
				1076	*/
				1077	if (*regparse == '\\'
				1078	&& !cpo_bsl
				1079	&& regparse + 1 <= endp
				1080	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1081	\|\| (!cpo_lit
				1082	&& vim_strchr(REGEXP_ABBR, regparse[1])
				1083	!= NULL)
				1084	)
				1085	)
				1086	{
				1087	nfa_inc(&regparse);
				1088
				1089	if (regparse == 'n' \|\| regparse == 'n')
				1090	startc = reg_string ? NL : NFA_NEWL;
				1091	else
				1092	if (*regparse == 'd'
				1093	\|\| *regparse == 'o'
				1094	\|\| *regparse == 'x'
				1095	\|\| *regparse == 'u'
				1096	\|\| *regparse == 'U'
				1097	)
				1098	{
				1099	/* TODO(RE) This needs more testing */
				1100	startc = coll_get_char();
				1101	got_coll_char = TRUE;
				1102	nfa_dec(&regparse);
				1103	}
				1104	else
				1105	{
				1106	/* \r,\t,\e,\b */
				1107	startc = backslash_trans(*regparse);
				1108	}
				1109	}
				1110
				1111	/* Normal printable char */
				1112	if (startc == -1)
				1113	#ifdef FEAT_MBYTE
				1114	startc = (*mb_ptr2char)(regparse);
				1115	#else
				1116	startc = *regparse;
				1117	#endif
				1118
				1119	/* Previous char was '-', so this char is end of range. */
				1120	if (emit_range)
				1121	{
				1122	endc = startc; startc = oldstartc;
				1123	if (startc > endc)
				1124	EMSG_RET_FAIL(_(e_invrange));
				1125	#ifdef FEAT_MBYTE
				1126	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1127	\|\| (*mb_char2len)(endc) > 1))
				1128	{
				1129	if (endc > startc + 256)
				1130	EMSG_RET_FAIL(_(e_invrange));
				1131	/* Emit the range. "startc" was already emitted, so
				1132	* skip it. */
				1133	for (c = startc + 1; c <= endc; c++)
				1134	{
				1135	if ((*mb_char2len)(c) > 1)
				1136	{
				1137	EMIT_MBYTE(c);
				1138	}
				1139	else
				1140	EMIT(c);
				1141	TRY_NEG();
				1142	EMIT_GLUE();
				1143	}
				1144	emit_range = FALSE;
				1145	}
				1146	else
				1147	#endif
				1148	{
				1149	#ifdef EBCDIC
				1150	int alpha_only = FALSE;
				1151
				1152	/* for alphabetical range skip the gaps
				1153	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1154	if (isalpha(startc) && isalpha(endc))
				1155	alpha_only = TRUE;
				1156	#endif
				1157	/* Emit the range. "startc" was already emitted, so
				1158	* skip it. */
				1159	for (c = startc + 1; c <= endc; c++)
				1160	#ifdef EBCDIC
				1161	if (!alpha_only \|\| isalpha(startc))
				1162	#endif
				1163	{
				1164	EMIT(c);
				1165	TRY_NEG();
				1166	EMIT_GLUE();
				1167	}
				1168	emit_range = FALSE;
				1169	}
				1170	}
				1171	else
				1172	{
				1173	/*
				1174	* This char (startc) is not part of a range. Just
				1175	* emit it.
				1176	*
				1177	* Normally, simply emit startc. But if we get char
				1178	* code=0 from a collating char, then replace it with
				1179	* 0x0a.
				1180	*
				1181	* This is needed to completely mimic the behaviour of
				1182	* the backtracking engine.
				1183	*/
				1184	if (got_coll_char == TRUE && startc == 0)
				1185	EMIT(0x0a);
				1186	else
				1187	#ifdef FEAT_MBYTE
				1188	if ((*mb_char2len)(startc) > 1)
				1189	{
				1190	EMIT_MBYTE(startc);
				1191	}
				1192	else
				1193	#endif
				1194	EMIT(startc);
				1195	TRY_NEG();
				1196	EMIT_GLUE();
				1197	}
				1198
				1199	nfa_inc(&regparse);
				1200	} /* while (p < endp) */
				1201
				1202	nfa_dec(&regparse);
				1203	if (regparse == '-') / if last, '-' is just a char */
				1204	{
				1205	EMIT('-');
				1206	TRY_NEG();
				1207	EMIT_GLUE();
				1208	}
				1209	nfa_inc(&regparse);
				1210
				1211	if (extra == ADD_NL) /* \_[] also matches \n */
				1212	{
				1213	EMIT(reg_string ? NL : NFA_NEWL);
				1214	TRY_NEG();
				1215	EMIT_GLUE();
				1216	}
				1217
				1218	/* skip the trailing ] */
				1219	regparse = endp;
				1220	nfa_inc(&regparse);
				1221	if (negated == TRUE)
				1222	{
				1223	/* Mark end of negated char range */
				1224	EMIT(NFA_END_NEG_RANGE);
				1225	EMIT(NFA_CONCAT);
				1226	}
				1227	return OK;
				1228	} /* if exists closing ] */
				1229	else if (reg_strict)
				1230	{
				1231	syntax_error = TRUE;
				1232	EMSG_RET_FAIL(_(e_missingbracket));
				1233	}
				1234
				1235	/* FALLTHROUGH */
				1236	default:
				1237	{
				1238	#ifdef FEAT_MBYTE
				1239	int plen;
				1240
				1241	nfa_do_multibyte:
				1242	/* length of current char, with composing chars,
				1243	* from pointer */
				1244	plen = (*mb_ptr2len)(old_regparse);
				1245	if (enc_utf8 && clen != plen)
				1246	{
				1247	/* A composing character is always handled as a
				1248	* separate atom, surrounded by NFA_COMPOSING and
				1249	* NFA_END_COMPOSING. Note that right now we are
				1250	* building the postfix form, not the NFA itself;
				1251	* a composing char could be: a, b, c, NFA_COMPOSING
				1252	* where 'a', 'b', 'c' are chars with codes > 256.
				1253	*/
				1254	EMIT_COMPOSING_UTF(old_regparse);
				1255	regparse = old_regparse + plen;
				1256	}
				1257	else
				1258	/* A multi-byte character is always handled as a
				1259	* separate atom, surrounded by NFA_MULTIBYTE and
				1260	* NFA_END_MULTIBYTE */
				1261	if (plen > 1)
				1262	{
				1263	EMIT_MBYTE(c);
				1264	}
				1265	else
				1266	#endif
				1267	{
				1268	c = no_Magic(c);
				1269	EMIT(c);
				1270	}
				1271	return OK;
				1272	}
				1273	}
				1274
				1275	#undef TRY_NEG
				1276	#undef EMIT_GLUE
				1277
				1278	return OK;
				1279	}
				1280
				1281	/*
				1282	* Parse something followed by possible [*+=].
				1283	*
				1284	* A piece is an atom, possibly followed by a multi, an indication of how many
				1285	* times the atom can be matched. Example: "a*" matches any sequence of "a"
				1286	* characters: "", "a", "aa", etc.
				1287	*
				1288	* piece ::= atom
				1289	* or atom multi
				1290	*/
				1291	static int
				1292	nfa_regpiece()
				1293	{
				1294	int i;
				1295	int op;
				1296	int ret;
				1297	long minval, maxval;
				1298	int greedy = TRUE; /* Braces are prefixed with '-' ? */
				1299	char_u old_regparse, new_regparse;
				1300	int c2;
				1301	int old_post_ptr, my_post_start;
				1302	int old_regnpar;
				1303	int quest;
				1304
				1305	/* Save the current position in the regexp, so that we can use it if
				1306	* <atom>{m,n} is next. */
				1307	old_regparse = regparse;
				1308	/* Save current number of open parenthesis, so we can use it if
				1309	* <atom>{m,n} is next */
				1310	old_regnpar = regnpar;
				1311	/* store current pos in the postfix form, for \{m,n} involving 0s */
				1312	my_post_start = post_ptr;
				1313
				1314	ret = nfa_regatom();
				1315	if (ret == FAIL)
				1316	return FAIL; /* cascaded error */
				1317
				1318	op = peekchr();
				1319	if (re_multi_type(op) == NOT_MULTI)
				1320	return OK;
				1321
				1322	skipchr();
				1323	switch (op)
				1324	{
				1325	case Magic('*'):
				1326	EMIT(NFA_STAR);
				1327	break;
				1328
				1329	case Magic('+'):
				1330	/*
				1331	* Trick: Normally, (a*)\+ would match the whole input "aaa". The
				1332	* first and only submatch would be "aaa". But the backtracking
				1333	* engine interprets the plus as "try matching one more time", and
				1334	* a* matches a second time at the end of the input, the empty
				1335	* string.
				1336	* The submatch will the empty string.
				1337	*
				1338	* In order to be consistent with the old engine, we disable
				1339	* NFA_PLUS, and replace <atom>+ with <atom><atom>*
				1340	*/
				1341	/* EMIT(NFA_PLUS); */
				1342	regnpar = old_regnpar;
				1343	regparse = old_regparse;
				1344	curchr = -1;
				1345	if (nfa_regatom() == FAIL)
				1346	return FAIL;
				1347	EMIT(NFA_STAR);
				1348	EMIT(NFA_CONCAT);
				1349	skipchr(); /* skip the \+ */
				1350	break;
				1351
				1352	case Magic('@'):
				1353	op = no_Magic(getchr());
				1354	switch(op)
				1355	{
				1356	case '=':
				1357	EMIT(NFA_PREV_ATOM_NO_WIDTH);
				1358	break;
				1359	case '!':
				1360	case '<':
				1361	case '>':
				1362	/* Not supported yet */
				1363	return FAIL;
				1364	default:
				1365	syntax_error = TRUE;
				1366	EMSG2(_("E869: (NFA) Unknown operator '\\@%c'"), op);
				1367	return FAIL;
				1368	}
				1369	break;
				1370
				1371	case Magic('?'):
				1372	case Magic('='):
				1373	EMIT(NFA_QUEST);
				1374	break;
				1375
				1376	case Magic('{'):
				1377	/* a{2,5} will expand to 'aaa?a?a?'
				1378	* a{-1,3} will expand to 'aa??a??', where ?? is the nongreedy
				1379	* version of '?'
				1380	* \v(ab){2,3} will expand to '(ab)(ab)(ab)?', where all the
				1381	* parenthesis have the same id
				1382	*/
				1383
				1384	greedy = TRUE;
				1385	c2 = peekchr();
				1386	if (c2 == '-' \|\| c2 == Magic('-'))
				1387	{
				1388	skipchr();
				1389	greedy = FALSE;
				1390	}
				1391	if (!read_limits(&minval, &maxval))
				1392	{
				1393	syntax_error = TRUE;
				1394	EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits"));
				1395	}
				1396	/* <atom>{0,inf}, <atom>{0,} and <atom>{} are equivalent to
				1397	* <atom>* */
				1398	if (minval == 0 && maxval == MAX_LIMIT && greedy)
				1399	{
				1400	EMIT(NFA_STAR);
				1401	break;
				1402	}
				1403
				1404	if (maxval > NFA_BRACES_MAXLIMIT)
				1405	{
				1406	/* This would yield a huge automaton and use too much memory.
				1407	* Revert to old engine */
				1408	return FAIL;
				1409	}
				1410
				1411	/* Special case: x{0} or x{-0} */
				1412	if (maxval == 0)
				1413	{
				1414	/* Ignore result of previous call to nfa_regatom() */
				1415	post_ptr = my_post_start;
				1416	/* NFA_SKIP_CHAR has 0-length and works everywhere */
				1417	EMIT(NFA_SKIP_CHAR);
				1418	return OK;
				1419	}
				1420
				1421	/* Ignore previous call to nfa_regatom() */
				1422	post_ptr = my_post_start;
				1423	/* Save pos after the repeated atom and the \{} */
				1424	new_regparse = regparse;
				1425
				1426	new_regparse = regparse;
				1427	quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
				1428	for (i = 0; i < maxval; i++)
				1429	{
				1430	/* Goto beginning of the repeated atom */
				1431	regparse = old_regparse;
				1432	curchr = -1;
				1433	/* Restore count of parenthesis */
				1434	regnpar = old_regnpar;
				1435	old_post_ptr = post_ptr;
				1436	if (nfa_regatom() == FAIL)
				1437	return FAIL;
				1438	/* after "minval" times, atoms are optional */
				1439	if (i + 1 > minval)
				1440	EMIT(quest);
				1441	if (old_post_ptr != my_post_start)
				1442	EMIT(NFA_CONCAT);
				1443	}
				1444
				1445	/* Go to just after the repeated atom and the \{} */
				1446	regparse = new_regparse;
				1447	curchr = -1;
				1448
				1449	break;
				1450
				1451
				1452	default:
				1453	break;
				1454	} /* end switch */
				1455
				1456	if (re_multi_type(peekchr()) != NOT_MULTI)
				1457	{
				1458	/* Can't have a multi follow a multi. */
				1459	syntax_error = TRUE;
				1460	EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !"));
				1461	}
				1462
				1463	return OK;
				1464	}
				1465
				1466	/*
				1467	* Parse one or more pieces, concatenated. It matches a match for the
				1468	* first piece, followed by a match for the second piece, etc. Example:
				1469	* "f[0-9]b", first matches "f", then a digit and then "b".
				1470	*
				1471	* concat ::= piece
				1472	* or piece piece
				1473	* or piece piece piece
				1474	* etc.
				1475	*/
				1476	static int
				1477	nfa_regconcat()
				1478	{
				1479	int cont = TRUE;
				1480	int first = TRUE;
				1481
				1482	while (cont)
				1483	{
				1484	switch (peekchr())
				1485	{
				1486	case NUL:
				1487	case Magic('\|'):
				1488	case Magic('&'):
				1489	case Magic(')'):
				1490	cont = FALSE;
				1491	break;
				1492
				1493	case Magic('Z'):
				1494	#ifdef FEAT_MBYTE
				1495	regflags \|= RF_ICOMBINE;
				1496	#endif
				1497	skipchr_keepstart();
				1498	break;
				1499	case Magic('c'):
				1500	regflags \|= RF_ICASE;
				1501	skipchr_keepstart();
				1502	break;
				1503	case Magic('C'):
				1504	regflags \|= RF_NOICASE;
				1505	skipchr_keepstart();
				1506	break;
				1507	case Magic('v'):
				1508	reg_magic = MAGIC_ALL;
				1509	skipchr_keepstart();
				1510	curchr = -1;
				1511	break;
				1512	case Magic('m'):
				1513	reg_magic = MAGIC_ON;
				1514	skipchr_keepstart();
				1515	curchr = -1;
				1516	break;
				1517	case Magic('M'):
				1518	reg_magic = MAGIC_OFF;
				1519	skipchr_keepstart();
				1520	curchr = -1;
				1521	break;
				1522	case Magic('V'):
				1523	reg_magic = MAGIC_NONE;
				1524	skipchr_keepstart();
				1525	curchr = -1;
				1526	break;
				1527
				1528	default:
				1529	if (nfa_regpiece() == FAIL)
				1530	return FAIL;
				1531	if (first == FALSE)
				1532	EMIT(NFA_CONCAT);
				1533	else
				1534	first = FALSE;
				1535	break;
				1536	}
				1537	}
				1538
				1539	return OK;
				1540	}
				1541
				1542	/*
				1543	* Parse a branch, one or more concats, separated by "\&". It matches the
				1544	* last concat, but only if all the preceding concats also match at the same
				1545	* position. Examples:
				1546	* "foobeep\&..." matches "foo" in "foobeep".
				1547	* ".Peter\&.Bob" matches in a line containing both "Peter" and "Bob"
				1548	*
				1549	* branch ::= concat
				1550	* or concat \& concat
				1551	* or concat \& concat \& concat
				1552	* etc.
				1553	*/
				1554	static int
				1555	nfa_regbranch()
				1556	{
				1557	int ch;
				1558	int *old_post_ptr;
				1559
				1560	old_post_ptr = post_ptr;
				1561
				1562	/* First branch, possibly the only one */
				1563	if (nfa_regconcat() == FAIL)
				1564	return FAIL;
				1565
				1566	ch = peekchr();
				1567	/* Try next concats */
				1568	while (ch == Magic('&'))
				1569	{
				1570	skipchr();
				1571	EMIT(NFA_NOPEN);
				1572	EMIT(NFA_PREV_ATOM_NO_WIDTH);
				1573	old_post_ptr = post_ptr;
				1574	if (nfa_regconcat() == FAIL)
				1575	return FAIL;
				1576	/* if concat is empty, skip a input char. But do emit a node */
				1577	if (old_post_ptr == post_ptr)
				1578	EMIT(NFA_SKIP_CHAR);
				1579	EMIT(NFA_CONCAT);
				1580	ch = peekchr();
				1581	}
				1582
				1583	/* Even if a branch is empty, emit one node for it */
				1584	if (old_post_ptr == post_ptr)
				1585	EMIT(NFA_SKIP_CHAR);
				1586
				1587	return OK;
				1588	}
				1589
				1590	/*
				1591	* Parse a pattern, one or more branches, separated by "\\|". It matches
				1592	* anything that matches one of the branches. Example: "foo\\|beep" matches
				1593	* "foo" and matches "beep". If more than one branch matches, the first one
				1594	* is used.
				1595	*
				1596	* pattern ::= branch
				1597	* or branch \\| branch
				1598	* or branch \\| branch \\| branch
				1599	* etc.
				1600	*/
				1601	static int
				1602	nfa_reg(paren)
				1603	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1604	{
				1605	int parno = 0;
				1606
				1607	#ifdef FEAT_SYN_HL
				1608	#endif
				1609	if (paren == REG_PAREN)
				1610	{
				1611	if (regnpar >= NSUBEXP) /* Too many `(' */
				1612	{
				1613	syntax_error = TRUE;
				1614	EMSG_RET_FAIL(_("E872: (NFA regexp) Too many '('"));
				1615	}
				1616	parno = regnpar++;
				1617	}
				1618
				1619	if (nfa_regbranch() == FAIL)
				1620	return FAIL; /* cascaded error */
				1621
				1622	while (peekchr() == Magic('\|'))
				1623	{
				1624	skipchr();
				1625	if (nfa_regbranch() == FAIL)
				1626	return FAIL; /* cascaded error */
				1627	EMIT(NFA_OR);
				1628	}
				1629
				1630	/* Check for proper termination. */
				1631	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1632	{
				1633	syntax_error = TRUE;
				1634	if (paren == REG_NPAREN)
				1635	EMSG2_RET_FAIL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
				1636	else
				1637	EMSG2_RET_FAIL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
				1638	}
				1639	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1640	{
				1641	syntax_error = TRUE;
				1642	if (peekchr() == Magic(')'))
				1643	EMSG2_RET_FAIL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
				1644	else
				1645	EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error"));
				1646	}
				1647	/*
				1648	* Here we set the flag allowing back references to this set of
				1649	* parentheses.
				1650	*/
				1651	if (paren == REG_PAREN)
				1652	{
				1653	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1654	EMIT(NFA_MOPEN + parno);
				1655	}
				1656
				1657	return OK;
				1658	}
				1659
				1660	typedef struct
				1661	{
				1662	char_u *start[NSUBEXP];
				1663	char_u *end[NSUBEXP];
				1664	lpos_T startpos[NSUBEXP];
				1665	lpos_T endpos[NSUBEXP];
				1666	} regsub_T;
				1667
				1668	static int nfa_regmatch __ARGS((nfa_state_T start, regsub_T submatch, regsub_T *m));
				1669
				1670	#ifdef DEBUG
				1671	static char_u code[50];
				1672
				1673	static void
				1674	nfa_set_code(c)
				1675	int c;
				1676	{
				1677	int addnl = FALSE;
				1678
				1679	if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
				1680	{
				1681	addnl = TRUE;
				1682	c -= ADD_NL;
				1683	}
				1684
				1685	STRCPY(code, "");
				1686	switch (c)
				1687	{
				1688	case NFA_MATCH: STRCPY(code, "NFA_MATCH "); break;
				1689	case NFA_SPLIT: STRCPY(code, "NFA_SPLIT "); break;
				1690	case NFA_CONCAT: STRCPY(code, "NFA_CONCAT "); break;
				1691	case NFA_NEWL: STRCPY(code, "NFA_NEWL "); break;
				1692	case NFA_ZSTART: STRCPY(code, "NFA_ZSTART"); break;
				1693	case NFA_ZEND: STRCPY(code, "NFA_ZEND"); break;
				1694
				1695	case NFA_PREV_ATOM_NO_WIDTH:
				1696	STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
				1697	case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break;
				1698	case NFA_NCLOSE: STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break;
				1699	case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
				1700	case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
				1701
				1702	case NFA_MULTIBYTE: STRCPY(code, "NFA_MULTIBYTE"); break;
				1703	case NFA_END_MULTIBYTE: STRCPY(code, "NFA_END_MULTIBYTE"); break;
				1704
				1705	case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break;
				1706	case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break;
				1707
				1708	case NFA_MOPEN + 0:
				1709	case NFA_MOPEN + 1:
				1710	case NFA_MOPEN + 2:
				1711	case NFA_MOPEN + 3:
				1712	case NFA_MOPEN + 4:
				1713	case NFA_MOPEN + 5:
				1714	case NFA_MOPEN + 6:
				1715	case NFA_MOPEN + 7:
				1716	case NFA_MOPEN + 8:
				1717	case NFA_MOPEN + 9:
				1718	STRCPY(code, "NFA_MOPEN(x)");
				1719	code[10] = c - NFA_MOPEN + '0';
				1720	break;
				1721	case NFA_MCLOSE + 0:
				1722	case NFA_MCLOSE + 1:
				1723	case NFA_MCLOSE + 2:
				1724	case NFA_MCLOSE + 3:
				1725	case NFA_MCLOSE + 4:
				1726	case NFA_MCLOSE + 5:
				1727	case NFA_MCLOSE + 6:
				1728	case NFA_MCLOSE + 7:
				1729	case NFA_MCLOSE + 8:
				1730	case NFA_MCLOSE + 9:
				1731	STRCPY(code, "NFA_MCLOSE(x)");
				1732	code[11] = c - NFA_MCLOSE + '0';
				1733	break;
				1734	case NFA_EOL: STRCPY(code, "NFA_EOL "); break;
				1735	case NFA_BOL: STRCPY(code, "NFA_BOL "); break;
				1736	case NFA_EOW: STRCPY(code, "NFA_EOW "); break;
				1737	case NFA_BOW: STRCPY(code, "NFA_BOW "); break;
				1738	case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
				1739	case NFA_PLUS: STRCPY(code, "NFA_PLUS "); break;
				1740	case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
				1741	case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
				1742	case NFA_OR: STRCPY(code, "NFA_OR"); break;
				1743	case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
				1744	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
				1745	case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
				1746	case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
				1747	case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
				1748	case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
				1749	case NFA_CLASS_CNTRL: STRCPY(code, "NFA_CLASS_CNTRL"); break;
				1750	case NFA_CLASS_DIGIT: STRCPY(code, "NFA_CLASS_DIGIT"); break;
				1751	case NFA_CLASS_GRAPH: STRCPY(code, "NFA_CLASS_GRAPH"); break;
				1752	case NFA_CLASS_LOWER: STRCPY(code, "NFA_CLASS_LOWER"); break;
				1753	case NFA_CLASS_PRINT: STRCPY(code, "NFA_CLASS_PRINT"); break;
				1754	case NFA_CLASS_PUNCT: STRCPY(code, "NFA_CLASS_PUNCT"); break;
				1755	case NFA_CLASS_SPACE: STRCPY(code, "NFA_CLASS_SPACE"); break;
				1756	case NFA_CLASS_UPPER: STRCPY(code, "NFA_CLASS_UPPER"); break;
				1757	case NFA_CLASS_XDIGIT: STRCPY(code, "NFA_CLASS_XDIGIT"); break;
				1758	case NFA_CLASS_TAB: STRCPY(code, "NFA_CLASS_TAB"); break;
				1759	case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break;
				1760	case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break;
				1761	case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break;
				1762
				1763	case NFA_ANY: STRCPY(code, "NFA_ANY"); break;
				1764	case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break;
				1765	case NFA_SIDENT:STRCPY(code, "NFA_SIDENT"); break;
				1766	case NFA_KWORD: STRCPY(code, "NFA_KWORD"); break;
				1767	case NFA_SKWORD:STRCPY(code, "NFA_SKWORD"); break;
				1768	case NFA_FNAME: STRCPY(code, "NFA_FNAME"); break;
				1769	case NFA_SFNAME:STRCPY(code, "NFA_SFNAME"); break;
				1770	case NFA_PRINT: STRCPY(code, "NFA_PRINT"); break;
				1771	case NFA_SPRINT:STRCPY(code, "NFA_SPRINT"); break;
				1772	case NFA_WHITE: STRCPY(code, "NFA_WHITE"); break;
				1773	case NFA_NWHITE:STRCPY(code, "NFA_NWHITE"); break;
				1774	case NFA_DIGIT: STRCPY(code, "NFA_DIGIT"); break;
				1775	case NFA_NDIGIT:STRCPY(code, "NFA_NDIGIT"); break;
				1776	case NFA_HEX: STRCPY(code, "NFA_HEX"); break;
				1777	case NFA_NHEX: STRCPY(code, "NFA_NHEX"); break;
				1778	case NFA_OCTAL: STRCPY(code, "NFA_OCTAL"); break;
				1779	case NFA_NOCTAL:STRCPY(code, "NFA_NOCTAL"); break;
				1780	case NFA_WORD: STRCPY(code, "NFA_WORD"); break;
				1781	case NFA_NWORD: STRCPY(code, "NFA_NWORD"); break;
				1782	case NFA_HEAD: STRCPY(code, "NFA_HEAD"); break;
				1783	case NFA_NHEAD: STRCPY(code, "NFA_NHEAD"); break;
				1784	case NFA_ALPHA: STRCPY(code, "NFA_ALPHA"); break;
				1785	case NFA_NALPHA:STRCPY(code, "NFA_NALPHA"); break;
				1786	case NFA_LOWER: STRCPY(code, "NFA_LOWER"); break;
				1787	case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
				1788	case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break;
				1789	case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
				1790
				1791	default:
				1792	STRCPY(code, "CHAR(x)");
				1793	code[5] = c;
				1794	}
				1795
				1796	if (addnl == TRUE)
				1797	STRCAT(code, " + NEWLINE ");
				1798
				1799	}
				1800
				1801	#ifdef ENABLE_LOG
				1802	static FILE *log_fd;
				1803
				1804	/*
				1805	* Print the postfix notation of the current regexp.
				1806	*/
				1807	static void
				1808	nfa_postfix_dump(expr, retval)
				1809	char_u *expr;
				1810	int retval;
				1811	{
				1812	int *p;
				1813	FILE *f;
				1814
				1815	f = fopen("LOG.log", "a");
				1816	if (f != NULL)
				1817	{
				1818	fprintf(f, "\n-------------------------\n");
				1819	if (retval == FAIL)
				1820	fprintf(f, ">>> NFA engine failed ... \n");
				1821	else if (retval == OK)
				1822	fprintf(f, ">>> NFA engine succeeded !\n");
				1823	fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr);
				1824	for (p=post_start; *p; p++)
				1825	{
				1826	nfa_set_code(*p);
				1827	fprintf(f, "%s, ", code);
				1828	}
				1829	fprintf(f, "\"\nPostfix notation (int): ");
				1830	for (p=post_start; *p; p++)
				1831	fprintf(f, "%d ", *p);
				1832	fprintf(f, "\n\n");
				1833	fclose(f);
				1834	}
				1835	}
				1836
				1837	/*
				1838	* Print the NFA starting with a root node "state".
				1839	*/
				1840	static void
				1841	nfa_print_state(debugf, state, ident)
				1842	FILE *debugf;
				1843	nfa_state_T *state;
				1844	int ident;
				1845	{
				1846	int i;
				1847
				1848	if (state == NULL)
				1849	return;
				1850
				1851	fprintf(debugf, "(%2d)", abs(state->id));
				1852	for (i = 0; i < ident; i++)
				1853	fprintf(debugf, "%c", ' ');
				1854
				1855	nfa_set_code(state->c);
				1856	fprintf(debugf, "%s %s (%d) (id=%d)\n",
				1857	state->negated ? "NOT" : "", code, state->c, abs(state->id));
				1858	if (state->id < 0)
				1859	return;
				1860
				1861	state->id = abs(state->id) * -1;
				1862	nfa_print_state(debugf, state->out, ident + 4);
				1863	nfa_print_state(debugf, state->out1, ident + 4);
				1864	}
				1865
				1866	/*
				1867	* Print the NFA state machine.
				1868	*/
				1869	static void
				1870	nfa_dump(prog)
				1871	nfa_regprog_T *prog;
				1872	{
				1873	FILE *debugf = fopen("LOG.log", "a");
				1874
				1875	if (debugf != NULL)
				1876	{
				1877	nfa_print_state(debugf, prog->start, 0);
				1878	fclose(debugf);
				1879	}
				1880	}
				1881	#endif /* ENABLE_LOG */
				1882	#endif /* DEBUG */
				1883
				1884	/*
				1885	* Parse r.e. @expr and convert it into postfix form.
				1886	* Return the postfix string on success, NULL otherwise.
				1887	*/
				1888	static int *
				1889	re2post()
				1890	{
				1891	if (nfa_reg(REG_NOPAREN) == FAIL)
				1892	return NULL;
				1893	EMIT(NFA_MOPEN);
				1894	return post_start;
				1895	}
				1896
				1897	/* NB. Some of the code below is inspired by Russ's. */
				1898
				1899	/*
				1900	* Represents an NFA state plus zero or one or two arrows exiting.
				1901	* if c == MATCH, no arrows out; matching state.
				1902	* If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
				1903	* If c < 256, labeled arrow with character c to out.
				1904	*/
				1905
				1906	static nfa_state_T state_ptr; / points to nfa_prog->state */
				1907
				1908	/*
				1909	* Allocate and initialize nfa_state_T.
				1910	*/
				1911	static nfa_state_T *
				1912	new_state(c, out, out1)
				1913	int c;
				1914	nfa_state_T *out;
				1915	nfa_state_T *out1;
				1916	{
				1917	nfa_state_T *s;
				1918
				1919	if (istate >= nstate)
				1920	return NULL;
				1921
				1922	s = &state_ptr[istate++];
				1923
				1924	s->c = c;
				1925	s->out = out;
				1926	s->out1 = out1;
				1927
				1928	s->id = istate;
				1929	s->lastlist = 0;
				1930	s->lastthread = NULL;
				1931	s->visits = 0;
				1932	s->negated = FALSE;
				1933
				1934	return s;
				1935	}
				1936
				1937	/*
				1938	* A partially built NFA without the matching state filled in.
				1939	* Frag_T.start points at the start state.
				1940	* Frag_T.out is a list of places that need to be set to the
				1941	* next state for this fragment.
				1942	*/
				1943	typedef union Ptrlist Ptrlist;
				1944	struct Frag
				1945	{
				1946	nfa_state_T *start;
				1947	Ptrlist *out;
				1948	};
				1949	typedef struct Frag Frag_T;
				1950
				1951	static Frag_T frag __ARGS((nfa_state_T start, Ptrlist out));
				1952	static Ptrlist list1 __ARGS((nfa_state_T *outp));
				1953	static void patch __ARGS((Ptrlist l, nfa_state_T s));
				1954	static Ptrlist append __ARGS((Ptrlist l1, Ptrlist *l2));
				1955	static void st_push __ARGS((Frag_T s, Frag_T *p, Frag_T stack_end));
				1956	static Frag_T st_pop __ARGS((Frag_T *p, Frag_T stack));
				1957
				1958	/*
				1959	* Initialize Frag_T struct.
				1960	*/
				1961	static Frag_T
				1962	frag(start, out)
				1963	nfa_state_T *start;
				1964	Ptrlist *out;
				1965	{
				1966	Frag_T n = { start, out };
				1967	return n;
				1968	}
				1969
				1970	/*
				1971	* Since the out pointers in the list are always
				1972	* uninitialized, we use the pointers themselves
				1973	* as storage for the Ptrlists.
				1974	*/
				1975	union Ptrlist
				1976	{
				1977	Ptrlist *next;
				1978	nfa_state_T *s;
				1979	};
				1980
				1981	/*
				1982	* Create singleton list containing just outp.
				1983	*/
				1984	static Ptrlist *
				1985	list1(outp)
				1986	nfa_state_T **outp;
				1987	{
				1988	Ptrlist *l;
				1989
				1990	l = (Ptrlist *)outp;
				1991	l->next = NULL;
				1992	return l;
				1993	}
				1994
				1995	/*
				1996	* Patch the list of states at out to point to start.
				1997	*/
				1998	static void
				1999	patch(l, s)
				2000	Ptrlist *l;
				2001	nfa_state_T *s;
				2002	{
				2003	Ptrlist *next;
				2004
				2005	for (; l; l = next)
				2006	{
				2007	next = l->next;
				2008	l->s = s;
				2009	}
				2010	}
				2011
				2012
				2013	/*
				2014	* Join the two lists l1 and l2, returning the combination.
				2015	*/
				2016	static Ptrlist *
				2017	append(l1, l2)
				2018	Ptrlist *l1;
				2019	Ptrlist *l2;
				2020	{
				2021	Ptrlist *oldl1;
				2022
				2023	oldl1 = l1;
				2024	while (l1->next)
				2025	l1 = l1->next;
				2026	l1->next = l2;
				2027	return oldl1;
				2028	}
				2029
				2030	/*
				2031	* Stack used for transforming postfix form into NFA.
				2032	*/
				2033	static Frag_T empty;
				2034
				2035	static void
				2036	st_error(postfix, end, p)
				2037	int *postfix;
				2038	int *end;
				2039	int *p;
				2040	{
				2041	FILE *df;
				2042	int *p2;
				2043
				2044	df = fopen("stack.err", "a");
				2045	if (df)
				2046	{
				2047	fprintf(df, "Error popping the stack!\n");
				2048	#ifdef DEBUG
				2049	fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr);
				2050	#endif
				2051	fprintf(df, "Postfix form is: ");
				2052	#ifdef DEBUG
				2053	for (p2 = postfix; p2 < end; p2++)
				2054	{
				2055	nfa_set_code(*p2);
				2056	fprintf(df, "%s, ", code);
				2057	}
				2058	nfa_set_code(*p);
				2059	fprintf(df, "\nCurrent position is: ");
				2060	for (p2 = postfix; p2 <= p; p2 ++)
				2061	{
				2062	nfa_set_code(*p2);
				2063	fprintf(df, "%s, ", code);
				2064	}
				2065	#else
				2066	for (p2 = postfix; p2 < end; p2++)
				2067	{
				2068	fprintf(df, "%d, ", *p2);
				2069	}
				2070	fprintf(df, "\nCurrent position is: ");
				2071	for (p2 = postfix; p2 <= p; p2 ++)
				2072	{
				2073	fprintf(df, "%d, ", *p2);
				2074	}
				2075	#endif
				2076	fprintf(df, "\n--------------------------\n");
				2077	fclose(df);
				2078	}
				2079	EMSG(_("E874: (NFA) Could not pop the stack !"));
				2080	}
				2081
				2082	/*
				2083	* Push an item onto the stack.
				2084	*/
				2085	static void
				2086	st_push(s, p, stack_end)
				2087	Frag_T s;
				2088	Frag_T **p;
				2089	Frag_T *stack_end;
				2090	{
				2091	Frag_T stackp = p;
				2092
				2093	if (stackp >= stack_end)
				2094	return;
				2095	*stackp = s;
				2096	p = p + 1;
				2097	}
				2098
				2099	/*
				2100	* Pop an item from the stack.
				2101	*/
				2102	static Frag_T
				2103	st_pop(p, stack)
				2104	Frag_T **p;
				2105	Frag_T *stack;
				2106	{
				2107	Frag_T *stackp;
				2108
				2109	p = p - 1;
				2110	stackp = *p;
				2111	if (stackp < stack)
				2112	return empty;
				2113	return **p;
				2114	}
				2115
				2116	/*
				2117	* Convert a postfix form into its equivalent NFA.
				2118	* Return the NFA start state on success, NULL otherwise.
				2119	*/
				2120	static nfa_state_T *
				2121	post2nfa(postfix, end, nfa_calc_size)
				2122	int *postfix;
				2123	int *end;
				2124	int nfa_calc_size;
				2125	{
				2126	int *p;
				2127	int mopen;
				2128	int mclose;
				2129	Frag_T *stack = NULL;
				2130	Frag_T *stackp = NULL;
				2131	Frag_T *stack_end = NULL;
				2132	Frag_T e1;
				2133	Frag_T e2;
				2134	Frag_T e;
				2135	nfa_state_T *s;
				2136	nfa_state_T *s1;
				2137	nfa_state_T *matchstate;
				2138
				2139	if (postfix == NULL)
				2140	return NULL;
				2141
				2142	#define PUSH(s) st_push ((s), &stackp, stack_end)
				2143	#define POP() st_pop(&stackp, stack); \
				2144	if (stackp < stack) \
				2145	{ \
				2146	st_error(postfix, end, p); \
				2147	return NULL; \
				2148	}
				2149
				2150	if (nfa_calc_size == FALSE)
				2151	{
				2152	/* Allocate space for the stack. Max states on the stack : nstate */
				2153	stack = (Frag_T ) lalloc((nstate + 1)sizeof(Frag_T), TRUE);
				2154	stackp = stack;
				2155	stack_end = stack + NFA_STACK_SIZE;
				2156	}
				2157
				2158	for (p = postfix; p < end; ++p)
				2159	{
				2160	switch (*p)
				2161	{
				2162	case NFA_CONCAT:
				2163	/* Catenation.
				2164	* Pay attention: this operator does not exist
				2165	* in the r.e. itself (it is implicit, really).
				2166	* It is added when r.e. is translated to postfix
				2167	* form in re2post().
				2168	*
				2169	* No new state added here. */
				2170	if (nfa_calc_size == TRUE)
				2171	{
				2172	nstate += 0;
				2173	break;
				2174	}
				2175	e2 = POP();
				2176	e1 = POP();
				2177	patch(e1.out, e2.start);
				2178	PUSH(frag(e1.start, e2.out));
				2179	break;
				2180
				2181	case NFA_NOT:
				2182	/* Negation of a character */
				2183	if (nfa_calc_size == TRUE)
				2184	{
				2185	nstate += 0;
				2186	break;
				2187	}
				2188	e1 = POP();
				2189	e1.start->negated = TRUE;
				2190	if (e1.start->c == NFA_MULTIBYTE \|\| e1.start->c == NFA_COMPOSING)
				2191	e1.start->out1->negated = TRUE;
				2192	PUSH(e1);
				2193	break;
				2194
				2195	case NFA_OR:
				2196	/* Alternation */
				2197	if (nfa_calc_size == TRUE)
				2198	{
				2199	nstate ++;
				2200	break;
				2201	}
				2202	e2 = POP();
				2203	e1 = POP();
				2204	s = new_state(NFA_SPLIT, e1.start, e2.start);
				2205	if (s == NULL)
				2206	return NULL;
				2207	PUSH(frag(s, append(e1.out, e2.out)));
				2208	break;
				2209
				2210	case NFA_STAR:
				2211	/* Zero or more */
				2212	if (nfa_calc_size == TRUE)
				2213	{
				2214	nstate ++;
				2215	break;
				2216	}
				2217	e = POP();
				2218	s = new_state(NFA_SPLIT, e.start, NULL);
				2219	if (s == NULL)
				2220	return NULL;
				2221	patch(e.out, s);
				2222	PUSH(frag(s, list1(&s->out1)));
				2223	break;
				2224
				2225	case NFA_QUEST:
				2226	/* one or zero atoms=> greedy match */
				2227	if (nfa_calc_size == TRUE)
				2228	{
				2229	nstate ++;
				2230	break;
				2231	}
				2232	e = POP();
				2233	s = new_state(NFA_SPLIT, e.start, NULL);
				2234	if (s == NULL)
				2235	return NULL;
				2236	PUSH(frag(s, append(e.out, list1(&s->out1))));
				2237	break;
				2238
				2239	case NFA_QUEST_NONGREEDY:
				2240	/* zero or one atoms => non-greedy match */
				2241	if (nfa_calc_size == TRUE)
				2242	{
				2243	nstate ++;
				2244	break;
				2245	}
				2246	e = POP();
				2247	s = new_state(NFA_SPLIT, NULL, e.start);
				2248	if (s == NULL)
				2249	return NULL;
				2250	PUSH(frag(s, append(e.out, list1(&s->out))));
				2251	break;
				2252
				2253	case NFA_PLUS:
				2254	/* One or more */
				2255	if (nfa_calc_size == TRUE)
				2256	{
				2257	nstate ++;
				2258	break;
				2259	}
				2260	e = POP();
				2261	s = new_state(NFA_SPLIT, e.start, NULL);
				2262	if (s == NULL)
				2263	return NULL;
				2264	patch(e.out, s);
				2265	PUSH(frag(e.start, list1(&s->out1)));
				2266	break;
				2267
				2268	case NFA_SKIP_CHAR:
				2269	/* Symbol of 0-length, Used in a repetition
				2270	* with max/min count of 0 */
				2271	if (nfa_calc_size == TRUE)
				2272	{
				2273	nstate ++;
				2274	break;
				2275	}
				2276	s = new_state(NFA_SKIP_CHAR, NULL, NULL);
				2277	if (s == NULL)
				2278	return NULL;
				2279	PUSH(frag(s, list1(&s->out)));
				2280	break;
				2281
				2282	case NFA_PREV_ATOM_NO_WIDTH:
				2283	/* The \@= operator: match the preceding atom with 0 width.
				2284	* Surrounds the preceding atom with START_INVISIBLE and
				2285	* END_INVISIBLE, similarly to MOPEN.
				2286	*/
				2287	/* TODO: Maybe this drops the speed? */
				2288	return NULL;
				2289
				2290	if (nfa_calc_size == TRUE)
				2291	{
				2292	nstate += 2;
				2293	break;
				2294	}
				2295	e = POP();
				2296	s1 = new_state(NFA_END_INVISIBLE, NULL, NULL);
				2297	if (s1 == NULL)
				2298	return NULL;
				2299	patch(e.out, s1);
				2300
				2301	s = new_state(NFA_START_INVISIBLE, e.start, s1);
				2302	if (s == NULL)
				2303	return NULL;
				2304	PUSH(frag(s, list1(&s1->out)));
				2305	break;
				2306
				2307	case NFA_MOPEN + 0: /* Submatch */
				2308	case NFA_MOPEN + 1:
				2309	case NFA_MOPEN + 2:
				2310	case NFA_MOPEN + 3:
				2311	case NFA_MOPEN + 4:
				2312	case NFA_MOPEN + 5:
				2313	case NFA_MOPEN + 6:
				2314	case NFA_MOPEN + 7:
				2315	case NFA_MOPEN + 8:
				2316	case NFA_MOPEN + 9:
				2317	case NFA_NOPEN: /* \%( "Invisible Submatch" */
				2318	case NFA_MULTIBYTE: /* mbyte char */
				2319	case NFA_COMPOSING: /* composing char */
				2320	if (nfa_calc_size == TRUE)
				2321	{
				2322	nstate += 2;
				2323	break;
				2324	}
				2325
				2326	mopen = *p;
				2327	switch (*p)
				2328	{
				2329	case NFA_NOPEN:
				2330	mclose = NFA_NCLOSE;
				2331	break;
				2332	case NFA_MULTIBYTE:
				2333	mclose = NFA_END_MULTIBYTE;
				2334	break;
				2335	case NFA_COMPOSING:
				2336	mclose = NFA_END_COMPOSING;
				2337	break;
				2338	default:
				2339	/* NFA_MOPEN(0) ... NFA_MOPEN(9) */
				2340	mclose = *p + NSUBEXP;
				2341	break;
				2342	}
				2343
				2344	/* Allow "NFA_MOPEN" as a valid postfix representation for
				2345	* the empty regexp "". In this case, the NFA will be
				2346	* NFA_MOPEN -> NFA_MCLOSE. Note that this also allows
				2347	* empty groups of parenthesis, and empty mbyte chars */
				2348	if (stackp == stack)
				2349	{
				2350	s = new_state(mopen, NULL, NULL);
				2351	if (s == NULL)
				2352	return NULL;
				2353	s1 = new_state(mclose, NULL, NULL);
				2354	if (s1 == NULL)
				2355	return NULL;
				2356	patch(list1(&s->out), s1);
				2357	PUSH(frag(s, list1(&s1->out)));
				2358	break;
				2359	}
				2360
				2361	/* At least one node was emitted before NFA_MOPEN, so
				2362	* at least one node will be between NFA_MOPEN and NFA_MCLOSE */
				2363	e = POP();
				2364	s = new_state(mopen, e.start, NULL); /* `(' */
				2365	if (s == NULL)
				2366	return NULL;
				2367
				2368	s1 = new_state(mclose, NULL, NULL); /* `)' */
				2369	if (s1 == NULL)
				2370	return NULL;
				2371	patch(e.out, s1);
				2372
				2373	if (mopen == NFA_MULTIBYTE \|\| mopen == NFA_COMPOSING)
				2374	/* MULTIBYTE->out1 = END_MULTIBYTE
				2375	* COMPOSING->out1 = END_COMPOSING */
				2376	patch(list1(&s->out1), s1);
				2377
				2378	PUSH(frag(s, list1(&s1->out)));
				2379	break;
				2380
				2381	case NFA_ZSTART:
				2382	case NFA_ZEND:
				2383	default:
				2384	/* Operands */
				2385	if (nfa_calc_size == TRUE)
				2386	{
				2387	nstate ++;
				2388	break;
				2389	}
				2390	s = new_state(*p, NULL, NULL);
				2391	if (s == NULL)
				2392	return NULL;
				2393	PUSH(frag(s, list1(&s->out)));
				2394	break;
				2395
				2396	} /* switch(p) /
				2397
				2398	} /* for(p = postfix; p; ++p) /
				2399
				2400	if (nfa_calc_size == TRUE)
				2401	{
				2402	nstate ++;
				2403	return NULL; /* Return value when counting size is ignored anyway */
				2404	}
				2405
				2406	e = POP();
				2407	if (stackp != stack)
				2408	EMSG_RET_NULL(_("E875: (NFA regexp) (While converting from postfix to NFA), too many states left on stack"));
				2409
				2410	if (istate >= nstate)
				2411	EMSG_RET_NULL(_("E876: (NFA regexp) Not enough space to store the whole NFA "));
				2412
				2413	vim_free(stack);
				2414
				2415	matchstate = &state_ptr[istate++]; /* the match state */
				2416	matchstate->c = NFA_MATCH;
				2417	matchstate->out = matchstate->out1 = NULL;
				2418
				2419	patch(e.out, matchstate);
				2420	return e.start;
				2421
				2422	#undef POP1
				2423	#undef PUSH1
				2424	#undef POP2
				2425	#undef PUSH2
				2426	#undef POP
				2427	#undef PUSH
				2428	}
				2429
				2430	/****************************************************************
				2431	* NFA execution code.
				2432	****************************************************************/
				2433
				2434	/* thread_T contains runtime information of a NFA state */
				2435	struct thread
				2436	{
				2437	nfa_state_T *state; /* the NFA state this thread is at */
				2438	regsub_T sub; /* submatch info; addstate() stores a copy of its "m" argument here */
				2439	};
				2440
				2441	typedef struct
				2442	{
				2443	thread_T *t; /* array of threads; addstate() appends at t[n] */
				2444	int n; /* number of entries of "t" that are in use */
				2445	} List; /* a list of threads, filled by addstate() */
				2446
				2447	static void addstate __ARGS((List l, nfa_state_T state, regsub_T m, int off, int lid, int match));
				2448
				2449	static void
				2450	addstate(l, state, m, off, lid, match)
				2451	List l; / runtime state list */
				2452	nfa_state_T state; / state to update */
				2453	regsub_T m; / pointers to subexpressions */
				2454	int off;
				2455	int lid;
				2456	int match; / found match? */
				2457	{
				2458	regsub_T save;
				2459	int subidx = 0;
				2460
				2461	if (l == NULL \|\| state == NULL)
				2462	return;
				2463
				2464	switch (state->c)
				2465	{
				2466	case NFA_SPLIT:
				2467	case NFA_NOT:
				2468	case NFA_NOPEN:
				2469	case NFA_NCLOSE:
				2470	case NFA_MCLOSE:
				2471	case NFA_MCLOSE + 1:
				2472	case NFA_MCLOSE + 2:
				2473	case NFA_MCLOSE + 3:
				2474	case NFA_MCLOSE + 4:
				2475	case NFA_MCLOSE + 5:
				2476	case NFA_MCLOSE + 6:
				2477	case NFA_MCLOSE + 7:
				2478	case NFA_MCLOSE + 8:
				2479	case NFA_MCLOSE + 9:
				2480	/* Do not remember these nodes in list "thislist" or "nextlist" */
				2481	break;
				2482
				2483	default:
				2484	if (state->lastlist == lid)
				2485	{
				2486	if (++state->visits > 2)
				2487	return;
				2488	}
				2489	else
				2490	{
				2491	/* add the state to the list */
				2492	state->lastlist = lid;
				2493	state->lastthread = &l->t[l->n++];
				2494	state->lastthread->state = state;
				2495	state->lastthread->sub = *m;
				2496	}
				2497	}
				2498
				2499	#ifdef ENABLE_LOG
				2500	nfa_set_code(state->c);
				2501	fprintf(log_fd, "> Adding state %d to list. Character %s, code %d\n",
				2502	abs(state->id), code, state->c);
				2503	#endif
				2504	switch (state->c)
				2505	{
				2506	case NFA_MATCH:
				2507	*match = TRUE;
				2508	break;
				2509
				2510	case NFA_SPLIT:
				2511	addstate(l, state->out, m, off, lid, match);
				2512	addstate(l, state->out1, m, off, lid, match);
				2513	break;
				2514
				2515	case NFA_SKIP_CHAR:
				2516	addstate(l, state->out, m, off, lid, match);
				2517	break;
				2518
				2519	#if 0
				2520	case NFA_END_NEG_RANGE:
				2521	/* Nothing to handle here. nfa_regmatch() will take care of it */
				2522	break;
				2523
				2524	case NFA_NOT:
				2525	EMSG(_("E999: (NFA regexp internal error) Should not process NOT node !"));
				2526	#ifdef ENABLE_LOG
				2527	fprintf(f, "\n\n>>> E999: Added state NFA_NOT to a list ... Something went wrong ! Why wasn't it processed already? \n\n");
				2528	#endif
				2529	break;
				2530
				2531	case NFA_COMPOSING:
				2532	/* nfa_regmatch() will match all the bytes of this composing char. */
				2533	break;
				2534
				2535	case NFA_MULTIBYTE:
				2536	/* nfa_regmatch() will match all the bytes of this multibyte char. */
				2537	break;
				2538	#endif
				2539
				2540	case NFA_END_MULTIBYTE:
				2541	/* Successfully matched this mbyte char */
				2542	addstate(l, state->out, m, off, lid, match);
				2543	break;
				2544
				2545	case NFA_NOPEN:
				2546	case NFA_NCLOSE:
				2547	addstate(l, state->out, m, off, lid, match);
				2548	break;
				2549
				2550	/* If this state is reached, then a recursive call of nfa_regmatch()
				2551	* succeeded. the next call saves the found submatches in the
				2552	* first state after the "invisible" branch. */
				2553	#if 0
				2554	case NFA_END_INVISIBLE:
				2555	break;
				2556	#endif
				2557
				2558	case NFA_MOPEN + 0:
				2559	case NFA_MOPEN + 1:
				2560	case NFA_MOPEN + 2:
				2561	case NFA_MOPEN + 3:
				2562	case NFA_MOPEN + 4:
				2563	case NFA_MOPEN + 5:
				2564	case NFA_MOPEN + 6:
				2565	case NFA_MOPEN + 7:
				2566	case NFA_MOPEN + 8:
				2567	case NFA_MOPEN + 9:
				2568	case NFA_ZSTART:
				2569	subidx = state->c - NFA_MOPEN;
				2570	if (state->c == NFA_ZSTART)
				2571	subidx = 0;
				2572
				2573	if (REG_MULTI)
				2574	{
				2575	save.startpos[subidx] = m->startpos[subidx];
				2576	save.endpos[subidx] = m->endpos[subidx];
				2577	m->startpos[subidx].lnum = reglnum;
				2578	m->startpos[subidx].col = reginput - regline + off;
				2579	}
				2580	else
				2581	{
				2582	save.start[subidx] = m->start[subidx];
				2583	save.end[subidx] = m->end[subidx];
				2584	m->start[subidx] = reginput + off;
				2585	}
				2586
				2587	addstate(l, state->out, m, off, lid, match);
				2588
				2589	if (REG_MULTI)
				2590	{
				2591	m->startpos[subidx] = save.startpos[subidx];
				2592	m->endpos[subidx] = save.endpos[subidx];
				2593	}
				2594	else
				2595	{
				2596	m->start[subidx] = save.start[subidx];
				2597	m->end[subidx] = save.end[subidx];
				2598	}
				2599	break;
				2600
				2601	case NFA_MCLOSE + 0:
				2602	if (nfa_has_zend == TRUE)
				2603	{
				2604	addstate(l, state->out, m, off, lid, match);
				2605	break;
				2606	}
				2607	case NFA_MCLOSE + 1:
				2608	case NFA_MCLOSE + 2:
				2609	case NFA_MCLOSE + 3:
				2610	case NFA_MCLOSE + 4:
				2611	case NFA_MCLOSE + 5:
				2612	case NFA_MCLOSE + 6:
				2613	case NFA_MCLOSE + 7:
				2614	case NFA_MCLOSE + 8:
				2615	case NFA_MCLOSE + 9:
				2616	case NFA_ZEND:
				2617	subidx = state->c - NFA_MCLOSE;
				2618	if (state->c == NFA_ZEND)
				2619	subidx = 0;
				2620
				2621	if (REG_MULTI)
				2622	{
				2623	save.startpos[subidx] = m->startpos[subidx];
				2624	save.endpos[subidx] = m->endpos[subidx];
				2625	m->endpos[subidx].lnum = reglnum;
				2626	m->endpos[subidx].col = reginput - regline + off;
				2627	}
				2628	else
				2629	{
				2630	save.start[subidx] = m->start[subidx];
				2631	save.end[subidx] = m->end[subidx];
				2632	m->end[subidx] = reginput + off;
				2633	}
				2634
				2635	addstate(l, state->out, m, off, lid, match);
				2636
				2637	if (REG_MULTI)
				2638	{
				2639	m->startpos[subidx] = save.startpos[subidx];
				2640	m->endpos[subidx] = save.endpos[subidx];
				2641	}
				2642	else
				2643	{
				2644	m->start[subidx] = save.start[subidx];
				2645	m->end[subidx] = save.end[subidx];
				2646	}
				2647	break;
				2648	}
				2649	}
				2650
				2651	/*
				2652	* Check character class "class" against current character c.
				2653	*/
				2654	static int
				2655	check_char_class(class, c)
				2656	int class;
				2657	int c;
				2658	{
				2659	switch (class)
				2660	{
				2661	case NFA_CLASS_ALNUM:
				2662	if (isalnum(c))
				2663	return OK;
				2664	break;
				2665	case NFA_CLASS_ALPHA:
				2666	if (isalpha(c))
				2667	return OK;
				2668	break;
				2669	case NFA_CLASS_BLANK:
				2670	if (c == ' ' \|\| c == '\t')
				2671	return OK;
				2672	break;
				2673	case NFA_CLASS_CNTRL:
				2674	if (iscntrl(c))
				2675	return OK;
				2676	break;
				2677	case NFA_CLASS_DIGIT:
				2678	if (VIM_ISDIGIT(c))
				2679	return OK;
				2680	break;
				2681	case NFA_CLASS_GRAPH:
				2682	if (isgraph(c))
				2683	return OK;
				2684	break;
				2685	case NFA_CLASS_LOWER:
				2686	if (MB_ISLOWER(c))
				2687	return OK;
				2688	break;
				2689	case NFA_CLASS_PRINT:
				2690	if (vim_isprintc(c))
				2691	return OK;
				2692	break;
				2693	case NFA_CLASS_PUNCT:
				2694	if (ispunct(c))
				2695	return OK;
				2696	break;
				2697	case NFA_CLASS_SPACE:
				2698	if ((c >=9 && c <= 13) \|\| (c == ' '))
				2699	return OK;
				2700	break;
				2701	case NFA_CLASS_UPPER:
				2702	if (MB_ISUPPER(c))
				2703	return OK;
				2704	break;
				2705	case NFA_CLASS_XDIGIT:
				2706	if (vim_isxdigit(c))
				2707	return OK;
				2708	break;
				2709	case NFA_CLASS_TAB:
				2710	if (c == '\t')
				2711	return OK;
				2712	break;
				2713	case NFA_CLASS_RETURN:
				2714	if (c == '\r')
				2715	return OK;
				2716	break;
				2717	case NFA_CLASS_BACKSPACE:
				2718	if (c == '\b')
				2719	return OK;
				2720	break;
				2721	case NFA_CLASS_ESCAPE:
				2722	if (c == '\033')
				2723	return OK;
				2724	break;
				2725
				2726	default:
				2727	/* should not be here :P */
				2728	EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
				2729	}
				2730	return FAIL;
				2731	}
				2732
				2733	/*
				2734	* Set all NFA nodes' list ID equal to -1.
				2735	*/
				2736	static void
				2737	nfa_set_neg_listids(start)
				2738	nfa_state_T *start;
				2739	{
				2740	if (start == NULL)
				2741	return;
				2742	if (start->lastlist >= 0)
				2743	{
				2744	start->lastlist = -1;
				2745	nfa_set_neg_listids(start->out);
				2746	nfa_set_neg_listids(start->out1);
				2747	}
				2748	}
				2749
				2750	/*
				2751	* Set all NFA nodes' list ID equal to 0.
				2752	*/
				2753	static void
				2754	nfa_set_null_listids(start)
				2755	nfa_state_T *start;
				2756	{
				2757	if (start == NULL)
				2758	return;
				2759	if (start->lastlist == -1)
				2760	{
				2761	start->lastlist = 0;
				2762	nfa_set_null_listids(start->out);
				2763	nfa_set_null_listids(start->out1);
				2764	}
				2765	}
				2766
				2767	/*
				2768	* Save list IDs for all NFA states in "list".
				2769	*/
				2770	static void
				2771	nfa_save_listids(start, list)
				2772	nfa_state_T *start;
				2773	int *list;
				2774	{
				2775	if (start == NULL)
				2776	return;
				2777	if (start->lastlist != -1)
				2778	{
				2779	list[abs(start->id)] = start->lastlist;
				2780	start->lastlist = -1;
				2781	nfa_save_listids(start->out, list);
				2782	nfa_save_listids(start->out1, list);
				2783	}
				2784	}
				2785
				2786	/*
				2787	* Restore list IDs from "list" to all NFA states.
				2788	*/
				2789	static void
				2790	nfa_restore_listids(start, list)
				2791	nfa_state_T *start;
				2792	int *list;
				2793	{
				2794	if (start == NULL)
				2795	return;
				2796	if (start->lastlist == -1)
				2797	{
				2798	start->lastlist = list[abs(start->id)];
				2799	nfa_restore_listids(start->out, list);
				2800	nfa_restore_listids(start->out1, list);
				2801	}
				2802	}
				2803
				2804	/*
				2805	* Main matching routine.
				2806	*
				2807	* Run NFA to determine whether it matches reginput.
				2808	*
				2809	* Return TRUE if there is a match, FALSE otherwise.
				2810	* Note: Caller must ensure that: start != NULL.
				2811	*/
				2812	static int
				2813	nfa_regmatch(start, submatch, m)
				2814	nfa_state_T *start;
				2815	regsub_T *submatch;
				2816	regsub_T *m;
				2817	{
				2818	int c = -1;
				2819	int n;
				2820	int i = 0;
				2821	int result;
				2822	int size = 0;
				2823	int match = FALSE;
				2824	int flag = 0;
				2825	int old_reglnum = -1;
				2826	int reginput_updated = FALSE;
				2827	thread_T *t;
				2828	char_u *cc;
				2829	char_u *old_reginput = NULL;
				2830	char_u *old_regline = NULL;
				2831	nfa_state_T *sta;
				2832	nfa_state_T *end;
				2833	List list[3];
				2834	List *listtbl[2][2];
				2835	List *ll;
				2836	int listid = 1;
				2837	int endnode = 0;
				2838	List *thislist;
				2839	List *nextlist;
				2840	List *neglist;
				2841	int *listids = NULL;
				2842	int j = 0;
				2843	int len = 0;
				2844	#ifdef DEBUG
				2845	FILE *debug = fopen("list.log", "a");
				2846
				2847	if (debug == NULL)
				2848	{
				2849	EMSG(_("(NFA) COULD NOT OPEN list.log !"));
				2850	return FALSE;
				2851	}
				2852	#endif
				2853
				2854	/* Allocate memory for the lists of nodes */
				2855	size = (nstate + 1) * sizeof(thread_T);
				2856	list[0].t = (thread_T *)lalloc(size, TRUE);
				2857	list[1].t = (thread_T *)lalloc(size, TRUE);
				2858	list[2].t = (thread_T *)lalloc(size, TRUE);
				2859	if (list[0].t == NULL \|\| list[1].t == NULL \|\| list[2].t == NULL)
				2860	goto theend;
				2861	vim_memset(list[0].t, 0, size);
				2862	vim_memset(list[1].t, 0, size);
				2863	vim_memset(list[2].t, 0, size);
				2864
				2865	#ifdef ENABLE_LOG
				2866	log_fd = fopen(LOG_NAME, "a");
				2867	if (log_fd != NULL)
				2868	{
				2869	fprintf(log_fd, "**********************************\n");
				2870	nfa_set_code(start->c);
				2871	fprintf(log_fd, " RUNNING nfa_regmatch() starting with state %d, code %s\n",
				2872	abs(start->id), code);
				2873	fprintf(log_fd, "**********************************\n");
				2874	}
				2875	else
				2876	{
				2877	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
				2878	log_fd = stderr;
				2879	}
				2880	#endif
				2881
				2882	thislist = &list[0];
				2883	thislist->n = 0;
				2884	nextlist = &list[1];
				2885	nextlist->n = 0;
				2886	neglist = &list[2];
				2887	neglist->n = 0;
				2888	#ifdef ENABLE_LOG
				2889	fprintf(log_fd, "(---) STARTSTATE\n");
				2890	#endif
				2891	addstate(thislist, start, m, 0, listid, &match);
				2892
				2893	/* There are two cases when the NFA advances: 1. input char matches the
				2894	* NFA node and 2. input char does not match the NFA node, but the next
				2895	* node is NFA_NOT. The following macro calls addstate() according to
				2896	* these rules. It is used A LOT, so use the "listtbl" table for speed */
				2897	listtbl[0][0] = NULL;
				2898	listtbl[0][1] = neglist;
				2899	listtbl[1][0] = nextlist;
				2900	listtbl[1][1] = NULL;
				2901	#define ADD_POS_NEG_STATE(node) \
				2902	ll = listtbl[result ? 1 : 0][node->negated]; \
				2903	if (ll != NULL) \
				2904	addstate(ll, node->out , &t->sub, n, listid + 1, &match);
				2905
				2906
				2907	/*
				2908	* Run for each character.
				2909	*/
				2910	do {
				2911	again:
				2912	#ifdef FEAT_MBYTE
				2913	if (has_mbyte)
				2914	{
				2915	c = (*mb_ptr2char)(reginput);
				2916	n = (*mb_ptr2len)(reginput);
				2917	}
				2918	else
				2919	#endif
				2920	{
				2921	c = *reginput;
				2922	n = 1;
				2923	}
				2924	if (c == NUL)
				2925	n = 0;
				2926	cc = (char_u *)&c;
				2927
				2928	/* swap lists */
				2929	thislist = &list[flag];
				2930	nextlist = &list[flag ^= 1];
				2931	nextlist->n = 0; /* `clear' nextlist */
				2932	listtbl[1][0] = nextlist;
				2933	++listid;
				2934
				2935	#ifdef ENABLE_LOG
				2936	fprintf(log_fd, "------------------------------------------\n");
				2937	fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
				2938	fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", c, (int)c);
				2939	fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n);
				2940	for (i = 0; i< thislist->n; i++)
				2941	fprintf(log_fd, "%d ", abs(thislist->t[i].state->id));
				2942	fprintf(log_fd, "\n");
				2943	#endif
				2944
				2945	#ifdef DEBUG
				2946	fprintf(debug, "\n-------------------\n");
				2947	#endif
				2948
				2949	/* compute nextlist */
				2950	for (i = 0; i < thislist->n \|\| neglist->n > 0; ++i)
				2951	{
				2952	if (neglist->n > 0)
				2953	{
				2954	t = &neglist->t[0];
				2955	neglist->n --;
				2956	i--;
				2957	}
				2958	else
				2959	t = &thislist->t[i];
				2960
				2961	#ifdef DEBUG
				2962	nfa_set_code(t->state->c);
				2963	fprintf(debug, "%s, ", code);
				2964	#endif
				2965	#ifdef ENABLE_LOG
				2966	nfa_set_code(t->state->c);
				2967	fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id),
				2968	code, (int)t->state->c);
				2969	#endif
				2970
				2971	/*
				2972	* Handle the possible codes of the current state.
				2973	* The most important is NFA_MATCH.
				2974	*/
				2975	switch (t->state->c)
				2976	{
				2977	case NFA_MATCH:
				2978	match = TRUE;
				2979	*submatch = t->sub;
				2980	#ifdef ENABLE_LOG
				2981	for (j = 0; j < 4; j++)
				2982	if (REG_MULTI)
				2983	fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d",
				2984	j,
				2985	t->sub.startpos[j].col,
				2986	(int)t->sub.startpos[j].lnum,
				2987	t->sub.endpos[j].col,
				2988	(int)t->sub.endpos[j].lnum);
				2989	else
				2990	fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"",
				2991	j,
				2992	(char *)t->sub.start[j],
				2993	(char *)t->sub.end[j]);
				2994	fprintf(log_fd, "\n");
				2995	#endif
				2996	goto nextchar; /* found the left-most longest match */
				2997
				2998	case NFA_END_INVISIBLE:
				2999	/* This is only encountered after a NFA_START_INVISIBLE node.
				3000	* They surround a zero-width group, used with "\@=" and "\&".
				3001	* If we got here, it means that the current "invisible" group
				3002	* finished successfully, so return control to the parent
				3003	* nfa_regmatch(). Submatches are stored in *m, and used in
				3004	* the parent call. */
				3005	if (start->c == NFA_MOPEN + 0)
				3006	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3007	&match);
				3008	else
				3009	{
				3010	*m = t->sub;
				3011	match = TRUE;
				3012	}
				3013	break;
				3014
				3015	case NFA_START_INVISIBLE:
				3016	/* Save global variables, and call nfa_regmatch() to check if
				3017	* the current concat matches at this position. The concat
				3018	* ends with the node NFA_END_INVISIBLE */
				3019	old_reginput = reginput;
				3020	old_regline = regline;
				3021	old_reglnum = reglnum;
				3022	if (listids == NULL)
				3023	{
				3024	listids = (int ) lalloc(sizeof(int) nstate, TRUE);
				3025	if (listids == NULL)
				3026	{
				3027	EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
				3028	return 0;
				3029	}
				3030	}
				3031	#ifdef ENABLE_LOG
				3032	if (log_fd != stderr)
				3033	fclose(log_fd);
				3034	log_fd = NULL;
				3035	#endif
				3036	/* Have to clear the listid field of the NFA nodes, so that
				3037	* nfa_regmatch() and addstate() can run properly after
				3038	* recursion. */
				3039	nfa_save_listids(start, listids);
				3040	nfa_set_null_listids(start);
				3041	result = nfa_regmatch(t->state->out, submatch, m);
				3042	nfa_set_neg_listids(start);
				3043	nfa_restore_listids(start, listids);
				3044
				3045	#ifdef ENABLE_LOG
				3046	log_fd = fopen(LOG_NAME, "a");
				3047	if (log_fd != NULL)
				3048	{
				3049	fprintf(log_fd, "****************************\n");
				3050	fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n");
				3051	fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
				3052	fprintf(log_fd, "****************************\n");
				3053	}
				3054	else
				3055	{
				3056	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
				3057	log_fd = stderr;
				3058	}
				3059	#endif
				3060	if (result == TRUE)
				3061	{
				3062	/* Restore position in input text */
				3063	reginput = old_reginput;
				3064	regline = old_regline;
				3065	reglnum = old_reglnum;
				3066	/* Copy submatch info from the recursive call */
				3067	if (REG_MULTI)
				3068	for (j = 1; j < NSUBEXP; j++)
				3069	{
				3070	t->sub.startpos[j] = m->startpos[j];
				3071	t->sub.endpos[j] = m->endpos[j];
				3072	}
				3073	else
				3074	for (j = 1; j < NSUBEXP; j++)
				3075	{
				3076	t->sub.start[j] = m->start[j];
				3077	t->sub.end[j] = m->end[j];
				3078	}
				3079	/* t->state->out1 is the corresponding END_INVISIBLE node */
				3080	addstate(thislist, t->state->out1->out, &t->sub, 0, listid,
				3081	&match);
				3082	}
				3083	else
				3084	{
				3085	/* continue with next input char */
				3086	reginput = old_reginput;
				3087	}
				3088	break;
				3089
				3090	case NFA_BOL:
				3091	if (reginput == regline)
				3092	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3093	&match);
				3094	break;
				3095
				3096	case NFA_EOL:
				3097	if (c == NUL)
				3098	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3099	&match);
				3100	break;
				3101
				3102	case NFA_BOW:
				3103	{
				3104	int bow = TRUE;
				3105
				3106	if (c == NUL)
				3107	bow = FALSE;
				3108	#ifdef FEAT_MBYTE
				3109	else if (has_mbyte)
				3110	{
				3111	int this_class;
				3112
				3113	/* Get class of current and previous char (if it exists). */
				3114	this_class = mb_get_class(reginput);
				3115	if (this_class <= 1)
				3116	bow = FALSE;
				3117	else if (reg_prev_class() == this_class)
				3118	bow = FALSE;
				3119	}
				3120	#endif
				3121	else if (!vim_iswordc(c)
				3122	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3123	bow = FALSE;
				3124	if (bow)
				3125	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3126	&match);
				3127	break;
				3128	}
				3129
				3130	case NFA_EOW:
				3131	{
				3132	int eow = TRUE;
				3133
				3134	if (reginput == regline)
				3135	eow = FALSE;
				3136	#ifdef FEAT_MBYTE
				3137	else if (has_mbyte)
				3138	{
				3139	int this_class, prev_class;
				3140
				3141	/* Get class of current and previous char (if it exists). */
				3142	this_class = mb_get_class(reginput);
				3143	prev_class = reg_prev_class();
				3144	if (this_class == prev_class
				3145	\|\| prev_class == 0 \|\| prev_class == 1)
				3146	eow = FALSE;
				3147	}
				3148	#endif
				3149	else if (!vim_iswordc(reginput[-1])
				3150	\|\| (reginput[0] != NUL && vim_iswordc(c)))
				3151	eow = FALSE;
				3152	if (eow)
				3153	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3154	&match);
				3155	break;
				3156	}
				3157
				3158	case NFA_MULTIBYTE:
				3159	case NFA_COMPOSING:
				3160	switch (t->state->c)
				3161	{
				3162	case NFA_MULTIBYTE: endnode = NFA_END_MULTIBYTE; break;
				3163	case NFA_COMPOSING: endnode = NFA_END_COMPOSING; break;
				3164	default: endnode = 0;
				3165	}
				3166
				3167	result = OK;
				3168	sta = t->state->out;
				3169	len = 1;
				3170	while (sta->c != endnode && len <= n)
				3171	{
				3172	if (reginput[len-1] != sta->c)
				3173	{
				3174	result = OK - 1;
				3175	break;
				3176	}
				3177	len++;
				3178	sta = sta->out;
				3179	}
				3180
				3181	/* if input char length doesn't match regexp char length */
				3182	if (len -1 < n \|\| sta->c != endnode)
				3183	result = OK - 1;
				3184	end = t->state->out1; /* NFA_END_MULTIBYTE or
				3185	NFA_END_COMPOSING */
				3186	/* If \Z was present, then ignore composing characters */
				3187	if (regflags & RF_ICOMBINE)
				3188	result = 1 ^ sta->negated;
				3189	ADD_POS_NEG_STATE(end);
				3190	break;
				3191
				3192	case NFA_NEWL:
				3193	if (!reg_line_lbr && REG_MULTI
				3194	&& c == NUL && reglnum <= reg_maxline)
				3195	{
				3196	if (reginput_updated == FALSE)
				3197	{
				3198	reg_nextline();
				3199	reginput_updated = TRUE;
				3200	}
				3201	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3202	&match);
				3203	}
				3204	break;
				3205
				3206	case NFA_CLASS_ALNUM:
				3207	case NFA_CLASS_ALPHA:
				3208	case NFA_CLASS_BLANK:
				3209	case NFA_CLASS_CNTRL:
				3210	case NFA_CLASS_DIGIT:
				3211	case NFA_CLASS_GRAPH:
				3212	case NFA_CLASS_LOWER:
				3213	case NFA_CLASS_PRINT:
				3214	case NFA_CLASS_PUNCT:
				3215	case NFA_CLASS_SPACE:
				3216	case NFA_CLASS_UPPER:
				3217	case NFA_CLASS_XDIGIT:
				3218	case NFA_CLASS_TAB:
				3219	case NFA_CLASS_RETURN:
				3220	case NFA_CLASS_BACKSPACE:
				3221	case NFA_CLASS_ESCAPE:
				3222	result = check_char_class(t->state->c, c);
				3223	ADD_POS_NEG_STATE(t->state);
				3224	break;
				3225
				3226	case NFA_END_NEG_RANGE:
				3227	/* This follows a series of negated nodes, like:
				3228	* CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
				3229	if (c > 0)
				3230	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3231	&match);
				3232	break;
				3233
				3234	case NFA_ANY:
				3235	/* Any printable char, not just any char. '\0' (end of input)
				3236	* must not match */
				3237	if (c > 0)
				3238	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3239	&match);
				3240	break;
				3241
				3242	/*
				3243	* Character classes like \a for alpha, \d for digit etc.
				3244	*/
				3245	case NFA_IDENT: /* \i */
				3246	result = vim_isIDc(c);
				3247	ADD_POS_NEG_STATE(t->state);
				3248	break;
				3249
				3250	case NFA_SIDENT: /* \I */
				3251	result = !VIM_ISDIGIT(c) && vim_isIDc(c);
				3252	ADD_POS_NEG_STATE(t->state);
				3253	break;
				3254
				3255	case NFA_KWORD: /* \k */
				3256	result = vim_iswordp(cc);
				3257	ADD_POS_NEG_STATE(t->state);
				3258	break;
				3259
				3260	case NFA_SKWORD: /* \K */
				3261	result = !VIM_ISDIGIT(c) && vim_iswordp(cc);
				3262	ADD_POS_NEG_STATE(t->state);
				3263	break;
				3264
				3265	case NFA_FNAME: /* \f */
				3266	result = vim_isfilec(c);
				3267	ADD_POS_NEG_STATE(t->state);
				3268	break;
				3269
				3270	case NFA_SFNAME: /* \F */
				3271	result = !VIM_ISDIGIT(c) && vim_isfilec(c);
				3272	ADD_POS_NEG_STATE(t->state);
				3273	break;
				3274
				3275	case NFA_PRINT: /* \p */
				3276	result = ptr2cells(cc) == 1;
				3277	ADD_POS_NEG_STATE(t->state);
				3278	break;
				3279
				3280	case NFA_SPRINT: /* \P */
				3281	result = !VIM_ISDIGIT(c) && ptr2cells(cc) == 1;
				3282	ADD_POS_NEG_STATE(t->state);
				3283	break;
				3284
				3285	case NFA_WHITE: /* \s */
				3286	result = vim_iswhite(c);
				3287	ADD_POS_NEG_STATE(t->state);
				3288	break;
				3289
				3290	case NFA_NWHITE: /* \S */
				3291	result = c != NUL && !vim_iswhite(c);
				3292	ADD_POS_NEG_STATE(t->state);
				3293	break;
				3294
				3295	case NFA_DIGIT: /* \d */
				3296	result = ri_digit(c);
				3297	ADD_POS_NEG_STATE(t->state);
				3298	break;
				3299
				3300	case NFA_NDIGIT: /* \D */
				3301	result = c != NUL && !ri_digit(c);
				3302	ADD_POS_NEG_STATE(t->state);
				3303	break;
				3304
				3305	case NFA_HEX: /* \x */
				3306	result = ri_hex(c);
				3307	ADD_POS_NEG_STATE(t->state);
				3308	break;
				3309
				3310	case NFA_NHEX: /* \X */
				3311	result = c != NUL && !ri_hex(c);
				3312	ADD_POS_NEG_STATE(t->state);
				3313	break;
				3314
				3315	case NFA_OCTAL: /* \o */
				3316	result = ri_octal(c);
				3317	ADD_POS_NEG_STATE(t->state);
				3318	break;
				3319
				3320	case NFA_NOCTAL: /* \O */
				3321	result = c != NUL && !ri_octal(c);
				3322	ADD_POS_NEG_STATE(t->state);
				3323	break;
				3324
				3325	case NFA_WORD: /* \w */
				3326	result = ri_word(c);
				3327	ADD_POS_NEG_STATE(t->state);
				3328	break;
				3329
				3330	case NFA_NWORD: /* \W */
				3331	result = c != NUL && !ri_word(c);
				3332	ADD_POS_NEG_STATE(t->state);
				3333	break;
				3334
				3335	case NFA_HEAD: /* \h */
				3336	result = ri_head(c);
				3337	ADD_POS_NEG_STATE(t->state);
				3338	break;
				3339
				3340	case NFA_NHEAD: /* \H */
				3341	result = c != NUL && !ri_head(c);
				3342	ADD_POS_NEG_STATE(t->state);
				3343	break;
				3344
				3345	case NFA_ALPHA: /* \a */
				3346	result = ri_alpha(c);
				3347	ADD_POS_NEG_STATE(t->state);
				3348	break;
				3349
				3350	case NFA_NALPHA: /* \A */
				3351	result = c != NUL && !ri_alpha(c);
				3352	ADD_POS_NEG_STATE(t->state);
				3353	break;
				3354
				3355	case NFA_LOWER: /* \l */
				3356	result = ri_lower(c);
				3357	ADD_POS_NEG_STATE(t->state);
				3358	break;
				3359
				3360	case NFA_NLOWER: /* \L */
				3361	result = c != NUL && !ri_lower(c);
				3362	ADD_POS_NEG_STATE(t->state);
				3363	break;
				3364
				3365	case NFA_UPPER: /* \u */
				3366	result = ri_upper(c);
				3367	ADD_POS_NEG_STATE(t->state);
				3368	break;
				3369
				3370	case NFA_NUPPER: /* \U */
				3371	result = c != NUL && !ri_upper(c);
				3372	ADD_POS_NEG_STATE(t->state);
				3373	break;
				3374
				3375	default: /* regular character */
				3376	result = (no_Magic(t->state->c) == c);
				3377	if (!result)
				3378	result = ireg_ic == TRUE
				3379	&& MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
				3380	ADD_POS_NEG_STATE(t->state);
				3381	break;
				3382	}
				3383
				3384	} /* for (thislist = thislist; thislist->state; thislist++) */
				3385
				3386	/* The first found match is the leftmost one, but there may be a
				3387	* longer one. Keep running the NFA, but don't start from the
				3388	* beginning. Also, do not add the start state in recursive calls of
				3389	* nfa_regmatch(), because recursive calls should only start in the
				3390	* first position. */
				3391	if (match == FALSE && start->c == NFA_MOPEN + 0)
				3392	{
				3393	#ifdef ENABLE_LOG
				3394	fprintf(log_fd, "(---) STARTSTATE\n");
				3395	#endif
				3396	addstate(nextlist, start, m, n, listid + 1, &match);
				3397	}
				3398
				3399	if (reginput_updated)
				3400	{
				3401	reginput_updated = FALSE;
				3402	goto again;
				3403	}
				3404
				3405	#ifdef ENABLE_LOG
				3406	fprintf(log_fd, ">>> Thislist had %d states available: ", thislist->n);
				3407	for (i = 0; i< thislist->n; i++)
				3408	fprintf(log_fd, "%d ", abs(thislist->t[i].state->id));
				3409	fprintf(log_fd, "\n");
				3410	#endif
				3411
				3412	nextchar:
				3413	reginput += n;
				3414	} while (c \|\| reginput_updated);
				3415
				3416	#ifdef ENABLE_LOG
				3417	if (log_fd != stderr)
				3418	fclose(log_fd);
				3419	log_fd = NULL;
				3420	#endif
				3421
				3422	theend:
				3423	/* Free memory */
				3424	vim_free(list[0].t);
				3425	vim_free(list[1].t);
				3426	vim_free(list[2].t);
				3427	list[0].t = list[1].t = list[2].t = NULL;
				3428	if (listids != NULL)
				3429	vim_free(listids);
				3430	#undef ADD_POS_NEG_STATE
				3431	#ifdef DEBUG
				3432	fclose(debug);
				3433	#endif
				3434
				3435	return match;
				3436	}
				3437
				3438	/*
				3439	* Try match of "prog" with at regline["col"].
				3440	* Returns 0 for failure, number of lines contained in the match otherwise.
				3441	*/
				3442	static long
				3443	nfa_regtry(start, col)
				3444	nfa_state_T *start;
				3445	colnr_T col;
				3446	{
				3447	int i;
				3448	regsub_T sub, m;
				3449	#ifdef ENABLE_LOG
				3450	FILE *f;
				3451	#endif
				3452
				3453	reginput = regline + col;
				3454	need_clear_subexpr = TRUE;
				3455
				3456	#ifdef ENABLE_LOG
				3457	f = fopen(LOG_NAME, "a");
				3458	if (f != NULL)
				3459	{
				3460	fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n");
				3461	fprintf(f, " =======================================================\n");
				3462	#ifdef DEBUG
				3463	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
				3464	#endif
				3465	fprintf(f, "\tInput text is \"%s\" \n", reginput);
				3466	fprintf(f, " =======================================================\n\n\n\n\n\n\n");
				3467	nfa_print_state(f, start, 0);
				3468	fprintf(f, "\n\n");
				3469	fclose(f);
				3470	}
				3471	else
				3472	EMSG(_("Could not open temporary log file for writing "));
				3473	#endif
				3474
				3475	if (REG_MULTI)
				3476	{
				3477	/* Use 0xff to set lnum to -1 */
				3478	vim_memset(sub.startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3479	vim_memset(sub.endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3480	vim_memset(m.startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3481	vim_memset(m.endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3482	}
				3483	else
				3484	{
				3485	vim_memset(sub.start, 0, sizeof(char_u ) NSUBEXP);
				3486	vim_memset(sub.end, 0, sizeof(char_u ) NSUBEXP);
				3487	vim_memset(m.start, 0, sizeof(char_u ) NSUBEXP);
				3488	vim_memset(m.end, 0, sizeof(char_u ) NSUBEXP);
				3489	}
				3490
				3491	if (nfa_regmatch(start, &sub, &m) == FALSE)
				3492	return 0;
				3493
				3494	cleanup_subexpr();
				3495	if (REG_MULTI)
				3496	{
				3497	for (i = 0; i < NSUBEXP; i++)
				3498	{
				3499	reg_startpos[i] = sub.startpos[i];
				3500	reg_endpos[i] = sub.endpos[i];
				3501	}
				3502
				3503	if (reg_startpos[0].lnum < 0)
				3504	{
				3505	reg_startpos[0].lnum = 0;
				3506	reg_startpos[0].col = col;
				3507	}
				3508	if (reg_endpos[0].lnum < 0)
				3509	{
				3510	reg_endpos[0].lnum = reglnum;
				3511	reg_endpos[0].col = (int)(reginput - regline);
				3512	}
				3513	else
				3514	/* Use line number of "\ze". */
				3515	reglnum = reg_endpos[0].lnum;
				3516	}
				3517	else
				3518	{
				3519	for (i = 0; i < NSUBEXP; i++)
				3520	{
				3521	reg_startp[i] = sub.start[i];
				3522	reg_endp[i] = sub.end[i];
				3523	}
				3524
				3525	if (reg_startp[0] == NULL)
				3526	reg_startp[0] = regline + col;
				3527	if (reg_endp[0] == NULL)
				3528	reg_endp[0] = reginput;
				3529	}
				3530
				3531	return 1 + reglnum;
				3532	}
				3533
				3534	/*
				3535	* Match a regexp against a string ("line" points to the string) or multiple
				3536	* lines ("line" is NULL, use reg_getline()).
				3537	*
				3538	* Returns 0 for failure, number of lines contained in the match otherwise.
				3539	*/
				3540	static long
				3541	nfa_regexec_both(line, col)
				3542	char_u *line;
				3543	colnr_T col; /* column to start looking for match */
				3544	{
				3545	nfa_regprog_T *prog;
				3546	long retval = 0L;
				3547	int i;
				3548
				3549	if (REG_MULTI)
				3550	{
				3551	prog = (nfa_regprog_T *)reg_mmatch->regprog;
				3552	line = reg_getline((linenr_T)0); /* relative to the cursor */
				3553	reg_startpos = reg_mmatch->startpos;
				3554	reg_endpos = reg_mmatch->endpos;
				3555	}
				3556	else
				3557	{
				3558	prog = (nfa_regprog_T *)reg_match->regprog;
				3559	reg_startp = reg_match->startp;
				3560	reg_endp = reg_match->endp;
				3561	}
				3562
				3563	/* Be paranoid... */
				3564	if (prog == NULL \|\| line == NULL)
				3565	{
				3566	EMSG(_(e_null));
				3567	goto theend;
				3568	}
				3569
				3570	/* If the start column is past the maximum column: no need to try. */
				3571	if (ireg_maxcol > 0 && col >= ireg_maxcol)
				3572	goto theend;
				3573
				3574	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3575	if (prog->regflags & RF_ICASE)
				3576	ireg_ic = TRUE;
				3577	else if (prog->regflags & RF_NOICASE)
				3578	ireg_ic = FALSE;
				3579
				3580	#ifdef FEAT_MBYTE
				3581	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3582	if (prog->regflags & RF_ICOMBINE)
				3583	ireg_icombine = TRUE;
				3584	#endif
				3585
				3586	regline = line;
				3587	reglnum = 0; /* relative to line */
				3588
				3589	nstate = prog->nstate;
				3590
				3591	for (i = 0; i < nstate; ++i)
				3592	{
				3593	prog->state[i].id = i;
				3594	prog->state[i].lastlist = 0;
				3595	prog->state[i].visits = 0;
				3596	prog->state[i].lastthread = NULL;
				3597	}
				3598
				3599	retval = nfa_regtry(prog->start, col);
				3600
				3601	theend:
				3602	return retval;
				3603	}
				3604
				3605	/*
				3606	* Compile a regular expression into internal code for the NFA matcher.
				3607	* Returns the program in allocated space. Returns NULL for an error.
				3608	*/
				3609	static regprog_T *
				3610	nfa_regcomp(expr, re_flags)
				3611	char_u *expr;
				3612	int re_flags;
				3613	{
				3614	nfa_regprog_T *prog;
				3615	int prog_size;
				3616	int *postfix;
				3617
				3618	if (expr == NULL)
				3619	return NULL;
				3620
				3621	#ifdef DEBUG
				3622	nfa_regengine.expr = expr;
				3623	#endif
				3624
				3625	init_class_tab();
				3626
				3627	if (nfa_regcomp_start(expr, re_flags) == FAIL)
				3628	return NULL;
				3629
				3630	/* Space for compiled regexp */
				3631	prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * nstate_max;
				3632	prog = (nfa_regprog_T *)lalloc(prog_size, TRUE);
				3633	if (prog == NULL)
				3634	goto fail;
				3635	vim_memset(prog, 0, prog_size);
				3636
				3637	/* Build postfix form of the regexp. Needed to build the NFA
				3638	* (and count its size) */
				3639	postfix = re2post();
				3640	if (postfix == NULL)
				3641	goto fail; /* Cascaded (syntax?) error */
				3642
				3643	/*
				3644	* In order to build the NFA, we parse the input regexp twice:
				3645	* 1. first pass to count size (so we can allocate space)
				3646	* 2. second to emit code
				3647	*/
				3648	#ifdef ENABLE_LOG
				3649	{
				3650	FILE *f = fopen(LOG_NAME, "a");
				3651
				3652	if (f != NULL)
				3653	{
				3654	fprintf(f, "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", expr);
				3655	fclose(f);
				3656	}
				3657	}
				3658	#endif
				3659
				3660	/*
				3661	* PASS 1
				3662	* Count number of NFA states in "nstate". Do not build the NFA.
				3663	*/
				3664	post2nfa(postfix, post_ptr, TRUE);
				3665	state_ptr = prog->state;
				3666
				3667	/*
				3668	* PASS 2
				3669	* Build the NFA
				3670	*/
				3671	prog->start = post2nfa(postfix, post_ptr, FALSE);
				3672	if (prog->start == NULL)
				3673	goto fail;
				3674
				3675	prog->regflags = regflags;
				3676	prog->engine = &nfa_regengine;
				3677	prog->nstate = nstate;
				3678	#ifdef ENABLE_LOG
				3679	nfa_postfix_dump(expr, OK);
				3680	nfa_dump(prog);
				3681	#endif
				3682
				3683	out:
				3684	vim_free(post_start);
				3685	post_start = post_ptr = post_end = NULL;
				3686	state_ptr = NULL;
				3687	return (regprog_T *)prog;
				3688
				3689	fail:
				3690	vim_free(prog);
				3691	prog = NULL;
				3692	#ifdef ENABLE_LOG
				3693	nfa_postfix_dump(expr, FAIL);
				3694	#endif
				3695	#ifdef DEBUG
				3696	nfa_regengine.expr = NULL;
				3697	#endif
				3698	goto out;
				3699	}
				3700
				3701
				3702	/*
				3703	* Match a regexp against a string.
				3704	* "rmp->regprog" is a compiled regexp as returned by nfa_regcomp().
				3705	* Uses curbuf for line count and 'iskeyword'.
				3706	*
				3707	* Return TRUE if there is a match, FALSE if not.
				3708	*/
				3709	static int
				3710	nfa_regexec(rmp, line, col)
				3711	regmatch_T *rmp;
				3712	char_u line; / string to match against */
				3713	colnr_T col; /* column to start looking for match */
				3714	{
				3715	reg_match = rmp;
				3716	reg_mmatch = NULL;
				3717	reg_maxline = 0;
				3718	reg_line_lbr = FALSE;
				3719	reg_buf = curbuf;
				3720	reg_win = NULL;
				3721	ireg_ic = rmp->rm_ic;
				3722	#ifdef FEAT_MBYTE
				3723	ireg_icombine = FALSE;
				3724	#endif
				3725	ireg_maxcol = 0;
				3726	return (nfa_regexec_both(line, col) != 0);
				3727	}
				3728
				3729	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \
				3730	\|\| defined(FIND_REPLACE_DIALOG) \|\| defined(PROTO)
				3731
				3732	static int nfa_regexec_nl __ARGS((regmatch_T rmp, char_u line, colnr_T col));
				3733
				3734	/*
				3735	* Like nfa_regexec(), but consider a "\n" in "line" to be a line break.
				3736	*/
				3737	static int
				3738	nfa_regexec_nl(rmp, line, col)
				3739	regmatch_T *rmp;
				3740	char_u line; / string to match against */
				3741	colnr_T col; /* column to start looking for match */
				3742	{
				3743	reg_match = rmp;
				3744	reg_mmatch = NULL;
				3745	reg_maxline = 0;
				3746	reg_line_lbr = TRUE;
				3747	reg_buf = curbuf;
				3748	reg_win = NULL;
				3749	ireg_ic = rmp->rm_ic;
				3750	#ifdef FEAT_MBYTE
				3751	ireg_icombine = FALSE;
				3752	#endif
				3753	ireg_maxcol = 0;
				3754	return (nfa_regexec_both(line, col) != 0);
				3755	}
				3756	#endif
				3757
				3758
				3759	/*
				3760	* Match a regexp against multiple lines.
				3761	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3762	* Uses curbuf for line count and 'iskeyword'.
				3763	*
				3764	* Return zero if there is no match. Return number of lines contained in the
				3765	* match otherwise.
				3766	*
				3767	* Note: the body is the same as bt_regexec() except for nfa_regexec_both()
				3768	*
				3769	* ! Also NOTE : match may actually be in another line. e.g.:
				3770	* when r.e. is \nc, cursor is at 'a' and the text buffer looks like
				3771	*
				3772	* +-------------------------+
				3773	* \|a \|
				3774	* \|b \|
				3775	* \|c \|
				3776	* \| \|
				3777	* +-------------------------+
				3778	*
				3779	* then nfa_regexec_multi() returns 3. while the original
				3780	* vim_regexec_multi() returns 0 and a second call at line 2 will return 2.
				3781	*
				3782	* FIXME if this behavior is not compatible.
				3783	*/
				3784	static long
				3785	nfa_regexec_multi(rmp, win, buf, lnum, col, tm)
				3786	regmmatch_T *rmp;
				3787	win_T win; / window in which to search or NULL */
				3788	buf_T buf; / buffer in which to search */
				3789	linenr_T lnum; /* nr of line to start looking for match */
				3790	colnr_T col; /* column to start looking for match */
				3791	proftime_T tm UNUSED; / timeout limit or NULL */
				3792	{
				3793	long r;
				3794	buf_T *save_curbuf = curbuf;
				3795
				3796	reg_match = NULL;
				3797	reg_mmatch = rmp;
				3798	reg_buf = buf;
				3799	reg_win = win;
				3800	reg_firstlnum = lnum;
				3801	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				3802	reg_line_lbr = FALSE;
				3803	ireg_ic = rmp->rmm_ic;
				3804	#ifdef FEAT_MBYTE
				3805	ireg_icombine = FALSE;
				3806	#endif
				3807	ireg_maxcol = rmp->rmm_maxcol;
				3808
				3809	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				3810	curbuf = buf;
				3811	r = nfa_regexec_both(NULL, col);
				3812	curbuf = save_curbuf;
				3813
				3814	return r;
				3815	}
				3816
				3817	#ifdef DEBUG
				3818	# undef ENABLE_LOG
				3819	#endif