Blame - src/regexp_nfa.c - android_external_vim

blob: eea173816ab176a2faa73ef58d4fdaacfb87cf6d [file] [log] [blame]

Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	1	/* vi:set ts=8 sts=4 sw=4:
				2	*
				3	* NFA regular expression implementation.
				4	*
				5	* This file is included in "regexp.c".
				6	*/
				7
				8	#ifdef DEBUG
				9	/* Comment this out to disable log files. They can get pretty big */
				10	# define ENABLE_LOG
				11	# define LOG_NAME "log_nfarun.log"
				12	#endif
				13
				14	/* Upper limit allowed for {m,n} repetitions handled by NFA */
				15	#define NFA_BRACES_MAXLIMIT 10
				16	/* For allocating space for the postfix representation */
				17	#define NFA_POSTFIX_MULTIPLIER (NFA_BRACES_MAXLIMIT + 2)*2
				18	/* Size of stack, used when converting the postfix regexp into NFA */
				19	#define NFA_STACK_SIZE 1024
				20
				21	enum
				22	{
				23	NFA_SPLIT = -1024,
				24	NFA_MATCH,
				25	NFA_SKIP_CHAR, /* matches a 0-length char */
				26	NFA_END_NEG_RANGE, /* Used when expanding [^ab] */
				27
				28	NFA_CONCAT,
				29	NFA_OR,
				30	NFA_STAR,
				31	NFA_PLUS,
				32	NFA_QUEST,
				33	NFA_QUEST_NONGREEDY, /* Non-greedy version of \? */
				34	NFA_NOT, /* used for [^ab] negated char ranges */
				35
				36	NFA_BOL, /* ^ Begin line */
				37	NFA_EOL, /* $ End line */
				38	NFA_BOW, /* \< Begin word */
				39	NFA_EOW, /* \> End word */
				40	NFA_BOF, /* \%^ Begin file */
				41	NFA_EOF, /* \%$ End file */
				42	NFA_NEWL,
				43	NFA_ZSTART, /* Used for \zs */
				44	NFA_ZEND, /* Used for \ze */
				45	NFA_NOPEN, /* Start of subexpression marked with \%( */
				46	NFA_NCLOSE, /* End of subexpr. marked with \%( ... \) */
				47	NFA_START_INVISIBLE,
				48	NFA_END_INVISIBLE,
				49	NFA_MULTIBYTE, /* Next nodes in NFA are part of the same
				50	multibyte char */
				51	NFA_END_MULTIBYTE, /* End of multibyte char in the NFA */
				52	NFA_COMPOSING, /* Next nodes in NFA are part of the
				53	composing multibyte char */
				54	NFA_END_COMPOSING, /* End of a composing char in the NFA */
				55
				56	/* The following are used only in the postfix form, not in the NFA */
				57	NFA_PREV_ATOM_NO_WIDTH, /* Used for \@= */
				58	NFA_PREV_ATOM_NO_WIDTH_NEG, /* Used for \@! */
				59	NFA_PREV_ATOM_JUST_BEFORE, /* Used for \@<= */
				60	NFA_PREV_ATOM_JUST_BEFORE_NEG, /* Used for \@<! */
				61	NFA_PREV_ATOM_LIKE_PATTERN, /* Used for \@> */
				62
				63	NFA_MOPEN,
				64	NFA_MCLOSE = NFA_MOPEN + NSUBEXP,
				65
				66	/* NFA_FIRST_NL */
				67	NFA_ANY = NFA_MCLOSE + NSUBEXP, /* Match any one character. */
				68	NFA_ANYOF, /* Match any character in this string. */
				69	NFA_ANYBUT, /* Match any character not in this string. */
				70	NFA_IDENT, /* Match identifier char */
				71	NFA_SIDENT, /* Match identifier char but no digit */
				72	NFA_KWORD, /* Match keyword char */
				73	NFA_SKWORD, /* Match word char but no digit */
				74	NFA_FNAME, /* Match file name char */
				75	NFA_SFNAME, /* Match file name char but no digit */
				76	NFA_PRINT, /* Match printable char */
				77	NFA_SPRINT, /* Match printable char but no digit */
				78	NFA_WHITE, /* Match whitespace char */
				79	NFA_NWHITE, /* Match non-whitespace char */
				80	NFA_DIGIT, /* Match digit char */
				81	NFA_NDIGIT, /* Match non-digit char */
				82	NFA_HEX, /* Match hex char */
				83	NFA_NHEX, /* Match non-hex char */
				84	NFA_OCTAL, /* Match octal char */
				85	NFA_NOCTAL, /* Match non-octal char */
				86	NFA_WORD, /* Match word char */
				87	NFA_NWORD, /* Match non-word char */
				88	NFA_HEAD, /* Match head char */
				89	NFA_NHEAD, /* Match non-head char */
				90	NFA_ALPHA, /* Match alpha char */
				91	NFA_NALPHA, /* Match non-alpha char */
				92	NFA_LOWER, /* Match lowercase char */
				93	NFA_NLOWER, /* Match non-lowercase char */
				94	NFA_UPPER, /* Match uppercase char */
				95	NFA_NUPPER, /* Match non-uppercase char */
				96	NFA_FIRST_NL = NFA_ANY + ADD_NL,
				97	NFA_LAST_NL = NFA_NUPPER + ADD_NL,
				98
				99	/* Character classes [:alnum:] etc */
				100	NFA_CLASS_ALNUM,
				101	NFA_CLASS_ALPHA,
				102	NFA_CLASS_BLANK,
				103	NFA_CLASS_CNTRL,
				104	NFA_CLASS_DIGIT,
				105	NFA_CLASS_GRAPH,
				106	NFA_CLASS_LOWER,
				107	NFA_CLASS_PRINT,
				108	NFA_CLASS_PUNCT,
				109	NFA_CLASS_SPACE,
				110	NFA_CLASS_UPPER,
				111	NFA_CLASS_XDIGIT,
				112	NFA_CLASS_TAB,
				113	NFA_CLASS_RETURN,
				114	NFA_CLASS_BACKSPACE,
				115	NFA_CLASS_ESCAPE
				116	};
				117
				118	/* Keep in sync with classchars. */
				119	static int nfa_classcodes[] = {
				120	NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD,NFA_SKWORD,
				121	NFA_FNAME, NFA_SFNAME, NFA_PRINT, NFA_SPRINT,
				122	NFA_WHITE, NFA_NWHITE, NFA_DIGIT, NFA_NDIGIT,
				123	NFA_HEX, NFA_NHEX, NFA_OCTAL, NFA_NOCTAL,
				124	NFA_WORD, NFA_NWORD, NFA_HEAD, NFA_NHEAD,
				125	NFA_ALPHA, NFA_NALPHA, NFA_LOWER, NFA_NLOWER,
				126	NFA_UPPER, NFA_NUPPER
				127	};
				128
				129	static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c");
				130
				131	/*
				132	* NFA errors can be of 3 types:
				133	* *** NFA runtime errors, when something unknown goes wrong. The NFA fails
				134	* silently and revert the to backtracking engine.
				135	* syntax_error = FALSE;
				136	* *** Regexp syntax errors, when the input regexp is not syntactically correct.
				137	* The NFA engine displays an error message, and nothing else happens.
				138	* syntax_error = TRUE
				139	* *** Unsupported features, when the input regexp uses an operator that is not
				140	* implemented in the NFA. The NFA engine fails silently, and reverts to the
				141	* old backtracking engine.
				142	* syntax_error = FALSE
				143	* "The NFA fails" means that "compiling the regexp with the NFA fails":
				144	* nfa_regcomp() returns FAIL.
				145	*/
				146	static int syntax_error = FALSE;
				147
				148	/* NFA regexp \ze operator encountered. */
				149	static int nfa_has_zend = FALSE;
				150
				151	static int post_start; / holds the postfix form of r.e. */
				152	static int *post_end;
				153	static int *post_ptr;
				154
				155	static int nstate; /* Number of states in the NFA. */
				156	static int istate; /* Index in the state vector, used in new_state() */
				157	static int nstate_max; /* Upper bound of estimated number of states. */
				158
				159
				160	static int nfa_regcomp_start __ARGS((char_u*expr, int re_flags));
				161	static int nfa_recognize_char_class __ARGS((char_u start, char_u end, int extra_newl));
				162	static int nfa_emit_equi_class __ARGS((int c, int neg));
				163	static void nfa_inc __ARGS((char_u **p));
				164	static void nfa_dec __ARGS((char_u **p));
				165	static int nfa_regatom __ARGS((void));
				166	static int nfa_regpiece __ARGS((void));
				167	static int nfa_regconcat __ARGS((void));
				168	static int nfa_regbranch __ARGS((void));
				169	static int nfa_reg __ARGS((int paren));
				170	#ifdef DEBUG
				171	static void nfa_set_code __ARGS((int c));
				172	static void nfa_postfix_dump __ARGS((char_u *expr, int retval));
				173	static void nfa_print_state __ARGS((FILE debugf, nfa_state_T state, int ident));
				174	static void nfa_dump __ARGS((nfa_regprog_T *prog));
				175	#endif
				176	static int *re2post __ARGS((void));
				177	static nfa_state_T new_state __ARGS((int c, nfa_state_T out, nfa_state_T *out1));
				178	static nfa_state_T post2nfa __ARGS((int postfix, int *end, int nfa_calc_size));
				179	static int check_char_class __ARGS((int class, int c));
				180	static void st_error __ARGS((int postfix, int end, int *p));
				181	static void nfa_save_listids __ARGS((nfa_state_T start, int list));
				182	static void nfa_restore_listids __ARGS((nfa_state_T start, int list));
				183	static void nfa_set_null_listids __ARGS((nfa_state_T *start));
				184	static void nfa_set_neg_listids __ARGS((nfa_state_T *start));
				185	static long nfa_regtry __ARGS((nfa_state_T *start, colnr_T col));
				186	static long nfa_regexec_both __ARGS((char_u *line, colnr_T col));
				187	static regprog_T nfa_regcomp __ARGS((char_u expr, int re_flags));
				188	static int nfa_regexec __ARGS((regmatch_T rmp, char_u line, colnr_T col));
				189	static long nfa_regexec_multi __ARGS((regmmatch_T rmp, win_T win, buf_T buf, linenr_T lnum, colnr_T col, proftime_T tm));
				190
				191	/* helper functions used when doing re2post() ... regatom() parsing */
				192	#define EMIT(c) do { \
				193	if (post_ptr >= post_end) \
				194	return FAIL; \
				195	*post_ptr++ = c; \
				196	} while (0)
				197
				198	#define EMIT_MBYTE(c) \
				199	len = (*mb_char2bytes)(c, buf); \
				200	EMIT(buf[0]); \
				201	for (i = 1; i < len; i++) \
				202	{ \
				203	EMIT(buf[i]); \
				204	EMIT(NFA_CONCAT); \
				205	} \
				206	EMIT(NFA_MULTIBYTE);
				207
				208	#define EMIT_COMPOSING_UTF(input) \
				209	len = utfc_ptr2len(input); \
				210	EMIT(input[0]); \
				211	for (i = 1; i < len; i++) \
				212	{ \
				213	EMIT(input[i]); \
				214	EMIT(NFA_CONCAT); \
				215	} \
				216	EMIT(NFA_COMPOSING);
				217
				218	/*
				219	* Initialize internal variables before NFA compilation.
				220	* Return OK on success, FAIL otherwise.
				221	*/
				222	static int
				223	nfa_regcomp_start(expr, re_flags)
				224	char_u *expr;
				225	int re_flags; /* see vim_regcomp() */
				226	{
				227	int postfix_size;
				228
				229	nstate = 0;
				230	istate = 0;
				231	/* A reasonable estimation for size */
				232	nstate_max = (STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
				233
Bram Moolenaar	bc0ea8f	2013-05-20 13:44:29 +0200	[diff] [blame^]	234	/* Some items blow up in size, such as [A-z]. Add more space for that.
				235	* TODO: some patterns may still fail. */
				236	// nstate_max += 1000;
				237
				238	/* Size for postfix representation of expr. */
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	239	postfix_size = sizeof(post_start) nstate_max;
Bram Moolenaar	bc0ea8f	2013-05-20 13:44:29 +0200	[diff] [blame^]	240
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	241	post_start = (int *)lalloc(postfix_size, TRUE);
				242	if (post_start == NULL)
				243	return FAIL;
				244	vim_memset(post_start, 0, postfix_size);
				245	post_ptr = post_start;
Bram Moolenaar	bc0ea8f	2013-05-20 13:44:29 +0200	[diff] [blame^]	246	post_end = post_start + nstate_max;
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	247	nfa_has_zend = FALSE;
				248
				249	regcomp_start(expr, re_flags);
				250
				251	return OK;
				252	}
				253
				254	/*
				255	* Search between "start" and "end" and try to recognize a
				256	* character class in expanded form. For example [0-9].
				257	* On success, return the id the character class to be emitted.
				258	* On failure, return 0 (=FAIL)
				259	* Start points to the first char of the range, while end should point
				260	* to the closing brace.
				261	*/
				262	static int
				263	nfa_recognize_char_class(start, end, extra_newl)
				264	char_u *start;
				265	char_u *end;
				266	int extra_newl;
				267	{
				268	int i;
				269	/* Each of these variables takes up a char in "config[]",
				270	* in the order they are here. */
				271	int not = FALSE, af = FALSE, AF = FALSE, az = FALSE, AZ = FALSE,
				272	o7 = FALSE, o9 = FALSE, underscore = FALSE, newl = FALSE;
				273	char_u *p;
				274	#define NCONFIGS 16
				275	int classid[NCONFIGS] = {
				276	NFA_DIGIT, NFA_NDIGIT, NFA_HEX, NFA_NHEX,
				277	NFA_OCTAL, NFA_NOCTAL, NFA_WORD, NFA_NWORD,
				278	NFA_HEAD, NFA_NHEAD, NFA_ALPHA, NFA_NALPHA,
				279	NFA_LOWER, NFA_NLOWER, NFA_UPPER, NFA_NUPPER
				280	};
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	281	char_u myconfig[10];
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	282	char_u config[NCONFIGS][9] = {
				283	"000000100", /* digit */
				284	"100000100", /* non digit */
				285	"011000100", /* hex-digit */
				286	"111000100", /* non hex-digit */
				287	"000001000", /* octal-digit */
				288	"100001000", /* [^0-7] */
				289	"000110110", /* [0-9A-Za-z_] */
				290	"100110110", /* [^0-9A-Za-z_] */
				291	"000110010", /* head of word */
				292	"100110010", /* not head of word */
				293	"000110000", /* alphabetic char a-z */
				294	"100110000", /* non alphabetic char */
				295	"000100000", /* lowercase letter */
				296	"100100000", /* non lowercase */
				297	"000010000", /* uppercase */
				298	"100010000" /* non uppercase */
				299	};
				300
				301	if (extra_newl == TRUE)
				302	newl = TRUE;
				303
				304	if (*end != ']')
				305	return FAIL;
				306	p = start;
				307	if (*p == '^')
				308	{
				309	not = TRUE;
				310	p ++;
				311	}
				312
				313	while (p < end)
				314	{
				315	if (p + 2 < end && *(p + 1) == '-')
				316	{
				317	switch (*p)
				318	{
				319	case '0':
				320	if (*(p + 2) == '9')
				321	{
				322	o9 = TRUE;
				323	break;
				324	}
				325	else
				326	if (*(p + 2) == '7')
				327	{
				328	o7 = TRUE;
				329	break;
				330	}
				331	case 'a':
				332	if (*(p + 2) == 'z')
				333	{
				334	az = TRUE;
				335	break;
				336	}
				337	else
				338	if (*(p + 2) == 'f')
				339	{
				340	af = TRUE;
				341	break;
				342	}
				343	case 'A':
				344	if (*(p + 2) == 'Z')
				345	{
				346	AZ = TRUE;
				347	break;
				348	}
				349	else
				350	if (*(p + 2) == 'F')
				351	{
				352	AF = TRUE;
				353	break;
				354	}
				355	/* FALLTHROUGH */
				356	default:
				357	return FAIL;
				358	}
				359	p += 3;
				360	}
				361	else if (p + 1 < end && p == '\\' && (p + 1) == 'n')
				362	{
				363	newl = TRUE;
				364	p += 2;
				365	}
				366	else if (*p == '_')
				367	{
				368	underscore = TRUE;
				369	p ++;
				370	}
				371	else if (*p == '\n')
				372	{
				373	newl = TRUE;
				374	p ++;
				375	}
				376	else
				377	return FAIL;
				378	} /* while (p < end) */
				379
				380	if (p != end)
				381	return FAIL;
				382
				383	/* build the config that represents the ranges we gathered */
				384	STRCPY(myconfig, "000000000");
				385	if (not == TRUE)
				386	myconfig[0] = '1';
				387	if (af == TRUE)
				388	myconfig[1] = '1';
				389	if (AF == TRUE)
				390	myconfig[2] = '1';
				391	if (az == TRUE)
				392	myconfig[3] = '1';
				393	if (AZ == TRUE)
				394	myconfig[4] = '1';
				395	if (o7 == TRUE)
				396	myconfig[5] = '1';
				397	if (o9 == TRUE)
				398	myconfig[6] = '1';
				399	if (underscore == TRUE)
				400	myconfig[7] = '1';
				401	if (newl == TRUE)
				402	{
				403	myconfig[8] = '1';
				404	extra_newl = ADD_NL;
				405	}
				406	/* try to recognize character classes */
				407	for (i = 0; i < NCONFIGS; i++)
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	408	if (STRNCMP(myconfig, config[i], 8) == 0)
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	409	return classid[i] + extra_newl;
				410
				411	/* fallthrough => no success so far */
				412	return FAIL;
				413
				414	#undef NCONFIGS
				415	}
				416
				417	/*
				418	* Produce the bytes for equivalence class "c".
				419	* Currently only handles latin1, latin9 and utf-8.
				420	* Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
				421	* equivalent to 'a OR b OR c'
				422	*
				423	* NOTE! When changing this function, also update reg_equi_class()
				424	*/
				425	static int
				426	nfa_emit_equi_class(c, neg)
				427	int c;
				428	int neg;
				429	{
				430	int first = TRUE;
				431	int glue = neg == TRUE ? NFA_CONCAT : NFA_OR;
				432	#define EMIT2(c) \
				433	EMIT(c); \
				434	if (neg == TRUE) { \
				435	EMIT(NFA_NOT); \
				436	} \
				437	if (first == FALSE) \
				438	EMIT(glue); \
				439	else \
				440	first = FALSE; \
				441
				442	#ifdef FEAT_MBYTE
				443	if (enc_utf8 \|\| STRCMP(p_enc, "latin1") == 0
				444	\|\| STRCMP(p_enc, "iso-8859-15") == 0)
				445	#endif
				446	{
				447	switch (c)
				448	{
				449	case 'A': case '\300': case '\301': case '\302':
				450	case '\303': case '\304': case '\305':
				451	EMIT2('A'); EMIT2('\300'); EMIT2('\301');
				452	EMIT2('\302'); EMIT2('\303'); EMIT2('\304');
				453	EMIT2('\305');
				454	return OK;
				455
				456	case 'C': case '\307':
				457	EMIT2('C'); EMIT2('\307');
				458	return OK;
				459
				460	case 'E': case '\310': case '\311': case '\312': case '\313':
				461	EMIT2('E'); EMIT2('\310'); EMIT2('\311');
				462	EMIT2('\312'); EMIT2('\313');
				463	return OK;
				464
				465	case 'I': case '\314': case '\315': case '\316': case '\317':
				466	EMIT2('I'); EMIT2('\314'); EMIT2('\315');
				467	EMIT2('\316'); EMIT2('\317');
				468	return OK;
				469
				470	case 'N': case '\321':
				471	EMIT2('N'); EMIT2('\321');
				472	return OK;
				473
				474	case 'O': case '\322': case '\323': case '\324': case '\325':
				475	case '\326':
				476	EMIT2('O'); EMIT2('\322'); EMIT2('\323');
				477	EMIT2('\324'); EMIT2('\325'); EMIT2('\326');
				478	return OK;
				479
				480	case 'U': case '\331': case '\332': case '\333': case '\334':
				481	EMIT2('U'); EMIT2('\331'); EMIT2('\332');
				482	EMIT2('\333'); EMIT2('\334');
				483	return OK;
				484
				485	case 'Y': case '\335':
				486	EMIT2('Y'); EMIT2('\335');
				487	return OK;
				488
				489	case 'a': case '\340': case '\341': case '\342':
				490	case '\343': case '\344': case '\345':
				491	EMIT2('a'); EMIT2('\340'); EMIT2('\341');
				492	EMIT2('\342'); EMIT2('\343'); EMIT2('\344');
				493	EMIT2('\345');
				494	return OK;
				495
				496	case 'c': case '\347':
				497	EMIT2('c'); EMIT2('\347');
				498	return OK;
				499
				500	case 'e': case '\350': case '\351': case '\352': case '\353':
				501	EMIT2('e'); EMIT2('\350'); EMIT2('\351');
				502	EMIT2('\352'); EMIT2('\353');
				503	return OK;
				504
				505	case 'i': case '\354': case '\355': case '\356': case '\357':
				506	EMIT2('i'); EMIT2('\354'); EMIT2('\355');
				507	EMIT2('\356'); EMIT2('\357');
				508	return OK;
				509
				510	case 'n': case '\361':
				511	EMIT2('n'); EMIT2('\361');
				512	return OK;
				513
				514	case 'o': case '\362': case '\363': case '\364': case '\365':
				515	case '\366':
				516	EMIT2('o'); EMIT2('\362'); EMIT2('\363');
				517	EMIT2('\364'); EMIT2('\365'); EMIT2('\366');
				518	return OK;
				519
				520	case 'u': case '\371': case '\372': case '\373': case '\374':
				521	EMIT2('u'); EMIT2('\371'); EMIT2('\372');
				522	EMIT2('\373'); EMIT2('\374');
				523	return OK;
				524
				525	case 'y': case '\375': case '\377':
				526	EMIT2('y'); EMIT2('\375'); EMIT2('\377');
				527	return OK;
				528
				529	default:
				530	return FAIL;
				531	}
				532	}
				533
				534	EMIT(c);
				535	return OK;
				536	#undef EMIT2
				537	}
				538
				539	/*
				540	* Code to parse regular expression.
				541	*
				542	* We try to reuse parsing functions in regexp.c to
				543	* minimize surprise and keep the syntax consistent.
				544	*/
				545
				546	/*
				547	* Increments the pointer "p" by one (multi-byte) character.
				548	*/
				549	static void
				550	nfa_inc(p)
				551	char_u **p;
				552	{
				553	#ifdef FEAT_MBYTE
				554	if (has_mbyte)
				555	mb_ptr2char_adv(p);
				556	else
				557	#endif
				558	p = p + 1;
				559	}
				560
				561	/*
				562	* Decrements the pointer "p" by one (multi-byte) character.
				563	*/
				564	static void
				565	nfa_dec(p)
				566	char_u **p;
				567	{
				568	#ifdef FEAT_MBYTE
				569	char_u p2, oldp;
				570
				571	if (has_mbyte)
				572	{
				573	oldp = *p;
				574	/* Try to find the multibyte char that advances to the current
				575	* position. */
				576	do
				577	{
				578	p = p - 1;
				579	p2 = *p;
				580	mb_ptr2char_adv(&p2);
				581	} while (p2 != oldp);
				582	}
				583	#else
				584	p = p - 1;
				585	#endif
				586	}
				587
				588	/*
				589	* Parse the lowest level.
				590	*
				591	* An atom can be one of a long list of items. Many atoms match one character
				592	* in the text. It is often an ordinary character or a character class.
				593	* Braces can be used to make a pattern into an atom. The "\z(\)" construct
				594	* is only for syntax highlighting.
				595	*
				596	* atom ::= ordinary-atom
				597	* or $ pattern $
				598	* or \%( pattern \)
				599	* or \z( pattern \)
				600	*/
				601	static int
				602	nfa_regatom()
				603	{
				604	int c;
				605	int charclass;
				606	int equiclass;
				607	int collclass;
				608	int got_coll_char;
				609	char_u *p;
				610	char_u *endp;
				611	#ifdef FEAT_MBYTE
				612	char_u *old_regparse = regparse;
				613	int clen;
				614	int len;
				615	static char_u buf[30];
				616	int i;
				617	#endif
				618	int extra = 0;
				619	int first;
				620	int emit_range;
				621	int negated;
				622	int result;
				623	int startc = -1;
				624	int endc = -1;
				625	int oldstartc = -1;
				626	int cpo_lit; /* 'cpoptions' contains 'l' flag */
				627	int cpo_bsl; /* 'cpoptions' contains '\' flag */
				628	int glue; /* ID that will "glue" nodes together */
				629
				630	cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
				631	cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
				632
				633	c = getchr();
				634
				635	#ifdef FEAT_MBYTE
				636	/* clen has the length of the current char, without composing chars */
				637	clen = (*mb_char2len)(c);
				638	if (has_mbyte && clen > 1)
				639	goto nfa_do_multibyte;
				640	#endif
				641	switch (c)
				642	{
				643	case Magic('^'):
				644	EMIT(NFA_BOL);
				645	break;
				646
				647	case Magic('$'):
				648	EMIT(NFA_EOL);
				649	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				650	had_eol = TRUE;
				651	#endif
				652	break;
				653
				654	case Magic('<'):
				655	EMIT(NFA_BOW);
				656	break;
				657
				658	case Magic('>'):
				659	EMIT(NFA_EOW);
				660	break;
				661
				662	case Magic('_'):
				663	c = no_Magic(getchr());
				664	if (c == '^') /* "\_^" is start-of-line */
				665	{
				666	EMIT(NFA_BOL);
				667	break;
				668	}
				669	if (c == '$') /* "\_$" is end-of-line */
				670	{
				671	EMIT(NFA_EOL);
				672	#if defined(FEAT_SYN_HL) \|\| defined(PROTO)
				673	had_eol = TRUE;
				674	#endif
				675	break;
				676	}
				677
				678	extra = ADD_NL;
				679
				680	/* "\_[" is collection plus newline */
				681	if (c == '[')
				682	/* TODO: make this work
				683	* goto collection; */
				684	return FAIL;
				685
				686	/* "\_x" is character class plus newline */
				687	/FALLTHROUGH/
				688
				689	/*
				690	* Character classes.
				691	*/
				692	case Magic('.'):
				693	case Magic('i'):
				694	case Magic('I'):
				695	case Magic('k'):
				696	case Magic('K'):
				697	case Magic('f'):
				698	case Magic('F'):
				699	case Magic('p'):
				700	case Magic('P'):
				701	case Magic('s'):
				702	case Magic('S'):
				703	case Magic('d'):
				704	case Magic('D'):
				705	case Magic('x'):
				706	case Magic('X'):
				707	case Magic('o'):
				708	case Magic('O'):
				709	case Magic('w'):
				710	case Magic('W'):
				711	case Magic('h'):
				712	case Magic('H'):
				713	case Magic('a'):
				714	case Magic('A'):
				715	case Magic('l'):
				716	case Magic('L'):
				717	case Magic('u'):
				718	case Magic('U'):
				719	p = vim_strchr(classchars, no_Magic(c));
				720	if (p == NULL)
				721	{
				722	return FAIL; /* runtime error */
				723	}
				724	#ifdef FEAT_MBYTE
				725	/* When '.' is followed by a composing char ignore the dot, so that
				726	* the composing char is matched here. */
				727	if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
				728	{
				729	c = getchr();
				730	goto nfa_do_multibyte;
				731	}
				732	#endif
				733	EMIT(nfa_classcodes[p - classchars]);
				734	if (extra == ADD_NL)
				735	{
				736	EMIT(NFA_NEWL);
				737	EMIT(NFA_OR);
				738	regflags \|= RF_HASNL;
				739	}
				740	break;
				741
				742	case Magic('n'):
				743	if (reg_string)
				744	/* In a string "\n" matches a newline character. */
				745	EMIT(NL);
				746	else
				747	{
				748	/* In buffer text "\n" matches the end of a line. */
				749	EMIT(NFA_NEWL);
				750	regflags \|= RF_HASNL;
				751	}
				752	break;
				753
				754	case Magic('('):
				755	if (nfa_reg(REG_PAREN) == FAIL)
				756	return FAIL; /* cascaded error */
				757	break;
				758
				759	case NUL:
				760	syntax_error = TRUE;
				761	EMSG_RET_FAIL(_("E865: (NFA) Regexp end encountered prematurely"));
				762
				763	case Magic('\|'):
				764	case Magic('&'):
				765	case Magic(')'):
				766	syntax_error = TRUE;
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	767	EMSGN(_(e_misplaced), no_Magic(c));
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	768	return FAIL;
				769
				770	case Magic('='):
				771	case Magic('?'):
				772	case Magic('+'):
				773	case Magic('@'):
				774	case Magic('*'):
				775	case Magic('{'):
				776	/* these should follow an atom, not form an atom */
				777	syntax_error = TRUE;
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	778	EMSGN(_(e_misplaced), no_Magic(c));
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	779	return FAIL;
				780
				781	case Magic('~'): /* previous substitute pattern */
				782	/* Not supported yet */
				783	return FAIL;
				784
				785	case Magic('1'):
				786	case Magic('2'):
				787	case Magic('3'):
				788	case Magic('4'):
				789	case Magic('5'):
				790	case Magic('6'):
				791	case Magic('7'):
				792	case Magic('8'):
				793	case Magic('9'):
				794	/* not supported yet */
				795	return FAIL;
				796
				797	case Magic('z'):
				798	c = no_Magic(getchr());
				799	switch (c)
				800	{
				801	case 's':
				802	EMIT(NFA_ZSTART);
				803	break;
				804	case 'e':
				805	EMIT(NFA_ZEND);
				806	nfa_has_zend = TRUE;
				807	/* TODO: Currently \ze does not work properly. */
				808	return FAIL;
				809	/* break; */
				810	case '1':
				811	case '2':
				812	case '3':
				813	case '4':
				814	case '5':
				815	case '6':
				816	case '7':
				817	case '8':
				818	case '9':
				819	case '(':
				820	/* \z1...\z9 and \z( not yet supported */
				821	return FAIL;
				822	default:
				823	syntax_error = TRUE;
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	824	EMSGN(_("E867: (NFA) Unknown operator '\\z%c'"),
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	825	no_Magic(c));
				826	return FAIL;
				827	}
				828	break;
				829
				830	case Magic('%'):
				831	c = no_Magic(getchr());
				832	switch (c)
				833	{
				834	/* () without a back reference */
				835	case '(':
				836	if (nfa_reg(REG_NPAREN) == FAIL)
				837	return FAIL;
				838	EMIT(NFA_NOPEN);
				839	break;
				840
				841	case 'd': /* %d123 decimal */
				842	case 'o': /* %o123 octal */
				843	case 'x': /* %xab hex 2 */
				844	case 'u': /* %uabcd hex 4 */
				845	case 'U': /* %U1234abcd hex 8 */
				846	/* Not yet supported */
				847	return FAIL;
				848
				849	c = coll_get_char();
				850	#ifdef FEAT_MBYTE
				851	if ((*mb_char2len)(c) > 1)
				852	{
				853	EMIT_MBYTE(c);
				854	}
				855	else
				856	#endif
				857	EMIT(c);
				858	break;
				859
				860	/* Catch \%^ and \%$ regardless of where they appear in the
				861	* pattern -- regardless of whether or not it makes sense. */
				862	case '^':
				863	EMIT(NFA_BOF);
				864	/* Not yet supported */
				865	return FAIL;
				866	break;
				867
				868	case '$':
				869	EMIT(NFA_EOF);
				870	/* Not yet supported */
				871	return FAIL;
				872	break;
				873
				874	case '#':
				875	/* not supported yet */
				876	return FAIL;
				877	break;
				878
				879	case 'V':
				880	/* not supported yet */
				881	return FAIL;
				882	break;
				883
				884	case '[':
				885	/* \%[abc] not supported yet */
				886	return FAIL;
				887
				888	default:
				889	/* not supported yet */
				890	return FAIL;
				891	}
				892	break;
				893
				894	/* collection: */
				895	case Magic('['):
				896	/*
				897	* Glue is emitted between several atoms from the [].
				898	* It is either NFA_OR, or NFA_CONCAT.
				899	*
				900	* [abc] expands to 'a b NFA_OR c NFA_OR' (in postfix notation)
				901	* [^abc] expands to 'a NFA_NOT b NFA_NOT NFA_CONCAT c NFA_NOT
				902	* NFA_CONCAT NFA_END_NEG_RANGE NFA_CONCAT' (in postfix
				903	* notation)
				904	*
				905	*/
				906
				907
				908	/* Emit negation atoms, if needed.
				909	* The CONCAT below merges the NOT with the previous node. */
				910	#define TRY_NEG() \
				911	if (negated == TRUE) \
				912	{ \
				913	EMIT(NFA_NOT); \
				914	}
				915
				916	/* Emit glue between important nodes : CONCAT or OR. */
				917	#define EMIT_GLUE() \
				918	if (first == FALSE) \
				919	EMIT(glue); \
				920	else \
				921	first = FALSE;
				922
				923	p = regparse;
				924	endp = skip_anyof(p);
				925	if (*endp == ']')
				926	{
				927	/*
				928	* Try to reverse engineer character classes. For example,
				929	* recognize that [0-9] stands for \d and [A-Za-z_] with \h,
				930	* and perform the necessary substitutions in the NFA.
				931	*/
				932	result = nfa_recognize_char_class(regparse, endp,
				933	extra == ADD_NL);
				934	if (result != FAIL)
				935	{
				936	if (result >= NFA_DIGIT && result <= NFA_NUPPER)
				937	EMIT(result);
				938	else /* must be char class + newline */
				939	{
				940	EMIT(result - ADD_NL);
				941	EMIT(NFA_NEWL);
				942	EMIT(NFA_OR);
				943	}
				944	regparse = endp;
				945	nfa_inc(&regparse);
				946	return OK;
				947	}
				948	/*
				949	* Failed to recognize a character class. Use the simple
				950	* version that turns [abc] into 'a' OR 'b' OR 'c'
				951	*/
				952	startc = endc = oldstartc = -1;
				953	first = TRUE; /* Emitting first atom in this sequence? */
				954	negated = FALSE;
				955	glue = NFA_OR;
				956	if (regparse == '^') / negated range */
				957	{
				958	negated = TRUE;
				959	glue = NFA_CONCAT;
				960	nfa_inc(&regparse);
				961	}
				962	if (*regparse == '-')
				963	{
				964	startc = '-';
				965	EMIT(startc);
				966	TRY_NEG();
				967	EMIT_GLUE();
				968	nfa_inc(&regparse);
				969	}
				970	/* Emit the OR branches for each character in the [] */
				971	emit_range = FALSE;
				972	while (regparse < endp)
				973	{
				974	oldstartc = startc;
				975	startc = -1;
				976	got_coll_char = FALSE;
				977	if (*regparse == '[')
				978	{
				979	/* Check for [: :], [= =], [. .] */
				980	equiclass = collclass = 0;
				981	charclass = get_char_class(&regparse);
				982	if (charclass == CLASS_NONE)
				983	{
				984	equiclass = get_equi_class(&regparse);
				985	if (equiclass == 0)
				986	collclass = get_coll_element(&regparse);
				987	}
				988
				989	/* Character class like [:alpha:] */
				990	if (charclass != CLASS_NONE)
				991	{
				992	switch (charclass)
				993	{
				994	case CLASS_ALNUM:
				995	EMIT(NFA_CLASS_ALNUM);
				996	break;
				997	case CLASS_ALPHA:
				998	EMIT(NFA_CLASS_ALPHA);
				999	break;
				1000	case CLASS_BLANK:
				1001	EMIT(NFA_CLASS_BLANK);
				1002	break;
				1003	case CLASS_CNTRL:
				1004	EMIT(NFA_CLASS_CNTRL);
				1005	break;
				1006	case CLASS_DIGIT:
				1007	EMIT(NFA_CLASS_DIGIT);
				1008	break;
				1009	case CLASS_GRAPH:
				1010	EMIT(NFA_CLASS_GRAPH);
				1011	break;
				1012	case CLASS_LOWER:
				1013	EMIT(NFA_CLASS_LOWER);
				1014	break;
				1015	case CLASS_PRINT:
				1016	EMIT(NFA_CLASS_PRINT);
				1017	break;
				1018	case CLASS_PUNCT:
				1019	EMIT(NFA_CLASS_PUNCT);
				1020	break;
				1021	case CLASS_SPACE:
				1022	EMIT(NFA_CLASS_SPACE);
				1023	break;
				1024	case CLASS_UPPER:
				1025	EMIT(NFA_CLASS_UPPER);
				1026	break;
				1027	case CLASS_XDIGIT:
				1028	EMIT(NFA_CLASS_XDIGIT);
				1029	break;
				1030	case CLASS_TAB:
				1031	EMIT(NFA_CLASS_TAB);
				1032	break;
				1033	case CLASS_RETURN:
				1034	EMIT(NFA_CLASS_RETURN);
				1035	break;
				1036	case CLASS_BACKSPACE:
				1037	EMIT(NFA_CLASS_BACKSPACE);
				1038	break;
				1039	case CLASS_ESCAPE:
				1040	EMIT(NFA_CLASS_ESCAPE);
				1041	break;
				1042	}
				1043	TRY_NEG();
				1044	EMIT_GLUE();
				1045	continue;
				1046	}
				1047	/* Try equivalence class [=a=] and the like */
				1048	if (equiclass != 0)
				1049	{
				1050	result = nfa_emit_equi_class(equiclass, negated);
				1051	if (result == FAIL)
				1052	{
				1053	/* should never happen */
				1054	EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
				1055	}
				1056	EMIT_GLUE();
				1057	continue;
				1058	}
				1059	/* Try collating class like [. .] */
				1060	if (collclass != 0)
				1061	{
				1062	startc = collclass; /* allow [.a.]-x as a range */
				1063	/* Will emit the proper atom at the end of the
				1064	* while loop. */
				1065	}
				1066	}
				1067	/* Try a range like 'a-x' or '\t-z' */
				1068	if (*regparse == '-')
				1069	{
				1070	emit_range = TRUE;
				1071	startc = oldstartc;
				1072	nfa_inc(&regparse);
				1073	continue; /* reading the end of the range */
				1074	}
				1075
				1076	/* Now handle simple and escaped characters.
				1077	* Only "\]", "\^", "\]" and "\\" are special in Vi. Vim
				1078	* accepts "\t", "\e", etc., but only when the 'l' flag in
				1079	* 'cpoptions' is not included.
				1080	* Posix doesn't recognize backslash at all.
				1081	*/
				1082	if (*regparse == '\\'
				1083	&& !cpo_bsl
				1084	&& regparse + 1 <= endp
				1085	&& (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
				1086	\|\| (!cpo_lit
				1087	&& vim_strchr(REGEXP_ABBR, regparse[1])
				1088	!= NULL)
				1089	)
				1090	)
				1091	{
				1092	nfa_inc(&regparse);
				1093
				1094	if (regparse == 'n' \|\| regparse == 'n')
				1095	startc = reg_string ? NL : NFA_NEWL;
				1096	else
				1097	if (*regparse == 'd'
				1098	\|\| *regparse == 'o'
				1099	\|\| *regparse == 'x'
				1100	\|\| *regparse == 'u'
				1101	\|\| *regparse == 'U'
				1102	)
				1103	{
				1104	/* TODO(RE) This needs more testing */
				1105	startc = coll_get_char();
				1106	got_coll_char = TRUE;
				1107	nfa_dec(&regparse);
				1108	}
				1109	else
				1110	{
				1111	/* \r,\t,\e,\b */
				1112	startc = backslash_trans(*regparse);
				1113	}
				1114	}
				1115
				1116	/* Normal printable char */
				1117	if (startc == -1)
				1118	#ifdef FEAT_MBYTE
				1119	startc = (*mb_ptr2char)(regparse);
				1120	#else
				1121	startc = *regparse;
				1122	#endif
				1123
				1124	/* Previous char was '-', so this char is end of range. */
				1125	if (emit_range)
				1126	{
				1127	endc = startc; startc = oldstartc;
				1128	if (startc > endc)
				1129	EMSG_RET_FAIL(_(e_invrange));
				1130	#ifdef FEAT_MBYTE
				1131	if (has_mbyte && ((*mb_char2len)(startc) > 1
				1132	\|\| (*mb_char2len)(endc) > 1))
				1133	{
				1134	if (endc > startc + 256)
				1135	EMSG_RET_FAIL(_(e_invrange));
				1136	/* Emit the range. "startc" was already emitted, so
				1137	* skip it. */
				1138	for (c = startc + 1; c <= endc; c++)
				1139	{
				1140	if ((*mb_char2len)(c) > 1)
				1141	{
				1142	EMIT_MBYTE(c);
				1143	}
				1144	else
				1145	EMIT(c);
				1146	TRY_NEG();
				1147	EMIT_GLUE();
				1148	}
				1149	emit_range = FALSE;
				1150	}
				1151	else
				1152	#endif
				1153	{
				1154	#ifdef EBCDIC
				1155	int alpha_only = FALSE;
				1156
				1157	/* for alphabetical range skip the gaps
				1158	* 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
				1159	if (isalpha(startc) && isalpha(endc))
				1160	alpha_only = TRUE;
				1161	#endif
				1162	/* Emit the range. "startc" was already emitted, so
				1163	* skip it. */
				1164	for (c = startc + 1; c <= endc; c++)
				1165	#ifdef EBCDIC
				1166	if (!alpha_only \|\| isalpha(startc))
				1167	#endif
				1168	{
				1169	EMIT(c);
				1170	TRY_NEG();
				1171	EMIT_GLUE();
				1172	}
				1173	emit_range = FALSE;
				1174	}
				1175	}
				1176	else
				1177	{
				1178	/*
				1179	* This char (startc) is not part of a range. Just
				1180	* emit it.
				1181	*
				1182	* Normally, simply emit startc. But if we get char
				1183	* code=0 from a collating char, then replace it with
				1184	* 0x0a.
				1185	*
				1186	* This is needed to completely mimic the behaviour of
				1187	* the backtracking engine.
				1188	*/
				1189	if (got_coll_char == TRUE && startc == 0)
				1190	EMIT(0x0a);
				1191	else
				1192	#ifdef FEAT_MBYTE
				1193	if ((*mb_char2len)(startc) > 1)
				1194	{
				1195	EMIT_MBYTE(startc);
				1196	}
				1197	else
				1198	#endif
				1199	EMIT(startc);
				1200	TRY_NEG();
				1201	EMIT_GLUE();
				1202	}
				1203
				1204	nfa_inc(&regparse);
				1205	} /* while (p < endp) */
				1206
				1207	nfa_dec(&regparse);
				1208	if (regparse == '-') / if last, '-' is just a char */
				1209	{
				1210	EMIT('-');
				1211	TRY_NEG();
				1212	EMIT_GLUE();
				1213	}
				1214	nfa_inc(&regparse);
				1215
				1216	if (extra == ADD_NL) /* \_[] also matches \n */
				1217	{
				1218	EMIT(reg_string ? NL : NFA_NEWL);
				1219	TRY_NEG();
				1220	EMIT_GLUE();
				1221	}
				1222
				1223	/* skip the trailing ] */
				1224	regparse = endp;
				1225	nfa_inc(&regparse);
				1226	if (negated == TRUE)
				1227	{
				1228	/* Mark end of negated char range */
				1229	EMIT(NFA_END_NEG_RANGE);
				1230	EMIT(NFA_CONCAT);
				1231	}
				1232	return OK;
				1233	} /* if exists closing ] */
				1234	else if (reg_strict)
				1235	{
				1236	syntax_error = TRUE;
				1237	EMSG_RET_FAIL(_(e_missingbracket));
				1238	}
				1239
				1240	/* FALLTHROUGH */
				1241	default:
				1242	{
				1243	#ifdef FEAT_MBYTE
				1244	int plen;
				1245
				1246	nfa_do_multibyte:
				1247	/* length of current char, with composing chars,
				1248	* from pointer */
				1249	plen = (*mb_ptr2len)(old_regparse);
				1250	if (enc_utf8 && clen != plen)
				1251	{
				1252	/* A composing character is always handled as a
				1253	* separate atom, surrounded by NFA_COMPOSING and
				1254	* NFA_END_COMPOSING. Note that right now we are
				1255	* building the postfix form, not the NFA itself;
				1256	* a composing char could be: a, b, c, NFA_COMPOSING
				1257	* where 'a', 'b', 'c' are chars with codes > 256.
				1258	*/
				1259	EMIT_COMPOSING_UTF(old_regparse);
				1260	regparse = old_regparse + plen;
				1261	}
				1262	else
				1263	/* A multi-byte character is always handled as a
				1264	* separate atom, surrounded by NFA_MULTIBYTE and
				1265	* NFA_END_MULTIBYTE */
				1266	if (plen > 1)
				1267	{
				1268	EMIT_MBYTE(c);
				1269	}
				1270	else
				1271	#endif
				1272	{
				1273	c = no_Magic(c);
				1274	EMIT(c);
				1275	}
				1276	return OK;
				1277	}
				1278	}
				1279
				1280	#undef TRY_NEG
				1281	#undef EMIT_GLUE
				1282
				1283	return OK;
				1284	}
				1285
				1286	/*
				1287	* Parse something followed by possible [*+=].
				1288	*
				1289	* A piece is an atom, possibly followed by a multi, an indication of how many
				1290	* times the atom can be matched. Example: "a*" matches any sequence of "a"
				1291	* characters: "", "a", "aa", etc.
				1292	*
				1293	* piece ::= atom
				1294	* or atom multi
				1295	*/
				1296	static int
				1297	nfa_regpiece()
				1298	{
				1299	int i;
				1300	int op;
				1301	int ret;
				1302	long minval, maxval;
				1303	int greedy = TRUE; /* Braces are prefixed with '-' ? */
				1304	char_u old_regparse, new_regparse;
				1305	int c2;
				1306	int old_post_ptr, my_post_start;
				1307	int old_regnpar;
				1308	int quest;
				1309
				1310	/* Save the current position in the regexp, so that we can use it if
				1311	* <atom>{m,n} is next. */
				1312	old_regparse = regparse;
				1313	/* Save current number of open parenthesis, so we can use it if
				1314	* <atom>{m,n} is next */
				1315	old_regnpar = regnpar;
				1316	/* store current pos in the postfix form, for \{m,n} involving 0s */
				1317	my_post_start = post_ptr;
				1318
				1319	ret = nfa_regatom();
				1320	if (ret == FAIL)
				1321	return FAIL; /* cascaded error */
				1322
				1323	op = peekchr();
				1324	if (re_multi_type(op) == NOT_MULTI)
				1325	return OK;
				1326
				1327	skipchr();
				1328	switch (op)
				1329	{
				1330	case Magic('*'):
				1331	EMIT(NFA_STAR);
				1332	break;
				1333
				1334	case Magic('+'):
				1335	/*
				1336	* Trick: Normally, (a*)\+ would match the whole input "aaa". The
				1337	* first and only submatch would be "aaa". But the backtracking
				1338	* engine interprets the plus as "try matching one more time", and
				1339	* a* matches a second time at the end of the input, the empty
				1340	* string.
				1341	* The submatch will the empty string.
				1342	*
				1343	* In order to be consistent with the old engine, we disable
				1344	* NFA_PLUS, and replace <atom>+ with <atom><atom>*
				1345	*/
				1346	/* EMIT(NFA_PLUS); */
				1347	regnpar = old_regnpar;
				1348	regparse = old_regparse;
				1349	curchr = -1;
				1350	if (nfa_regatom() == FAIL)
				1351	return FAIL;
				1352	EMIT(NFA_STAR);
				1353	EMIT(NFA_CONCAT);
				1354	skipchr(); /* skip the \+ */
				1355	break;
				1356
				1357	case Magic('@'):
				1358	op = no_Magic(getchr());
				1359	switch(op)
				1360	{
				1361	case '=':
				1362	EMIT(NFA_PREV_ATOM_NO_WIDTH);
				1363	break;
				1364	case '!':
				1365	case '<':
				1366	case '>':
				1367	/* Not supported yet */
				1368	return FAIL;
				1369	default:
				1370	syntax_error = TRUE;
Bram Moolenaar	ba40447	2013-05-19 22:31:18 +0200	[diff] [blame]	1371	EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op);
Bram Moolenaar	fbc0d2e	2013-05-19 19:40:29 +0200	[diff] [blame]	1372	return FAIL;
				1373	}
				1374	break;
				1375
				1376	case Magic('?'):
				1377	case Magic('='):
				1378	EMIT(NFA_QUEST);
				1379	break;
				1380
				1381	case Magic('{'):
				1382	/* a{2,5} will expand to 'aaa?a?a?'
				1383	* a{-1,3} will expand to 'aa??a??', where ?? is the nongreedy
				1384	* version of '?'
				1385	* \v(ab){2,3} will expand to '(ab)(ab)(ab)?', where all the
				1386	* parenthesis have the same id
				1387	*/
				1388
				1389	greedy = TRUE;
				1390	c2 = peekchr();
				1391	if (c2 == '-' \|\| c2 == Magic('-'))
				1392	{
				1393	skipchr();
				1394	greedy = FALSE;
				1395	}
				1396	if (!read_limits(&minval, &maxval))
				1397	{
				1398	syntax_error = TRUE;
				1399	EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits"));
				1400	}
				1401	/* <atom>{0,inf}, <atom>{0,} and <atom>{} are equivalent to
				1402	* <atom>* */
				1403	if (minval == 0 && maxval == MAX_LIMIT && greedy)
				1404	{
				1405	EMIT(NFA_STAR);
				1406	break;
				1407	}
				1408
				1409	if (maxval > NFA_BRACES_MAXLIMIT)
				1410	{
				1411	/* This would yield a huge automaton and use too much memory.
				1412	* Revert to old engine */
				1413	return FAIL;
				1414	}
				1415
				1416	/* Special case: x{0} or x{-0} */
				1417	if (maxval == 0)
				1418	{
				1419	/* Ignore result of previous call to nfa_regatom() */
				1420	post_ptr = my_post_start;
				1421	/* NFA_SKIP_CHAR has 0-length and works everywhere */
				1422	EMIT(NFA_SKIP_CHAR);
				1423	return OK;
				1424	}
				1425
				1426	/* Ignore previous call to nfa_regatom() */
				1427	post_ptr = my_post_start;
				1428	/* Save pos after the repeated atom and the \{} */
				1429	new_regparse = regparse;
				1430
				1431	new_regparse = regparse;
				1432	quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
				1433	for (i = 0; i < maxval; i++)
				1434	{
				1435	/* Goto beginning of the repeated atom */
				1436	regparse = old_regparse;
				1437	curchr = -1;
				1438	/* Restore count of parenthesis */
				1439	regnpar = old_regnpar;
				1440	old_post_ptr = post_ptr;
				1441	if (nfa_regatom() == FAIL)
				1442	return FAIL;
				1443	/* after "minval" times, atoms are optional */
				1444	if (i + 1 > minval)
				1445	EMIT(quest);
				1446	if (old_post_ptr != my_post_start)
				1447	EMIT(NFA_CONCAT);
				1448	}
				1449
				1450	/* Go to just after the repeated atom and the \{} */
				1451	regparse = new_regparse;
				1452	curchr = -1;
				1453
				1454	break;
				1455
				1456
				1457	default:
				1458	break;
				1459	} /* end switch */
				1460
				1461	if (re_multi_type(peekchr()) != NOT_MULTI)
				1462	{
				1463	/* Can't have a multi follow a multi. */
				1464	syntax_error = TRUE;
				1465	EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !"));
				1466	}
				1467
				1468	return OK;
				1469	}
				1470
				1471	/*
				1472	* Parse one or more pieces, concatenated. It matches a match for the
				1473	* first piece, followed by a match for the second piece, etc. Example:
				1474	* "f[0-9]b", first matches "f", then a digit and then "b".
				1475	*
				1476	* concat ::= piece
				1477	* or piece piece
				1478	* or piece piece piece
				1479	* etc.
				1480	*/
				1481	static int
				1482	nfa_regconcat()
				1483	{
				1484	int cont = TRUE;
				1485	int first = TRUE;
				1486
				1487	while (cont)
				1488	{
				1489	switch (peekchr())
				1490	{
				1491	case NUL:
				1492	case Magic('\|'):
				1493	case Magic('&'):
				1494	case Magic(')'):
				1495	cont = FALSE;
				1496	break;
				1497
				1498	case Magic('Z'):
				1499	#ifdef FEAT_MBYTE
				1500	regflags \|= RF_ICOMBINE;
				1501	#endif
				1502	skipchr_keepstart();
				1503	break;
				1504	case Magic('c'):
				1505	regflags \|= RF_ICASE;
				1506	skipchr_keepstart();
				1507	break;
				1508	case Magic('C'):
				1509	regflags \|= RF_NOICASE;
				1510	skipchr_keepstart();
				1511	break;
				1512	case Magic('v'):
				1513	reg_magic = MAGIC_ALL;
				1514	skipchr_keepstart();
				1515	curchr = -1;
				1516	break;
				1517	case Magic('m'):
				1518	reg_magic = MAGIC_ON;
				1519	skipchr_keepstart();
				1520	curchr = -1;
				1521	break;
				1522	case Magic('M'):
				1523	reg_magic = MAGIC_OFF;
				1524	skipchr_keepstart();
				1525	curchr = -1;
				1526	break;
				1527	case Magic('V'):
				1528	reg_magic = MAGIC_NONE;
				1529	skipchr_keepstart();
				1530	curchr = -1;
				1531	break;
				1532
				1533	default:
				1534	if (nfa_regpiece() == FAIL)
				1535	return FAIL;
				1536	if (first == FALSE)
				1537	EMIT(NFA_CONCAT);
				1538	else
				1539	first = FALSE;
				1540	break;
				1541	}
				1542	}
				1543
				1544	return OK;
				1545	}
				1546
				1547	/*
				1548	* Parse a branch, one or more concats, separated by "\&". It matches the
				1549	* last concat, but only if all the preceding concats also match at the same
				1550	* position. Examples:
				1551	* "foobeep\&..." matches "foo" in "foobeep".
				1552	* ".Peter\&.Bob" matches in a line containing both "Peter" and "Bob"
				1553	*
				1554	* branch ::= concat
				1555	* or concat \& concat
				1556	* or concat \& concat \& concat
				1557	* etc.
				1558	*/
				1559	static int
				1560	nfa_regbranch()
				1561	{
				1562	int ch;
				1563	int *old_post_ptr;
				1564
				1565	old_post_ptr = post_ptr;
				1566
				1567	/* First branch, possibly the only one */
				1568	if (nfa_regconcat() == FAIL)
				1569	return FAIL;
				1570
				1571	ch = peekchr();
				1572	/* Try next concats */
				1573	while (ch == Magic('&'))
				1574	{
				1575	skipchr();
				1576	EMIT(NFA_NOPEN);
				1577	EMIT(NFA_PREV_ATOM_NO_WIDTH);
				1578	old_post_ptr = post_ptr;
				1579	if (nfa_regconcat() == FAIL)
				1580	return FAIL;
				1581	/* if concat is empty, skip a input char. But do emit a node */
				1582	if (old_post_ptr == post_ptr)
				1583	EMIT(NFA_SKIP_CHAR);
				1584	EMIT(NFA_CONCAT);
				1585	ch = peekchr();
				1586	}
				1587
				1588	/* Even if a branch is empty, emit one node for it */
				1589	if (old_post_ptr == post_ptr)
				1590	EMIT(NFA_SKIP_CHAR);
				1591
				1592	return OK;
				1593	}
				1594
				1595	/*
				1596	* Parse a pattern, one or more branches, separated by "\\|". It matches
				1597	* anything that matches one of the branches. Example: "foo\\|beep" matches
				1598	* "foo" and matches "beep". If more than one branch matches, the first one
				1599	* is used.
				1600	*
				1601	* pattern ::= branch
				1602	* or branch \\| branch
				1603	* or branch \\| branch \\| branch
				1604	* etc.
				1605	*/
				1606	static int
				1607	nfa_reg(paren)
				1608	int paren; /* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
				1609	{
				1610	int parno = 0;
				1611
				1612	#ifdef FEAT_SYN_HL
				1613	#endif
				1614	if (paren == REG_PAREN)
				1615	{
				1616	if (regnpar >= NSUBEXP) /* Too many `(' */
				1617	{
				1618	syntax_error = TRUE;
				1619	EMSG_RET_FAIL(_("E872: (NFA regexp) Too many '('"));
				1620	}
				1621	parno = regnpar++;
				1622	}
				1623
				1624	if (nfa_regbranch() == FAIL)
				1625	return FAIL; /* cascaded error */
				1626
				1627	while (peekchr() == Magic('\|'))
				1628	{
				1629	skipchr();
				1630	if (nfa_regbranch() == FAIL)
				1631	return FAIL; /* cascaded error */
				1632	EMIT(NFA_OR);
				1633	}
				1634
				1635	/* Check for proper termination. */
				1636	if (paren != REG_NOPAREN && getchr() != Magic(')'))
				1637	{
				1638	syntax_error = TRUE;
				1639	if (paren == REG_NPAREN)
				1640	EMSG2_RET_FAIL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
				1641	else
				1642	EMSG2_RET_FAIL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
				1643	}
				1644	else if (paren == REG_NOPAREN && peekchr() != NUL)
				1645	{
				1646	syntax_error = TRUE;
				1647	if (peekchr() == Magic(')'))
				1648	EMSG2_RET_FAIL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
				1649	else
				1650	EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error"));
				1651	}
				1652	/*
				1653	* Here we set the flag allowing back references to this set of
				1654	* parentheses.
				1655	*/
				1656	if (paren == REG_PAREN)
				1657	{
				1658	had_endbrace[parno] = TRUE; /* have seen the close paren */
				1659	EMIT(NFA_MOPEN + parno);
				1660	}
				1661
				1662	return OK;
				1663	}
				1664
/*
 * Sub-match boundaries, one slot per sub-expression (NSUBEXP slots each).
 * NOTE(review): presumably start/end are used when matching inside a single
 * string and startpos/endpos when matching across buffer lines -- confirm
 * against nfa_regmatch().
 */
				1665	typedef struct
				1666	{
				1667	char_u *start[NSUBEXP];
				1668	char_u *end[NSUBEXP];
				1669	lpos_T startpos[NSUBEXP];
				1670	lpos_T endpos[NSUBEXP];
				1671	} regsub_T;
				1672
				1673	static int nfa_regmatch __ARGS((nfa_state_T start, regsub_T submatch, regsub_T *m));
				1674
				1675	#ifdef DEBUG
				1676	static char_u code[50];
				1677
				1678	static void
				1679	nfa_set_code(c)
				1680	int c;
				1681	{
				1682	int addnl = FALSE;
				1683
				1684	if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
				1685	{
				1686	addnl = TRUE;
				1687	c -= ADD_NL;
				1688	}
				1689
				1690	STRCPY(code, "");
				1691	switch (c)
				1692	{
				1693	case NFA_MATCH: STRCPY(code, "NFA_MATCH "); break;
				1694	case NFA_SPLIT: STRCPY(code, "NFA_SPLIT "); break;
				1695	case NFA_CONCAT: STRCPY(code, "NFA_CONCAT "); break;
				1696	case NFA_NEWL: STRCPY(code, "NFA_NEWL "); break;
				1697	case NFA_ZSTART: STRCPY(code, "NFA_ZSTART"); break;
				1698	case NFA_ZEND: STRCPY(code, "NFA_ZEND"); break;
				1699
				1700	case NFA_PREV_ATOM_NO_WIDTH:
				1701	STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
				1702	case NFA_NOPEN: STRCPY(code, "NFA_MOPEN_INVISIBLE"); break;
				1703	case NFA_NCLOSE: STRCPY(code, "NFA_MCLOSE_INVISIBLE"); break;
				1704	case NFA_START_INVISIBLE: STRCPY(code, "NFA_START_INVISIBLE"); break;
				1705	case NFA_END_INVISIBLE: STRCPY(code, "NFA_END_INVISIBLE"); break;
				1706
				1707	case NFA_MULTIBYTE: STRCPY(code, "NFA_MULTIBYTE"); break;
				1708	case NFA_END_MULTIBYTE: STRCPY(code, "NFA_END_MULTIBYTE"); break;
				1709
				1710	case NFA_COMPOSING: STRCPY(code, "NFA_COMPOSING"); break;
				1711	case NFA_END_COMPOSING: STRCPY(code, "NFA_END_COMPOSING"); break;
				1712
				1713	case NFA_MOPEN + 0:
				1714	case NFA_MOPEN + 1:
				1715	case NFA_MOPEN + 2:
				1716	case NFA_MOPEN + 3:
				1717	case NFA_MOPEN + 4:
				1718	case NFA_MOPEN + 5:
				1719	case NFA_MOPEN + 6:
				1720	case NFA_MOPEN + 7:
				1721	case NFA_MOPEN + 8:
				1722	case NFA_MOPEN + 9:
				1723	STRCPY(code, "NFA_MOPEN(x)");
				1724	code[10] = c - NFA_MOPEN + '0';
				1725	break;
				1726	case NFA_MCLOSE + 0:
				1727	case NFA_MCLOSE + 1:
				1728	case NFA_MCLOSE + 2:
				1729	case NFA_MCLOSE + 3:
				1730	case NFA_MCLOSE + 4:
				1731	case NFA_MCLOSE + 5:
				1732	case NFA_MCLOSE + 6:
				1733	case NFA_MCLOSE + 7:
				1734	case NFA_MCLOSE + 8:
				1735	case NFA_MCLOSE + 9:
				1736	STRCPY(code, "NFA_MCLOSE(x)");
				1737	code[11] = c - NFA_MCLOSE + '0';
				1738	break;
				1739	case NFA_EOL: STRCPY(code, "NFA_EOL "); break;
				1740	case NFA_BOL: STRCPY(code, "NFA_BOL "); break;
				1741	case NFA_EOW: STRCPY(code, "NFA_EOW "); break;
				1742	case NFA_BOW: STRCPY(code, "NFA_BOW "); break;
				1743	case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
				1744	case NFA_PLUS: STRCPY(code, "NFA_PLUS "); break;
				1745	case NFA_NOT: STRCPY(code, "NFA_NOT "); break;
				1746	case NFA_SKIP_CHAR: STRCPY(code, "NFA_SKIP_CHAR"); break;
				1747	case NFA_OR: STRCPY(code, "NFA_OR"); break;
				1748	case NFA_QUEST: STRCPY(code, "NFA_QUEST"); break;
				1749	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
				1750	case NFA_END_NEG_RANGE: STRCPY(code, "NFA_END_NEG_RANGE"); break;
				1751	case NFA_CLASS_ALNUM: STRCPY(code, "NFA_CLASS_ALNUM"); break;
				1752	case NFA_CLASS_ALPHA: STRCPY(code, "NFA_CLASS_ALPHA"); break;
				1753	case NFA_CLASS_BLANK: STRCPY(code, "NFA_CLASS_BLANK"); break;
				1754	case NFA_CLASS_CNTRL: STRCPY(code, "NFA_CLASS_CNTRL"); break;
				1755	case NFA_CLASS_DIGIT: STRCPY(code, "NFA_CLASS_DIGIT"); break;
				1756	case NFA_CLASS_GRAPH: STRCPY(code, "NFA_CLASS_GRAPH"); break;
				1757	case NFA_CLASS_LOWER: STRCPY(code, "NFA_CLASS_LOWER"); break;
				1758	case NFA_CLASS_PRINT: STRCPY(code, "NFA_CLASS_PRINT"); break;
				1759	case NFA_CLASS_PUNCT: STRCPY(code, "NFA_CLASS_PUNCT"); break;
				1760	case NFA_CLASS_SPACE: STRCPY(code, "NFA_CLASS_SPACE"); break;
				1761	case NFA_CLASS_UPPER: STRCPY(code, "NFA_CLASS_UPPER"); break;
				1762	case NFA_CLASS_XDIGIT: STRCPY(code, "NFA_CLASS_XDIGIT"); break;
				1763	case NFA_CLASS_TAB: STRCPY(code, "NFA_CLASS_TAB"); break;
				1764	case NFA_CLASS_RETURN: STRCPY(code, "NFA_CLASS_RETURN"); break;
				1765	case NFA_CLASS_BACKSPACE: STRCPY(code, "NFA_CLASS_BACKSPACE"); break;
				1766	case NFA_CLASS_ESCAPE: STRCPY(code, "NFA_CLASS_ESCAPE"); break;
				1767
				1768	case NFA_ANY: STRCPY(code, "NFA_ANY"); break;
				1769	case NFA_IDENT: STRCPY(code, "NFA_IDENT"); break;
				1770	case NFA_SIDENT:STRCPY(code, "NFA_SIDENT"); break;
				1771	case NFA_KWORD: STRCPY(code, "NFA_KWORD"); break;
				1772	case NFA_SKWORD:STRCPY(code, "NFA_SKWORD"); break;
				1773	case NFA_FNAME: STRCPY(code, "NFA_FNAME"); break;
				1774	case NFA_SFNAME:STRCPY(code, "NFA_SFNAME"); break;
				1775	case NFA_PRINT: STRCPY(code, "NFA_PRINT"); break;
				1776	case NFA_SPRINT:STRCPY(code, "NFA_SPRINT"); break;
				1777	case NFA_WHITE: STRCPY(code, "NFA_WHITE"); break;
				1778	case NFA_NWHITE:STRCPY(code, "NFA_NWHITE"); break;
				1779	case NFA_DIGIT: STRCPY(code, "NFA_DIGIT"); break;
				1780	case NFA_NDIGIT:STRCPY(code, "NFA_NDIGIT"); break;
				1781	case NFA_HEX: STRCPY(code, "NFA_HEX"); break;
				1782	case NFA_NHEX: STRCPY(code, "NFA_NHEX"); break;
				1783	case NFA_OCTAL: STRCPY(code, "NFA_OCTAL"); break;
				1784	case NFA_NOCTAL:STRCPY(code, "NFA_NOCTAL"); break;
				1785	case NFA_WORD: STRCPY(code, "NFA_WORD"); break;
				1786	case NFA_NWORD: STRCPY(code, "NFA_NWORD"); break;
				1787	case NFA_HEAD: STRCPY(code, "NFA_HEAD"); break;
				1788	case NFA_NHEAD: STRCPY(code, "NFA_NHEAD"); break;
				1789	case NFA_ALPHA: STRCPY(code, "NFA_ALPHA"); break;
				1790	case NFA_NALPHA:STRCPY(code, "NFA_NALPHA"); break;
				1791	case NFA_LOWER: STRCPY(code, "NFA_LOWER"); break;
				1792	case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
				1793	case NFA_UPPER: STRCPY(code, "NFA_UPPER"); break;
				1794	case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
				1795
				1796	default:
				1797	STRCPY(code, "CHAR(x)");
				1798	code[5] = c;
				1799	}
				1800
				1801	if (addnl == TRUE)
				1802	STRCAT(code, " + NEWLINE ");
				1803
				1804	}
				1805
				1806	#ifdef ENABLE_LOG
/* Log file handle, only compiled in when ENABLE_LOG is defined.
 * NOTE(review): not referenced by the dump functions below, which open
 * "LOG.log" themselves -- presumably used by the matcher; confirm. */
				1807	static FILE *log_fd;
				1808
				1809	/*
				1810	* Print the postfix notation of the current regexp.
				1811	*/
				1812	static void
				1813	nfa_postfix_dump(expr, retval)
				1814	char_u *expr;
				1815	int retval;
				1816	{
				1817	int *p;
				1818	FILE *f;
				1819
				1820	f = fopen("LOG.log", "a");
				1821	if (f != NULL)
				1822	{
				1823	fprintf(f, "\n-------------------------\n");
				1824	if (retval == FAIL)
				1825	fprintf(f, ">>> NFA engine failed ... \n");
				1826	else if (retval == OK)
				1827	fprintf(f, ">>> NFA engine succeeded !\n");
				1828	fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr);
				1829	for (p=post_start; *p; p++)
				1830	{
				1831	nfa_set_code(*p);
				1832	fprintf(f, "%s, ", code);
				1833	}
				1834	fprintf(f, "\"\nPostfix notation (int): ");
				1835	for (p=post_start; *p; p++)
				1836	fprintf(f, "%d ", *p);
				1837	fprintf(f, "\n\n");
				1838	fclose(f);
				1839	}
				1840	}
				1841
				1842	/*
				1843	* Print the NFA starting with a root node "state".
				1844	*/
				1845	static void
				1846	nfa_print_state(debugf, state, ident)
				1847	FILE *debugf;
				1848	nfa_state_T *state;
				1849	int ident;
				1850	{
				1851	int i;
				1852
				1853	if (state == NULL)
				1854	return;
				1855
				1856	fprintf(debugf, "(%2d)", abs(state->id));
				1857	for (i = 0; i < ident; i++)
				1858	fprintf(debugf, "%c", ' ');
				1859
				1860	nfa_set_code(state->c);
				1861	fprintf(debugf, "%s %s (%d) (id=%d)\n",
				1862	state->negated ? "NOT" : "", code, state->c, abs(state->id));
				1863	if (state->id < 0)
				1864	return;
				1865
				1866	state->id = abs(state->id) * -1;
				1867	nfa_print_state(debugf, state->out, ident + 4);
				1868	nfa_print_state(debugf, state->out1, ident + 4);
				1869	}
				1870
				1871	/*
				1872	* Print the NFA state machine.
				1873	*/
				1874	static void
				1875	nfa_dump(prog)
				1876	nfa_regprog_T *prog;
				1877	{
				1878	FILE *debugf = fopen("LOG.log", "a");
				1879
				1880	if (debugf != NULL)
				1881	{
				1882	nfa_print_state(debugf, prog->start, 0);
				1883	fclose(debugf);
				1884	}
				1885	}
				1886	#endif /* ENABLE_LOG */
				1887	#endif /* DEBUG */
				1888
				1889	/*
				1890	* Parse r.e. @expr and convert it into postfix form.
				1891	* Return the postfix string on success, NULL otherwise.
				1892	*/
				1893	static int *
				1894	re2post()
				1895	{
				1896	if (nfa_reg(REG_NOPAREN) == FAIL)
				1897	return NULL;
				1898	EMIT(NFA_MOPEN);
				1899	return post_start;
				1900	}
				1901
				1902	/* NB. Some of the code below is inspired by Russ Cox's NFA regexp implementation. */
				1903
				1904	/*
				1905	* Represents an NFA state plus zero or one or two arrows exiting.
				1906	* if c == MATCH, no arrows out; matching state.
				1907	* If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
				1908	* If c < 256, labeled arrow with character c to out.
				1909	*/
				1910
				1911	static nfa_state_T state_ptr; / points to nfa_prog->state */
				1912
				1913	/*
				1914	* Allocate and initialize nfa_state_T.
				1915	*/
				1916	static nfa_state_T *
				1917	new_state(c, out, out1)
				1918	int c;
				1919	nfa_state_T *out;
				1920	nfa_state_T *out1;
				1921	{
				1922	nfa_state_T *s;
				1923
				1924	if (istate >= nstate)
				1925	return NULL;
				1926
				1927	s = &state_ptr[istate++];
				1928
				1929	s->c = c;
				1930	s->out = out;
				1931	s->out1 = out1;
				1932
				1933	s->id = istate;
				1934	s->lastlist = 0;
				1935	s->lastthread = NULL;
				1936	s->visits = 0;
				1937	s->negated = FALSE;
				1938
				1939	return s;
				1940	}
				1941
				1942	/*
				1943	* A partially built NFA without the matching state filled in.
				1944	* Frag_T.start points at the start state.
				1945	* Frag_T.out is a list of places that need to be set to the
				1946	* next state for this fragment.
				1947	*/
/* Forward typedef: union Ptrlist itself is defined further down; struct Frag
 * only stores a pointer to it. */
				1948	typedef union Ptrlist Ptrlist;
				1949	struct Frag
				1950	{
				1951	nfa_state_T *start;
				1952	Ptrlist *out;
				1953	};
				1954	typedef struct Frag Frag_T;
				1955
				1956	static Frag_T frag __ARGS((nfa_state_T start, Ptrlist out));
				1957	static Ptrlist list1 __ARGS((nfa_state_T *outp));
				1958	static void patch __ARGS((Ptrlist l, nfa_state_T s));
				1959	static Ptrlist append __ARGS((Ptrlist l1, Ptrlist *l2));
				1960	static void st_push __ARGS((Frag_T s, Frag_T *p, Frag_T stack_end));
				1961	static Frag_T st_pop __ARGS((Frag_T *p, Frag_T stack));
				1962
				1963	/*
				1964	* Initialize Frag_T struct.
				1965	*/
				1966	static Frag_T
				1967	frag(start, out)
				1968	nfa_state_T *start;
				1969	Ptrlist *out;
				1970	{
				1971	Frag_T n = { start, out };
				1972	return n;
				1973	}
				1974
				1975	/*
				1976	* Since the out pointers in the list are always
				1977	* uninitialized, we use the pointers themselves
				1978	* as storage for the Ptrlists.
				1979	*/
/* "next" is how list1() and append() chain the slots together; "s" is the
 * same storage seen as the state pointer that patch() writes into it. */
				1980	union Ptrlist
				1981	{
				1982	Ptrlist *next;
				1983	nfa_state_T *s;
				1984	};
				1985
				1986	/*
				1987	* Create singleton list containing just outp.
				1988	*/
				1989	static Ptrlist *
				1990	list1(outp)
				1991	nfa_state_T **outp;
				1992	{
				1993	Ptrlist *l;
				1994
				1995	l = (Ptrlist *)outp;
				1996	l->next = NULL;
				1997	return l;
				1998	}
				1999
				2000	/*
				2001	* Patch the list of states at out to point to start.
				2002	*/
				2003	static void
				2004	patch(l, s)
				2005	Ptrlist *l;
				2006	nfa_state_T *s;
				2007	{
				2008	Ptrlist *next;
				2009
				2010	for (; l; l = next)
				2011	{
				2012	next = l->next;
				2013	l->s = s;
				2014	}
				2015	}
				2016
				2017
				2018	/*
				2019	* Join the two lists l1 and l2, returning the combination.
				2020	*/
				2021	static Ptrlist *
				2022	append(l1, l2)
				2023	Ptrlist *l1;
				2024	Ptrlist *l2;
				2025	{
				2026	Ptrlist *oldl1;
				2027
				2028	oldl1 = l1;
				2029	while (l1->next)
				2030	l1 = l1->next;
				2031	l1->next = l2;
				2032	return oldl1;
				2033	}
				2034
				2035	/*
				2036	* Stack used for transforming postfix form into NFA.
				2037	*/
/* Fragment that st_pop() returns when the stack has underflowed; being
 * static and never assigned in the code shown here, it is all zeros. */
				2038	static Frag_T empty;
				2039
				2040	static void
				2041	st_error(postfix, end, p)
				2042	int *postfix;
				2043	int *end;
				2044	int *p;
				2045	{
				2046	FILE *df;
				2047	int *p2;
				2048
				2049	df = fopen("stack.err", "a");
				2050	if (df)
				2051	{
				2052	fprintf(df, "Error popping the stack!\n");
				2053	#ifdef DEBUG
				2054	fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr);
				2055	#endif
				2056	fprintf(df, "Postfix form is: ");
				2057	#ifdef DEBUG
				2058	for (p2 = postfix; p2 < end; p2++)
				2059	{
				2060	nfa_set_code(*p2);
				2061	fprintf(df, "%s, ", code);
				2062	}
				2063	nfa_set_code(*p);
				2064	fprintf(df, "\nCurrent position is: ");
				2065	for (p2 = postfix; p2 <= p; p2 ++)
				2066	{
				2067	nfa_set_code(*p2);
				2068	fprintf(df, "%s, ", code);
				2069	}
				2070	#else
				2071	for (p2 = postfix; p2 < end; p2++)
				2072	{
				2073	fprintf(df, "%d, ", *p2);
				2074	}
				2075	fprintf(df, "\nCurrent position is: ");
				2076	for (p2 = postfix; p2 <= p; p2 ++)
				2077	{
				2078	fprintf(df, "%d, ", *p2);
				2079	}
				2080	#endif
				2081	fprintf(df, "\n--------------------------\n");
				2082	fclose(df);
				2083	}
				2084	EMSG(_("E874: (NFA) Could not pop the stack !"));
				2085	}
				2086
				2087	/*
				2088	* Push an item onto the stack.
				2089	*/
				2090	static void
				2091	st_push(s, p, stack_end)
				2092	Frag_T s;
				2093	Frag_T **p;
				2094	Frag_T *stack_end;
				2095	{
				2096	Frag_T stackp = p;
				2097
				2098	if (stackp >= stack_end)
				2099	return;
				2100	*stackp = s;
				2101	p = p + 1;
				2102	}
				2103
				2104	/*
				2105	* Pop an item from the stack.
				2106	*/
				2107	static Frag_T
				2108	st_pop(p, stack)
				2109	Frag_T **p;
				2110	Frag_T *stack;
				2111	{
				2112	Frag_T *stackp;
				2113
				2114	p = p - 1;
				2115	stackp = *p;
				2116	if (stackp < stack)
				2117	return empty;
				2118	return **p;
				2119	}
				2120
				2121	/*
				2122	* Convert a postfix form into its equivalent NFA.
				2123	* Return the NFA start state on success, NULL otherwise.
				2124	*/
				2125	static nfa_state_T *
				2126	post2nfa(postfix, end, nfa_calc_size)
				2127	int *postfix;
				2128	int *end;
				2129	int nfa_calc_size;
				2130	{
				2131	int *p;
				2132	int mopen;
				2133	int mclose;
				2134	Frag_T *stack = NULL;
				2135	Frag_T *stackp = NULL;
				2136	Frag_T *stack_end = NULL;
				2137	Frag_T e1;
				2138	Frag_T e2;
				2139	Frag_T e;
				2140	nfa_state_T *s;
				2141	nfa_state_T *s1;
				2142	nfa_state_T *matchstate;
				2143
				2144	if (postfix == NULL)
				2145	return NULL;
				2146
				2147	#define PUSH(s) st_push ((s), &stackp, stack_end)
				2148	#define POP() st_pop(&stackp, stack); \
				2149	if (stackp < stack) \
				2150	{ \
				2151	st_error(postfix, end, p); \
				2152	return NULL; \
				2153	}
				2154
				2155	if (nfa_calc_size == FALSE)
				2156	{
				2157	/* Allocate space for the stack. Max states on the stack : nstate */
				2158	stack = (Frag_T ) lalloc((nstate + 1)sizeof(Frag_T), TRUE);
				2159	stackp = stack;
				2160	stack_end = stack + NFA_STACK_SIZE;
				2161	}
				2162
				2163	for (p = postfix; p < end; ++p)
				2164	{
				2165	switch (*p)
				2166	{
				2167	case NFA_CONCAT:
				2168	/* Catenation.
				2169	* Pay attention: this operator does not exist
				2170	* in the r.e. itself (it is implicit, really).
				2171	* It is added when r.e. is translated to postfix
				2172	* form in re2post().
				2173	*
				2174	* No new state added here. */
				2175	if (nfa_calc_size == TRUE)
				2176	{
				2177	nstate += 0;
				2178	break;
				2179	}
				2180	e2 = POP();
				2181	e1 = POP();
				2182	patch(e1.out, e2.start);
				2183	PUSH(frag(e1.start, e2.out));
				2184	break;
				2185
				2186	case NFA_NOT:
				2187	/* Negation of a character */
				2188	if (nfa_calc_size == TRUE)
				2189	{
				2190	nstate += 0;
				2191	break;
				2192	}
				2193	e1 = POP();
				2194	e1.start->negated = TRUE;
				2195	if (e1.start->c == NFA_MULTIBYTE \|\| e1.start->c == NFA_COMPOSING)
				2196	e1.start->out1->negated = TRUE;
				2197	PUSH(e1);
				2198	break;
				2199
				2200	case NFA_OR:
				2201	/* Alternation */
				2202	if (nfa_calc_size == TRUE)
				2203	{
				2204	nstate ++;
				2205	break;
				2206	}
				2207	e2 = POP();
				2208	e1 = POP();
				2209	s = new_state(NFA_SPLIT, e1.start, e2.start);
				2210	if (s == NULL)
				2211	return NULL;
				2212	PUSH(frag(s, append(e1.out, e2.out)));
				2213	break;
				2214
				2215	case NFA_STAR:
				2216	/* Zero or more */
				2217	if (nfa_calc_size == TRUE)
				2218	{
				2219	nstate ++;
				2220	break;
				2221	}
				2222	e = POP();
				2223	s = new_state(NFA_SPLIT, e.start, NULL);
				2224	if (s == NULL)
				2225	return NULL;
				2226	patch(e.out, s);
				2227	PUSH(frag(s, list1(&s->out1)));
				2228	break;
				2229
				2230	case NFA_QUEST:
				2231	/* one or zero atoms=> greedy match */
				2232	if (nfa_calc_size == TRUE)
				2233	{
				2234	nstate ++;
				2235	break;
				2236	}
				2237	e = POP();
				2238	s = new_state(NFA_SPLIT, e.start, NULL);
				2239	if (s == NULL)
				2240	return NULL;
				2241	PUSH(frag(s, append(e.out, list1(&s->out1))));
				2242	break;
				2243
				2244	case NFA_QUEST_NONGREEDY:
				2245	/* zero or one atoms => non-greedy match */
				2246	if (nfa_calc_size == TRUE)
				2247	{
				2248	nstate ++;
				2249	break;
				2250	}
				2251	e = POP();
				2252	s = new_state(NFA_SPLIT, NULL, e.start);
				2253	if (s == NULL)
				2254	return NULL;
				2255	PUSH(frag(s, append(e.out, list1(&s->out))));
				2256	break;
				2257
				2258	case NFA_PLUS:
				2259	/* One or more */
				2260	if (nfa_calc_size == TRUE)
				2261	{
				2262	nstate ++;
				2263	break;
				2264	}
				2265	e = POP();
				2266	s = new_state(NFA_SPLIT, e.start, NULL);
				2267	if (s == NULL)
				2268	return NULL;
				2269	patch(e.out, s);
				2270	PUSH(frag(e.start, list1(&s->out1)));
				2271	break;
				2272
				2273	case NFA_SKIP_CHAR:
				2274	/* Symbol of 0-length, Used in a repetition
				2275	* with max/min count of 0 */
				2276	if (nfa_calc_size == TRUE)
				2277	{
				2278	nstate ++;
				2279	break;
				2280	}
				2281	s = new_state(NFA_SKIP_CHAR, NULL, NULL);
				2282	if (s == NULL)
				2283	return NULL;
				2284	PUSH(frag(s, list1(&s->out)));
				2285	break;
				2286
				2287	case NFA_PREV_ATOM_NO_WIDTH:
				2288	/* The \@= operator: match the preceding atom with 0 width.
				2289	* Surrounds the preceding atom with START_INVISIBLE and
				2290	* END_INVISIBLE, similarly to MOPEN.
				2291	*/
				2292	/* TODO: Maybe this drops the speed? */
				2293	return NULL;
				2294
				2295	if (nfa_calc_size == TRUE)
				2296	{
				2297	nstate += 2;
				2298	break;
				2299	}
				2300	e = POP();
				2301	s1 = new_state(NFA_END_INVISIBLE, NULL, NULL);
				2302	if (s1 == NULL)
				2303	return NULL;
				2304	patch(e.out, s1);
				2305
				2306	s = new_state(NFA_START_INVISIBLE, e.start, s1);
				2307	if (s == NULL)
				2308	return NULL;
				2309	PUSH(frag(s, list1(&s1->out)));
				2310	break;
				2311
				2312	case NFA_MOPEN + 0: /* Submatch */
				2313	case NFA_MOPEN + 1:
				2314	case NFA_MOPEN + 2:
				2315	case NFA_MOPEN + 3:
				2316	case NFA_MOPEN + 4:
				2317	case NFA_MOPEN + 5:
				2318	case NFA_MOPEN + 6:
				2319	case NFA_MOPEN + 7:
				2320	case NFA_MOPEN + 8:
				2321	case NFA_MOPEN + 9:
				2322	case NFA_NOPEN: /* \%( "Invisible Submatch" */
				2323	case NFA_MULTIBYTE: /* mbyte char */
				2324	case NFA_COMPOSING: /* composing char */
				2325	if (nfa_calc_size == TRUE)
				2326	{
				2327	nstate += 2;
				2328	break;
				2329	}
				2330
				2331	mopen = *p;
				2332	switch (*p)
				2333	{
				2334	case NFA_NOPEN:
				2335	mclose = NFA_NCLOSE;
				2336	break;
				2337	case NFA_MULTIBYTE:
				2338	mclose = NFA_END_MULTIBYTE;
				2339	break;
				2340	case NFA_COMPOSING:
				2341	mclose = NFA_END_COMPOSING;
				2342	break;
				2343	default:
				2344	/* NFA_MOPEN(0) ... NFA_MOPEN(9) */
				2345	mclose = *p + NSUBEXP;
				2346	break;
				2347	}
				2348
				2349	/* Allow "NFA_MOPEN" as a valid postfix representation for
				2350	* the empty regexp "". In this case, the NFA will be
				2351	* NFA_MOPEN -> NFA_MCLOSE. Note that this also allows
				2352	* empty groups of parenthesis, and empty mbyte chars */
				2353	if (stackp == stack)
				2354	{
				2355	s = new_state(mopen, NULL, NULL);
				2356	if (s == NULL)
				2357	return NULL;
				2358	s1 = new_state(mclose, NULL, NULL);
				2359	if (s1 == NULL)
				2360	return NULL;
				2361	patch(list1(&s->out), s1);
				2362	PUSH(frag(s, list1(&s1->out)));
				2363	break;
				2364	}
				2365
				2366	/* At least one node was emitted before NFA_MOPEN, so
				2367	* at least one node will be between NFA_MOPEN and NFA_MCLOSE */
				2368	e = POP();
				2369	s = new_state(mopen, e.start, NULL); /* `(' */
				2370	if (s == NULL)
				2371	return NULL;
				2372
				2373	s1 = new_state(mclose, NULL, NULL); /* `)' */
				2374	if (s1 == NULL)
				2375	return NULL;
				2376	patch(e.out, s1);
				2377
				2378	if (mopen == NFA_MULTIBYTE \|\| mopen == NFA_COMPOSING)
				2379	/* MULTIBYTE->out1 = END_MULTIBYTE
				2380	* COMPOSING->out1 = END_COMPOSING */
				2381	patch(list1(&s->out1), s1);
				2382
				2383	PUSH(frag(s, list1(&s1->out)));
				2384	break;
				2385
				2386	case NFA_ZSTART:
				2387	case NFA_ZEND:
				2388	default:
				2389	/* Operands */
				2390	if (nfa_calc_size == TRUE)
				2391	{
				2392	nstate ++;
				2393	break;
				2394	}
				2395	s = new_state(*p, NULL, NULL);
				2396	if (s == NULL)
				2397	return NULL;
				2398	PUSH(frag(s, list1(&s->out)));
				2399	break;
				2400
				2401	} /* switch(p) /
				2402
				2403	} /* for(p = postfix; p; ++p) /
				2404
				2405	if (nfa_calc_size == TRUE)
				2406	{
				2407	nstate ++;
				2408	return NULL; /* Return value when counting size is ignored anyway */
				2409	}
				2410
				2411	e = POP();
				2412	if (stackp != stack)
				2413	EMSG_RET_NULL(_("E875: (NFA regexp) (While converting from postfix to NFA), too many states left on stack"));
				2414
				2415	if (istate >= nstate)
				2416	EMSG_RET_NULL(_("E876: (NFA regexp) Not enough space to store the whole NFA "));
				2417
				2418	vim_free(stack);
				2419
				2420	matchstate = &state_ptr[istate++]; /* the match state */
				2421	matchstate->c = NFA_MATCH;
				2422	matchstate->out = matchstate->out1 = NULL;
				2423
				2424	patch(e.out, matchstate);
				2425	return e.start;
				2426
				2427	#undef POP1
				2428	#undef PUSH1
				2429	#undef POP2
				2430	#undef PUSH2
				2431	#undef POP
				2432	#undef PUSH
				2433	}
				2434
				2435	/****************************************************************
				2436	* NFA execution code.
				2437	****************************************************************/
				2438
				2439	/* thread_T contains runtime information of a NFA state */
				2440	struct thread
				2441	{
				2442	nfa_state_T *state; /* the NFA state this entry was added for */
				2443	regsub_T sub; /* submatch info */
				2444	};
				2445
				/* List of threads; addstate() stores a new entry at t[n] and then
				 * increments n. */
				2446	typedef struct
				2447	{
				2448	thread_T *t; /* array of thread entries */
				2449	int n; /* number of entries of "t" in use */
				2450	} List;
				2451
				2452	static void addstate __ARGS((List l, nfa_state_T state, regsub_T m, int off, int lid, int match));
				2453
				2454	static void
				2455	addstate(l, state, m, off, lid, match)
				2456	List l; / runtime state list */
				2457	nfa_state_T state; / state to update */
				2458	regsub_T m; / pointers to subexpressions */
				2459	int off;
				2460	int lid;
				2461	int match; / found match? */
				2462	{
				2463	regsub_T save;
				2464	int subidx = 0;
				2465
				2466	if (l == NULL \|\| state == NULL)
				2467	return;
				2468
				2469	switch (state->c)
				2470	{
				2471	case NFA_SPLIT:
				2472	case NFA_NOT:
				2473	case NFA_NOPEN:
				2474	case NFA_NCLOSE:
				2475	case NFA_MCLOSE:
				2476	case NFA_MCLOSE + 1:
				2477	case NFA_MCLOSE + 2:
				2478	case NFA_MCLOSE + 3:
				2479	case NFA_MCLOSE + 4:
				2480	case NFA_MCLOSE + 5:
				2481	case NFA_MCLOSE + 6:
				2482	case NFA_MCLOSE + 7:
				2483	case NFA_MCLOSE + 8:
				2484	case NFA_MCLOSE + 9:
				2485	/* Do not remember these nodes in list "thislist" or "nextlist" */
				2486	break;
				2487
				2488	default:
				2489	if (state->lastlist == lid)
				2490	{
				2491	if (++state->visits > 2)
				2492	return;
				2493	}
				2494	else
				2495	{
				2496	/* add the state to the list */
				2497	state->lastlist = lid;
				2498	state->lastthread = &l->t[l->n++];
				2499	state->lastthread->state = state;
				2500	state->lastthread->sub = *m;
				2501	}
				2502	}
				2503
				2504	#ifdef ENABLE_LOG
				2505	nfa_set_code(state->c);
				2506	fprintf(log_fd, "> Adding state %d to list. Character %s, code %d\n",
				2507	abs(state->id), code, state->c);
				2508	#endif
				2509	switch (state->c)
				2510	{
				2511	case NFA_MATCH:
				2512	*match = TRUE;
				2513	break;
				2514
				2515	case NFA_SPLIT:
				2516	addstate(l, state->out, m, off, lid, match);
				2517	addstate(l, state->out1, m, off, lid, match);
				2518	break;
				2519
				2520	case NFA_SKIP_CHAR:
				2521	addstate(l, state->out, m, off, lid, match);
				2522	break;
				2523
				2524	#if 0
				2525	case NFA_END_NEG_RANGE:
				2526	/* Nothing to handle here. nfa_regmatch() will take care of it */
				2527	break;
				2528
				2529	case NFA_NOT:
				2530	EMSG(_("E999: (NFA regexp internal error) Should not process NOT node !"));
				2531	#ifdef ENABLE_LOG
				2532	fprintf(f, "\n\n>>> E999: Added state NFA_NOT to a list ... Something went wrong ! Why wasn't it processed already? \n\n");
				2533	#endif
				2534	break;
				2535
				2536	case NFA_COMPOSING:
				2537	/* nfa_regmatch() will match all the bytes of this composing char. */
				2538	break;
				2539
				2540	case NFA_MULTIBYTE:
				2541	/* nfa_regmatch() will match all the bytes of this multibyte char. */
				2542	break;
				2543	#endif
				2544
				2545	case NFA_END_MULTIBYTE:
				2546	/* Successfully matched this mbyte char */
				2547	addstate(l, state->out, m, off, lid, match);
				2548	break;
				2549
				2550	case NFA_NOPEN:
				2551	case NFA_NCLOSE:
				2552	addstate(l, state->out, m, off, lid, match);
				2553	break;
				2554
				2555	/* If this state is reached, then a recursive call of nfa_regmatch()
				2556	* succeeded. the next call saves the found submatches in the
				2557	* first state after the "invisible" branch. */
				2558	#if 0
				2559	case NFA_END_INVISIBLE:
				2560	break;
				2561	#endif
				2562
				2563	case NFA_MOPEN + 0:
				2564	case NFA_MOPEN + 1:
				2565	case NFA_MOPEN + 2:
				2566	case NFA_MOPEN + 3:
				2567	case NFA_MOPEN + 4:
				2568	case NFA_MOPEN + 5:
				2569	case NFA_MOPEN + 6:
				2570	case NFA_MOPEN + 7:
				2571	case NFA_MOPEN + 8:
				2572	case NFA_MOPEN + 9:
				2573	case NFA_ZSTART:
				2574	subidx = state->c - NFA_MOPEN;
				2575	if (state->c == NFA_ZSTART)
				2576	subidx = 0;
				2577
				2578	if (REG_MULTI)
				2579	{
				2580	save.startpos[subidx] = m->startpos[subidx];
				2581	save.endpos[subidx] = m->endpos[subidx];
				2582	m->startpos[subidx].lnum = reglnum;
				2583	m->startpos[subidx].col = reginput - regline + off;
				2584	}
				2585	else
				2586	{
				2587	save.start[subidx] = m->start[subidx];
				2588	save.end[subidx] = m->end[subidx];
				2589	m->start[subidx] = reginput + off;
				2590	}
				2591
				2592	addstate(l, state->out, m, off, lid, match);
				2593
				2594	if (REG_MULTI)
				2595	{
				2596	m->startpos[subidx] = save.startpos[subidx];
				2597	m->endpos[subidx] = save.endpos[subidx];
				2598	}
				2599	else
				2600	{
				2601	m->start[subidx] = save.start[subidx];
				2602	m->end[subidx] = save.end[subidx];
				2603	}
				2604	break;
				2605
				2606	case NFA_MCLOSE + 0:
				2607	if (nfa_has_zend == TRUE)
				2608	{
				2609	addstate(l, state->out, m, off, lid, match);
				2610	break;
				2611	}
				2612	case NFA_MCLOSE + 1:
				2613	case NFA_MCLOSE + 2:
				2614	case NFA_MCLOSE + 3:
				2615	case NFA_MCLOSE + 4:
				2616	case NFA_MCLOSE + 5:
				2617	case NFA_MCLOSE + 6:
				2618	case NFA_MCLOSE + 7:
				2619	case NFA_MCLOSE + 8:
				2620	case NFA_MCLOSE + 9:
				2621	case NFA_ZEND:
				2622	subidx = state->c - NFA_MCLOSE;
				2623	if (state->c == NFA_ZEND)
				2624	subidx = 0;
				2625
				2626	if (REG_MULTI)
				2627	{
				2628	save.startpos[subidx] = m->startpos[subidx];
				2629	save.endpos[subidx] = m->endpos[subidx];
				2630	m->endpos[subidx].lnum = reglnum;
				2631	m->endpos[subidx].col = reginput - regline + off;
				2632	}
				2633	else
				2634	{
				2635	save.start[subidx] = m->start[subidx];
				2636	save.end[subidx] = m->end[subidx];
				2637	m->end[subidx] = reginput + off;
				2638	}
				2639
				2640	addstate(l, state->out, m, off, lid, match);
				2641
				2642	if (REG_MULTI)
				2643	{
				2644	m->startpos[subidx] = save.startpos[subidx];
				2645	m->endpos[subidx] = save.endpos[subidx];
				2646	}
				2647	else
				2648	{
				2649	m->start[subidx] = save.start[subidx];
				2650	m->end[subidx] = save.end[subidx];
				2651	}
				2652	break;
				2653	}
				2654	}
				2655
				2656	/*
				2657	* Check character class "class" against current character c.
				2658	*/
				2659	static int
				2660	check_char_class(class, c)
				2661	int class;
				2662	int c;
				2663	{
				2664	switch (class)
				2665	{
				2666	case NFA_CLASS_ALNUM:
				2667	if (isalnum(c))
				2668	return OK;
				2669	break;
				2670	case NFA_CLASS_ALPHA:
				2671	if (isalpha(c))
				2672	return OK;
				2673	break;
				2674	case NFA_CLASS_BLANK:
				2675	if (c == ' ' \|\| c == '\t')
				2676	return OK;
				2677	break;
				2678	case NFA_CLASS_CNTRL:
				2679	if (iscntrl(c))
				2680	return OK;
				2681	break;
				2682	case NFA_CLASS_DIGIT:
				2683	if (VIM_ISDIGIT(c))
				2684	return OK;
				2685	break;
				2686	case NFA_CLASS_GRAPH:
				2687	if (isgraph(c))
				2688	return OK;
				2689	break;
				2690	case NFA_CLASS_LOWER:
				2691	if (MB_ISLOWER(c))
				2692	return OK;
				2693	break;
				2694	case NFA_CLASS_PRINT:
				2695	if (vim_isprintc(c))
				2696	return OK;
				2697	break;
				2698	case NFA_CLASS_PUNCT:
				2699	if (ispunct(c))
				2700	return OK;
				2701	break;
				2702	case NFA_CLASS_SPACE:
				2703	if ((c >=9 && c <= 13) \|\| (c == ' '))
				2704	return OK;
				2705	break;
				2706	case NFA_CLASS_UPPER:
				2707	if (MB_ISUPPER(c))
				2708	return OK;
				2709	break;
				2710	case NFA_CLASS_XDIGIT:
				2711	if (vim_isxdigit(c))
				2712	return OK;
				2713	break;
				2714	case NFA_CLASS_TAB:
				2715	if (c == '\t')
				2716	return OK;
				2717	break;
				2718	case NFA_CLASS_RETURN:
				2719	if (c == '\r')
				2720	return OK;
				2721	break;
				2722	case NFA_CLASS_BACKSPACE:
				2723	if (c == '\b')
				2724	return OK;
				2725	break;
				2726	case NFA_CLASS_ESCAPE:
				2727	if (c == '\033')
				2728	return OK;
				2729	break;
				2730
				2731	default:
				2732	/* should not be here :P */
				2733	EMSG_RET_FAIL(_("E877: (NFA regexp) Invalid character class "));
				2734	}
				2735	return FAIL;
				2736	}
				2737
				2738	/*
				2739	* Set all NFA nodes' list ID equal to -1.
				2740	*/
				2741	static void
				2742	nfa_set_neg_listids(start)
				2743	nfa_state_T *start;
				2744	{
				2745	if (start == NULL)
				2746	return;
				2747	if (start->lastlist >= 0)
				2748	{
				2749	start->lastlist = -1;
				2750	nfa_set_neg_listids(start->out);
				2751	nfa_set_neg_listids(start->out1);
				2752	}
				2753	}
				2754
				2755	/*
				2756	* Set all NFA nodes' list ID equal to 0.
				2757	*/
				2758	static void
				2759	nfa_set_null_listids(start)
				2760	nfa_state_T *start;
				2761	{
				2762	if (start == NULL)
				2763	return;
				2764	if (start->lastlist == -1)
				2765	{
				2766	start->lastlist = 0;
				2767	nfa_set_null_listids(start->out);
				2768	nfa_set_null_listids(start->out1);
				2769	}
				2770	}
				2771
				2772	/*
				2773	* Save list IDs for all NFA states in "list".
				2774	*/
				2775	static void
				2776	nfa_save_listids(start, list)
				2777	nfa_state_T *start;
				2778	int *list;
				2779	{
				2780	if (start == NULL)
				2781	return;
				2782	if (start->lastlist != -1)
				2783	{
				2784	list[abs(start->id)] = start->lastlist;
				2785	start->lastlist = -1;
				2786	nfa_save_listids(start->out, list);
				2787	nfa_save_listids(start->out1, list);
				2788	}
				2789	}
				2790
				2791	/*
				2792	* Restore list IDs from "list" to all NFA states.
				2793	*/
				2794	static void
				2795	nfa_restore_listids(start, list)
				2796	nfa_state_T *start;
				2797	int *list;
				2798	{
				2799	if (start == NULL)
				2800	return;
				2801	if (start->lastlist == -1)
				2802	{
				2803	start->lastlist = list[abs(start->id)];
				2804	nfa_restore_listids(start->out, list);
				2805	nfa_restore_listids(start->out1, list);
				2806	}
				2807	}
				2808
				2809	/*
				2810	* Main matching routine.
				2811	*
				2812	* Run NFA to determine whether it matches reginput.
				2813	*
				2814	* Return TRUE if there is a match, FALSE otherwise.
				2815	* Note: Caller must ensure that: start != NULL.
				2816	*/
				2817	static int
				2818	nfa_regmatch(start, submatch, m)
				2819	nfa_state_T *start;
				2820	regsub_T *submatch;
				2821	regsub_T *m;
				2822	{
				2823	int c = -1;
				2824	int n;
				2825	int i = 0;
				2826	int result;
				2827	int size = 0;
				2828	int match = FALSE;
				2829	int flag = 0;
				2830	int old_reglnum = -1;
				2831	int reginput_updated = FALSE;
				2832	thread_T *t;
				2833	char_u *cc;
				2834	char_u *old_reginput = NULL;
				2835	char_u *old_regline = NULL;
				2836	nfa_state_T *sta;
				2837	nfa_state_T *end;
				2838	List list[3];
				2839	List *listtbl[2][2];
				2840	List *ll;
				2841	int listid = 1;
				2842	int endnode = 0;
				2843	List *thislist;
				2844	List *nextlist;
				2845	List *neglist;
				2846	int *listids = NULL;
				2847	int j = 0;
				2848	int len = 0;
				2849	#ifdef DEBUG
				2850	FILE *debug = fopen("list.log", "a");
				2851
				2852	if (debug == NULL)
				2853	{
				2854	EMSG(_("(NFA) COULD NOT OPEN list.log !"));
				2855	return FALSE;
				2856	}
				2857	#endif
				2858
				2859	/* Allocate memory for the lists of nodes */
				2860	size = (nstate + 1) * sizeof(thread_T);
				2861	list[0].t = (thread_T *)lalloc(size, TRUE);
				2862	list[1].t = (thread_T *)lalloc(size, TRUE);
				2863	list[2].t = (thread_T *)lalloc(size, TRUE);
				2864	if (list[0].t == NULL \|\| list[1].t == NULL \|\| list[2].t == NULL)
				2865	goto theend;
				2866	vim_memset(list[0].t, 0, size);
				2867	vim_memset(list[1].t, 0, size);
				2868	vim_memset(list[2].t, 0, size);
				2869
				2870	#ifdef ENABLE_LOG
				2871	log_fd = fopen(LOG_NAME, "a");
				2872	if (log_fd != NULL)
				2873	{
				2874	fprintf(log_fd, "**********************************\n");
				2875	nfa_set_code(start->c);
				2876	fprintf(log_fd, " RUNNING nfa_regmatch() starting with state %d, code %s\n",
				2877	abs(start->id), code);
				2878	fprintf(log_fd, "**********************************\n");
				2879	}
				2880	else
				2881	{
				2882	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
				2883	log_fd = stderr;
				2884	}
				2885	#endif
				2886
				2887	thislist = &list[0];
				2888	thislist->n = 0;
				2889	nextlist = &list[1];
				2890	nextlist->n = 0;
				2891	neglist = &list[2];
				2892	neglist->n = 0;
				2893	#ifdef ENABLE_LOG
				2894	fprintf(log_fd, "(---) STARTSTATE\n");
				2895	#endif
				2896	addstate(thislist, start, m, 0, listid, &match);
				2897
				2898	/* There are two cases when the NFA advances: 1. input char matches the
				2899	* NFA node and 2. input char does not match the NFA node, but the next
				2900	* node is NFA_NOT. The following macro calls addstate() according to
				2901	* these rules. It is used A LOT, so use the "listtbl" table for speed */
				2902	listtbl[0][0] = NULL;
				2903	listtbl[0][1] = neglist;
				2904	listtbl[1][0] = nextlist;
				2905	listtbl[1][1] = NULL;
				2906	#define ADD_POS_NEG_STATE(node) \
				2907	ll = listtbl[result ? 1 : 0][node->negated]; \
				2908	if (ll != NULL) \
				2909	addstate(ll, node->out , &t->sub, n, listid + 1, &match);
				2910
				2911
				2912	/*
				2913	* Run for each character.
				2914	*/
				2915	do {
				2916	again:
				2917	#ifdef FEAT_MBYTE
				2918	if (has_mbyte)
				2919	{
				2920	c = (*mb_ptr2char)(reginput);
				2921	n = (*mb_ptr2len)(reginput);
				2922	}
				2923	else
				2924	#endif
				2925	{
				2926	c = *reginput;
				2927	n = 1;
				2928	}
				2929	if (c == NUL)
				2930	n = 0;
				2931	cc = (char_u *)&c;
				2932
				2933	/* swap lists */
				2934	thislist = &list[flag];
				2935	nextlist = &list[flag ^= 1];
				2936	nextlist->n = 0; /* `clear' nextlist */
				2937	listtbl[1][0] = nextlist;
				2938	++listid;
				2939
				2940	#ifdef ENABLE_LOG
				2941	fprintf(log_fd, "------------------------------------------\n");
				2942	fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
				2943	fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", c, (int)c);
				2944	fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n);
				2945	for (i = 0; i< thislist->n; i++)
				2946	fprintf(log_fd, "%d ", abs(thislist->t[i].state->id));
				2947	fprintf(log_fd, "\n");
				2948	#endif
				2949
				2950	#ifdef DEBUG
				2951	fprintf(debug, "\n-------------------\n");
				2952	#endif
				2953
				2954	/* compute nextlist */
				2955	for (i = 0; i < thislist->n \|\| neglist->n > 0; ++i)
				2956	{
				2957	if (neglist->n > 0)
				2958	{
				2959	t = &neglist->t[0];
				2960	neglist->n --;
				2961	i--;
				2962	}
				2963	else
				2964	t = &thislist->t[i];
				2965
				2966	#ifdef DEBUG
				2967	nfa_set_code(t->state->c);
				2968	fprintf(debug, "%s, ", code);
				2969	#endif
				2970	#ifdef ENABLE_LOG
				2971	nfa_set_code(t->state->c);
				2972	fprintf(log_fd, "(%d) %s, code %d ... \n", abs(t->state->id),
				2973	code, (int)t->state->c);
				2974	#endif
				2975
				2976	/*
				2977	* Handle the possible codes of the current state.
				2978	* The most important is NFA_MATCH.
				2979	*/
				2980	switch (t->state->c)
				2981	{
				2982	case NFA_MATCH:
				2983	match = TRUE;
				2984	*submatch = t->sub;
				2985	#ifdef ENABLE_LOG
				2986	for (j = 0; j < 4; j++)
				2987	if (REG_MULTI)
				2988	fprintf(log_fd, "\n *** group %d, start: c=%d, l=%d, end: c=%d, l=%d",
				2989	j,
				2990	t->sub.startpos[j].col,
				2991	(int)t->sub.startpos[j].lnum,
				2992	t->sub.endpos[j].col,
				2993	(int)t->sub.endpos[j].lnum);
				2994	else
				2995	fprintf(log_fd, "\n *** group %d, start: \"%s\", end: \"%s\"",
				2996	j,
				2997	(char *)t->sub.start[j],
				2998	(char *)t->sub.end[j]);
				2999	fprintf(log_fd, "\n");
				3000	#endif
				3001	goto nextchar; /* found the left-most longest match */
				3002
				3003	case NFA_END_INVISIBLE:
				3004	/* This is only encountered after a NFA_START_INVISIBLE node.
				3005	* They surround a zero-width group, used with "\@=" and "\&".
				3006	* If we got here, it means that the current "invisible" group
				3007	* finished successfully, so return control to the parent
				3008	* nfa_regmatch(). Submatches are stored in *m, and used in
				3009	* the parent call. */
				3010	if (start->c == NFA_MOPEN + 0)
				3011	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3012	&match);
				3013	else
				3014	{
				3015	*m = t->sub;
				3016	match = TRUE;
				3017	}
				3018	break;
				3019
				3020	case NFA_START_INVISIBLE:
				3021	/* Save global variables, and call nfa_regmatch() to check if
				3022	* the current concat matches at this position. The concat
				3023	* ends with the node NFA_END_INVISIBLE */
				3024	old_reginput = reginput;
				3025	old_regline = regline;
				3026	old_reglnum = reglnum;
				3027	if (listids == NULL)
				3028	{
				3029	listids = (int ) lalloc(sizeof(int) nstate, TRUE);
				3030	if (listids == NULL)
				3031	{
				3032	EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
				3033	return 0;
				3034	}
				3035	}
				3036	#ifdef ENABLE_LOG
				3037	if (log_fd != stderr)
				3038	fclose(log_fd);
				3039	log_fd = NULL;
				3040	#endif
				3041	/* Have to clear the listid field of the NFA nodes, so that
				3042	* nfa_regmatch() and addstate() can run properly after
				3043	* recursion. */
				3044	nfa_save_listids(start, listids);
				3045	nfa_set_null_listids(start);
				3046	result = nfa_regmatch(t->state->out, submatch, m);
				3047	nfa_set_neg_listids(start);
				3048	nfa_restore_listids(start, listids);
				3049
				3050	#ifdef ENABLE_LOG
				3051	log_fd = fopen(LOG_NAME, "a");
				3052	if (log_fd != NULL)
				3053	{
				3054	fprintf(log_fd, "****************************\n");
				3055	fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n");
				3056	fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
				3057	fprintf(log_fd, "****************************\n");
				3058	}
				3059	else
				3060	{
				3061	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
				3062	log_fd = stderr;
				3063	}
				3064	#endif
				3065	if (result == TRUE)
				3066	{
				3067	/* Restore position in input text */
				3068	reginput = old_reginput;
				3069	regline = old_regline;
				3070	reglnum = old_reglnum;
				3071	/* Copy submatch info from the recursive call */
				3072	if (REG_MULTI)
				3073	for (j = 1; j < NSUBEXP; j++)
				3074	{
				3075	t->sub.startpos[j] = m->startpos[j];
				3076	t->sub.endpos[j] = m->endpos[j];
				3077	}
				3078	else
				3079	for (j = 1; j < NSUBEXP; j++)
				3080	{
				3081	t->sub.start[j] = m->start[j];
				3082	t->sub.end[j] = m->end[j];
				3083	}
				3084	/* t->state->out1 is the corresponding END_INVISIBLE node */
				3085	addstate(thislist, t->state->out1->out, &t->sub, 0, listid,
				3086	&match);
				3087	}
				3088	else
				3089	{
				3090	/* continue with next input char */
				3091	reginput = old_reginput;
				3092	}
				3093	break;
				3094
				3095	case NFA_BOL:
				3096	if (reginput == regline)
				3097	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3098	&match);
				3099	break;
				3100
				3101	case NFA_EOL:
				3102	if (c == NUL)
				3103	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3104	&match);
				3105	break;
				3106
				3107	case NFA_BOW:
				3108	{
				3109	int bow = TRUE;
				3110
				3111	if (c == NUL)
				3112	bow = FALSE;
				3113	#ifdef FEAT_MBYTE
				3114	else if (has_mbyte)
				3115	{
				3116	int this_class;
				3117
				3118	/* Get class of current and previous char (if it exists). */
				3119	this_class = mb_get_class(reginput);
				3120	if (this_class <= 1)
				3121	bow = FALSE;
				3122	else if (reg_prev_class() == this_class)
				3123	bow = FALSE;
				3124	}
				3125	#endif
				3126	else if (!vim_iswordc(c)
				3127	\|\| (reginput > regline && vim_iswordc(reginput[-1])))
				3128	bow = FALSE;
				3129	if (bow)
				3130	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3131	&match);
				3132	break;
				3133	}
				3134
				3135	case NFA_EOW:
				3136	{
				3137	int eow = TRUE;
				3138
				3139	if (reginput == regline)
				3140	eow = FALSE;
				3141	#ifdef FEAT_MBYTE
				3142	else if (has_mbyte)
				3143	{
				3144	int this_class, prev_class;
				3145
				3146	/* Get class of current and previous char (if it exists). */
				3147	this_class = mb_get_class(reginput);
				3148	prev_class = reg_prev_class();
				3149	if (this_class == prev_class
				3150	\|\| prev_class == 0 \|\| prev_class == 1)
				3151	eow = FALSE;
				3152	}
				3153	#endif
				3154	else if (!vim_iswordc(reginput[-1])
				3155	\|\| (reginput[0] != NUL && vim_iswordc(c)))
				3156	eow = FALSE;
				3157	if (eow)
				3158	addstate(thislist, t->state->out, &t->sub, 0, listid,
				3159	&match);
				3160	break;
				3161	}
				3162
				3163	case NFA_MULTIBYTE:
				3164	case NFA_COMPOSING:
				3165	switch (t->state->c)
				3166	{
				3167	case NFA_MULTIBYTE: endnode = NFA_END_MULTIBYTE; break;
				3168	case NFA_COMPOSING: endnode = NFA_END_COMPOSING; break;
				3169	default: endnode = 0;
				3170	}
				3171
				3172	result = OK;
				3173	sta = t->state->out;
				3174	len = 1;
				3175	while (sta->c != endnode && len <= n)
				3176	{
				3177	if (reginput[len-1] != sta->c)
				3178	{
				3179	result = OK - 1;
				3180	break;
				3181	}
				3182	len++;
				3183	sta = sta->out;
				3184	}
				3185
				3186	/* if input char length doesn't match regexp char length */
				3187	if (len -1 < n \|\| sta->c != endnode)
				3188	result = OK - 1;
				3189	end = t->state->out1; /* NFA_END_MULTIBYTE or
				3190	NFA_END_COMPOSING */
				3191	/* If \Z was present, then ignore composing characters */
				3192	if (regflags & RF_ICOMBINE)
				3193	result = 1 ^ sta->negated;
				3194	ADD_POS_NEG_STATE(end);
				3195	break;
				3196
				3197	case NFA_NEWL:
				3198	if (!reg_line_lbr && REG_MULTI
				3199	&& c == NUL && reglnum <= reg_maxline)
				3200	{
				3201	if (reginput_updated == FALSE)
				3202	{
				3203	reg_nextline();
				3204	reginput_updated = TRUE;
				3205	}
				3206	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3207	&match);
				3208	}
				3209	break;
				3210
				3211	case NFA_CLASS_ALNUM:
				3212	case NFA_CLASS_ALPHA:
				3213	case NFA_CLASS_BLANK:
				3214	case NFA_CLASS_CNTRL:
				3215	case NFA_CLASS_DIGIT:
				3216	case NFA_CLASS_GRAPH:
				3217	case NFA_CLASS_LOWER:
				3218	case NFA_CLASS_PRINT:
				3219	case NFA_CLASS_PUNCT:
				3220	case NFA_CLASS_SPACE:
				3221	case NFA_CLASS_UPPER:
				3222	case NFA_CLASS_XDIGIT:
				3223	case NFA_CLASS_TAB:
				3224	case NFA_CLASS_RETURN:
				3225	case NFA_CLASS_BACKSPACE:
				3226	case NFA_CLASS_ESCAPE:
				3227	result = check_char_class(t->state->c, c);
				3228	ADD_POS_NEG_STATE(t->state);
				3229	break;
				3230
				3231	case NFA_END_NEG_RANGE:
				3232	/* This follows a series of negated nodes, like:
				3233	* CHAR(x), NFA_NOT, CHAR(y), NFA_NOT etc. */
				3234	if (c > 0)
				3235	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3236	&match);
				3237	break;
				3238
				3239	case NFA_ANY:
				3240	/* Any printable char, not just any char. '\0' (end of input)
				3241	* must not match */
				3242	if (c > 0)
				3243	addstate(nextlist, t->state->out, &t->sub, n, listid + 1,
				3244	&match);
				3245	break;
				3246
				3247	/*
				3248	* Character classes like \a for alpha, \d for digit etc.
				3249	*/
				3250	case NFA_IDENT: /* \i */
				3251	result = vim_isIDc(c);
				3252	ADD_POS_NEG_STATE(t->state);
				3253	break;
				3254
				3255	case NFA_SIDENT: /* \I */
				3256	result = !VIM_ISDIGIT(c) && vim_isIDc(c);
				3257	ADD_POS_NEG_STATE(t->state);
				3258	break;
				3259
				3260	case NFA_KWORD: /* \k */
				3261	result = vim_iswordp(cc);
				3262	ADD_POS_NEG_STATE(t->state);
				3263	break;
				3264
				3265	case NFA_SKWORD: /* \K */
				3266	result = !VIM_ISDIGIT(c) && vim_iswordp(cc);
				3267	ADD_POS_NEG_STATE(t->state);
				3268	break;
				3269
				3270	case NFA_FNAME: /* \f */
				3271	result = vim_isfilec(c);
				3272	ADD_POS_NEG_STATE(t->state);
				3273	break;
				3274
				3275	case NFA_SFNAME: /* \F */
				3276	result = !VIM_ISDIGIT(c) && vim_isfilec(c);
				3277	ADD_POS_NEG_STATE(t->state);
				3278	break;
				3279
				3280	case NFA_PRINT: /* \p */
				3281	result = ptr2cells(cc) == 1;
				3282	ADD_POS_NEG_STATE(t->state);
				3283	break;
				3284
				3285	case NFA_SPRINT: /* \P */
				3286	result = !VIM_ISDIGIT(c) && ptr2cells(cc) == 1;
				3287	ADD_POS_NEG_STATE(t->state);
				3288	break;
				3289
				3290	case NFA_WHITE: /* \s */
				3291	result = vim_iswhite(c);
				3292	ADD_POS_NEG_STATE(t->state);
				3293	break;
				3294
				3295	case NFA_NWHITE: /* \S */
				3296	result = c != NUL && !vim_iswhite(c);
				3297	ADD_POS_NEG_STATE(t->state);
				3298	break;
				3299
				3300	case NFA_DIGIT: /* \d */
				3301	result = ri_digit(c);
				3302	ADD_POS_NEG_STATE(t->state);
				3303	break;
				3304
				3305	case NFA_NDIGIT: /* \D */
				3306	result = c != NUL && !ri_digit(c);
				3307	ADD_POS_NEG_STATE(t->state);
				3308	break;
				3309
				3310	case NFA_HEX: /* \x */
				3311	result = ri_hex(c);
				3312	ADD_POS_NEG_STATE(t->state);
				3313	break;
				3314
				3315	case NFA_NHEX: /* \X */
				3316	result = c != NUL && !ri_hex(c);
				3317	ADD_POS_NEG_STATE(t->state);
				3318	break;
				3319
				3320	case NFA_OCTAL: /* \o */
				3321	result = ri_octal(c);
				3322	ADD_POS_NEG_STATE(t->state);
				3323	break;
				3324
				3325	case NFA_NOCTAL: /* \O */
				3326	result = c != NUL && !ri_octal(c);
				3327	ADD_POS_NEG_STATE(t->state);
				3328	break;
				3329
				3330	case NFA_WORD: /* \w */
				3331	result = ri_word(c);
				3332	ADD_POS_NEG_STATE(t->state);
				3333	break;
				3334
				3335	case NFA_NWORD: /* \W */
				3336	result = c != NUL && !ri_word(c);
				3337	ADD_POS_NEG_STATE(t->state);
				3338	break;
				3339
				3340	case NFA_HEAD: /* \h */
				3341	result = ri_head(c);
				3342	ADD_POS_NEG_STATE(t->state);
				3343	break;
				3344
				3345	case NFA_NHEAD: /* \H */
				3346	result = c != NUL && !ri_head(c);
				3347	ADD_POS_NEG_STATE(t->state);
				3348	break;
				3349
				3350	case NFA_ALPHA: /* \a */
				3351	result = ri_alpha(c);
				3352	ADD_POS_NEG_STATE(t->state);
				3353	break;
				3354
				3355	case NFA_NALPHA: /* \A */
				3356	result = c != NUL && !ri_alpha(c);
				3357	ADD_POS_NEG_STATE(t->state);
				3358	break;
				3359
				3360	case NFA_LOWER: /* \l */
				3361	result = ri_lower(c);
				3362	ADD_POS_NEG_STATE(t->state);
				3363	break;
				3364
				3365	case NFA_NLOWER: /* \L */
				3366	result = c != NUL && !ri_lower(c);
				3367	ADD_POS_NEG_STATE(t->state);
				3368	break;
				3369
				3370	case NFA_UPPER: /* \u */
				3371	result = ri_upper(c);
				3372	ADD_POS_NEG_STATE(t->state);
				3373	break;
				3374
				3375	case NFA_NUPPER: /* \U */
				3376	result = c != NUL && !ri_upper(c);
				3377	ADD_POS_NEG_STATE(t->state);
				3378	break;
				3379
				3380	default: /* regular character */
				3381	result = (no_Magic(t->state->c) == c);
				3382	if (!result)
				3383	result = ireg_ic == TRUE
				3384	&& MB_TOLOWER(t->state->c) == MB_TOLOWER(c);
				3385	ADD_POS_NEG_STATE(t->state);
				3386	break;
				3387	}
				3388
				3389	} /* for (thislist = thislist; thislist->state; thislist++) */
				3390
				3391	/* The first found match is the leftmost one, but there may be a
				3392	* longer one. Keep running the NFA, but don't start from the
				3393	* beginning. Also, do not add the start state in recursive calls of
				3394	* nfa_regmatch(), because recursive calls should only start in the
				3395	* first position. */
				3396	if (match == FALSE && start->c == NFA_MOPEN + 0)
				3397	{
				3398	#ifdef ENABLE_LOG
				3399	fprintf(log_fd, "(---) STARTSTATE\n");
				3400	#endif
				3401	addstate(nextlist, start, m, n, listid + 1, &match);
				3402	}
				3403
				3404	if (reginput_updated)
				3405	{
				3406	reginput_updated = FALSE;
				3407	goto again;
				3408	}
				3409
				3410	#ifdef ENABLE_LOG
				3411	fprintf(log_fd, ">>> Thislist had %d states available: ", thislist->n);
				3412	for (i = 0; i< thislist->n; i++)
				3413	fprintf(log_fd, "%d ", abs(thislist->t[i].state->id));
				3414	fprintf(log_fd, "\n");
				3415	#endif
				3416
				3417	nextchar:
				3418	reginput += n;
				3419	} while (c \|\| reginput_updated);
				3420
				3421	#ifdef ENABLE_LOG
				3422	if (log_fd != stderr)
				3423	fclose(log_fd);
				3424	log_fd = NULL;
				3425	#endif
				3426
				3427	theend:
				3428	/* Free memory */
				3429	vim_free(list[0].t);
				3430	vim_free(list[1].t);
				3431	vim_free(list[2].t);
				3432	list[0].t = list[1].t = list[2].t = NULL;
				3433	if (listids != NULL)
				3434	vim_free(listids);
				3435	#undef ADD_POS_NEG_STATE
				3436	#ifdef DEBUG
				3437	fclose(debug);
				3438	#endif
				3439
				3440	return match;
				3441	}
				3442
				3443	/*
				3444	* Try match of "prog" with at regline["col"].
				3445	* Returns 0 for failure, number of lines contained in the match otherwise.
				3446	*/
				3447	static long
				3448	nfa_regtry(start, col)
				3449	nfa_state_T *start;
				3450	colnr_T col;
				3451	{
				3452	int i;
				3453	regsub_T sub, m;
				3454	#ifdef ENABLE_LOG
				3455	FILE *f;
				3456	#endif
				3457
				3458	reginput = regline + col;
				3459	need_clear_subexpr = TRUE;
				3460
				3461	#ifdef ENABLE_LOG
				3462	f = fopen(LOG_NAME, "a");
				3463	if (f != NULL)
				3464	{
				3465	fprintf(f, "\n\n\n\n\n\n\t\t=======================================================\n");
				3466	fprintf(f, " =======================================================\n");
				3467	#ifdef DEBUG
				3468	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
				3469	#endif
				3470	fprintf(f, "\tInput text is \"%s\" \n", reginput);
				3471	fprintf(f, " =======================================================\n\n\n\n\n\n\n");
				3472	nfa_print_state(f, start, 0);
				3473	fprintf(f, "\n\n");
				3474	fclose(f);
				3475	}
				3476	else
				3477	EMSG(_("Could not open temporary log file for writing "));
				3478	#endif
				3479
				3480	if (REG_MULTI)
				3481	{
				3482	/* Use 0xff to set lnum to -1 */
				3483	vim_memset(sub.startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3484	vim_memset(sub.endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3485	vim_memset(m.startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3486	vim_memset(m.endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
				3487	}
				3488	else
				3489	{
				3490	vim_memset(sub.start, 0, sizeof(char_u ) NSUBEXP);
				3491	vim_memset(sub.end, 0, sizeof(char_u ) NSUBEXP);
				3492	vim_memset(m.start, 0, sizeof(char_u ) NSUBEXP);
				3493	vim_memset(m.end, 0, sizeof(char_u ) NSUBEXP);
				3494	}
				3495
				3496	if (nfa_regmatch(start, &sub, &m) == FALSE)
				3497	return 0;
				3498
				3499	cleanup_subexpr();
				3500	if (REG_MULTI)
				3501	{
				3502	for (i = 0; i < NSUBEXP; i++)
				3503	{
				3504	reg_startpos[i] = sub.startpos[i];
				3505	reg_endpos[i] = sub.endpos[i];
				3506	}
				3507
				3508	if (reg_startpos[0].lnum < 0)
				3509	{
				3510	reg_startpos[0].lnum = 0;
				3511	reg_startpos[0].col = col;
				3512	}
				3513	if (reg_endpos[0].lnum < 0)
				3514	{
				3515	reg_endpos[0].lnum = reglnum;
				3516	reg_endpos[0].col = (int)(reginput - regline);
				3517	}
				3518	else
				3519	/* Use line number of "\ze". */
				3520	reglnum = reg_endpos[0].lnum;
				3521	}
				3522	else
				3523	{
				3524	for (i = 0; i < NSUBEXP; i++)
				3525	{
				3526	reg_startp[i] = sub.start[i];
				3527	reg_endp[i] = sub.end[i];
				3528	}
				3529
				3530	if (reg_startp[0] == NULL)
				3531	reg_startp[0] = regline + col;
				3532	if (reg_endp[0] == NULL)
				3533	reg_endp[0] = reginput;
				3534	}
				3535
				3536	return 1 + reglnum;
				3537	}
				3538
				3539	/*
				3540	* Match a regexp against a string ("line" points to the string) or multiple
				3541	* lines ("line" is NULL, use reg_getline()).
				3542	*
				3543	* Returns 0 for failure, number of lines contained in the match otherwise.
				3544	*/
				3545	static long
				3546	nfa_regexec_both(line, col)
				3547	char_u *line;
				3548	colnr_T col; /* column to start looking for match */
				3549	{
				3550	nfa_regprog_T *prog;
				3551	long retval = 0L;
				3552	int i;
				3553
				3554	if (REG_MULTI)
				3555	{
				3556	prog = (nfa_regprog_T *)reg_mmatch->regprog;
				3557	line = reg_getline((linenr_T)0); /* relative to the cursor */
				3558	reg_startpos = reg_mmatch->startpos;
				3559	reg_endpos = reg_mmatch->endpos;
				3560	}
				3561	else
				3562	{
				3563	prog = (nfa_regprog_T *)reg_match->regprog;
				3564	reg_startp = reg_match->startp;
				3565	reg_endp = reg_match->endp;
				3566	}
				3567
				3568	/* Be paranoid... */
				3569	if (prog == NULL \|\| line == NULL)
				3570	{
				3571	EMSG(_(e_null));
				3572	goto theend;
				3573	}
				3574
				3575	/* If the start column is past the maximum column: no need to try. */
				3576	if (ireg_maxcol > 0 && col >= ireg_maxcol)
				3577	goto theend;
				3578
				3579	/* If pattern contains "\c" or "\C": overrule value of ireg_ic */
				3580	if (prog->regflags & RF_ICASE)
				3581	ireg_ic = TRUE;
				3582	else if (prog->regflags & RF_NOICASE)
				3583	ireg_ic = FALSE;
				3584
				3585	#ifdef FEAT_MBYTE
				3586	/* If pattern contains "\Z" overrule value of ireg_icombine */
				3587	if (prog->regflags & RF_ICOMBINE)
				3588	ireg_icombine = TRUE;
				3589	#endif
				3590
				3591	regline = line;
				3592	reglnum = 0; /* relative to line */
				3593
				3594	nstate = prog->nstate;
				3595
				3596	for (i = 0; i < nstate; ++i)
				3597	{
				3598	prog->state[i].id = i;
				3599	prog->state[i].lastlist = 0;
				3600	prog->state[i].visits = 0;
				3601	prog->state[i].lastthread = NULL;
				3602	}
				3603
				3604	retval = nfa_regtry(prog->start, col);
				3605
				3606	theend:
				3607	return retval;
				3608	}
				3609
				3610	/*
				3611	* Compile a regular expression into internal code for the NFA matcher.
				3612	* Returns the program in allocated space. Returns NULL for an error.
				3613	*/
				3614	static regprog_T *
				3615	nfa_regcomp(expr, re_flags)
				3616	char_u *expr;
				3617	int re_flags;
				3618	{
				3619	nfa_regprog_T *prog;
				3620	int prog_size;
				3621	int *postfix;
				3622
				3623	if (expr == NULL)
				3624	return NULL;
				3625
				3626	#ifdef DEBUG
				3627	nfa_regengine.expr = expr;
				3628	#endif
				3629
				3630	init_class_tab();
				3631
				3632	if (nfa_regcomp_start(expr, re_flags) == FAIL)
				3633	return NULL;
				3634
				3635	/* Space for compiled regexp */
				3636	prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * nstate_max;
				3637	prog = (nfa_regprog_T *)lalloc(prog_size, TRUE);
				3638	if (prog == NULL)
				3639	goto fail;
				3640	vim_memset(prog, 0, prog_size);
				3641
				3642	/* Build postfix form of the regexp. Needed to build the NFA
				3643	* (and count its size) */
				3644	postfix = re2post();
				3645	if (postfix == NULL)
				3646	goto fail; /* Cascaded (syntax?) error */
				3647
				3648	/*
				3649	* In order to build the NFA, we parse the input regexp twice:
				3650	* 1. first pass to count size (so we can allocate space)
				3651	* 2. second to emit code
				3652	*/
				3653	#ifdef ENABLE_LOG
				3654	{
				3655	FILE *f = fopen(LOG_NAME, "a");
				3656
				3657	if (f != NULL)
				3658	{
				3659	fprintf(f, "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", expr);
				3660	fclose(f);
				3661	}
				3662	}
				3663	#endif
				3664
				3665	/*
				3666	* PASS 1
				3667	* Count number of NFA states in "nstate". Do not build the NFA.
				3668	*/
				3669	post2nfa(postfix, post_ptr, TRUE);
				3670	state_ptr = prog->state;
				3671
				3672	/*
				3673	* PASS 2
				3674	* Build the NFA
				3675	*/
				3676	prog->start = post2nfa(postfix, post_ptr, FALSE);
				3677	if (prog->start == NULL)
				3678	goto fail;
				3679
				3680	prog->regflags = regflags;
				3681	prog->engine = &nfa_regengine;
				3682	prog->nstate = nstate;
				3683	#ifdef ENABLE_LOG
				3684	nfa_postfix_dump(expr, OK);
				3685	nfa_dump(prog);
				3686	#endif
				3687
				3688	out:
				3689	vim_free(post_start);
				3690	post_start = post_ptr = post_end = NULL;
				3691	state_ptr = NULL;
				3692	return (regprog_T *)prog;
				3693
				3694	fail:
				3695	vim_free(prog);
				3696	prog = NULL;
				3697	#ifdef ENABLE_LOG
				3698	nfa_postfix_dump(expr, FAIL);
				3699	#endif
				3700	#ifdef DEBUG
				3701	nfa_regengine.expr = NULL;
				3702	#endif
				3703	goto out;
				3704	}
				3705
				3706
				3707	/*
				3708	* Match a regexp against a string.
				3709	* "rmp->regprog" is a compiled regexp as returned by nfa_regcomp().
				3710	* Uses curbuf for line count and 'iskeyword'.
				3711	*
				3712	* Return TRUE if there is a match, FALSE if not.
				3713	*/
				3714	static int
				3715	nfa_regexec(rmp, line, col)
				3716	regmatch_T *rmp;
				3717	char_u line; / string to match against */
				3718	colnr_T col; /* column to start looking for match */
				3719	{
				3720	reg_match = rmp;
				3721	reg_mmatch = NULL;
				3722	reg_maxline = 0;
				3723	reg_line_lbr = FALSE;
				3724	reg_buf = curbuf;
				3725	reg_win = NULL;
				3726	ireg_ic = rmp->rm_ic;
				3727	#ifdef FEAT_MBYTE
				3728	ireg_icombine = FALSE;
				3729	#endif
				3730	ireg_maxcol = 0;
				3731	return (nfa_regexec_both(line, col) != 0);
				3732	}
				3733
				3734	#if defined(FEAT_MODIFY_FNAME) \|\| defined(FEAT_EVAL) \
				3735	\|\| defined(FIND_REPLACE_DIALOG) \|\| defined(PROTO)
				3736
				3737	static int nfa_regexec_nl __ARGS((regmatch_T rmp, char_u line, colnr_T col));
				3738
				3739	/*
				3740	* Like nfa_regexec(), but consider a "\n" in "line" to be a line break.
				3741	*/
				3742	static int
				3743	nfa_regexec_nl(rmp, line, col)
				3744	regmatch_T *rmp;
				3745	char_u line; / string to match against */
				3746	colnr_T col; /* column to start looking for match */
				3747	{
				3748	reg_match = rmp;
				3749	reg_mmatch = NULL;
				3750	reg_maxline = 0;
				3751	reg_line_lbr = TRUE;
				3752	reg_buf = curbuf;
				3753	reg_win = NULL;
				3754	ireg_ic = rmp->rm_ic;
				3755	#ifdef FEAT_MBYTE
				3756	ireg_icombine = FALSE;
				3757	#endif
				3758	ireg_maxcol = 0;
				3759	return (nfa_regexec_both(line, col) != 0);
				3760	}
				3761	#endif
				3762
				3763
				3764	/*
				3765	* Match a regexp against multiple lines.
				3766	* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
				3767	* Uses curbuf for line count and 'iskeyword'.
				3768	*
				3769	* Return zero if there is no match. Return number of lines contained in the
				3770	* match otherwise.
				3771	*
				3772	* Note: the body is the same as bt_regexec() except for nfa_regexec_both()
				3773	*
				3774	* ! Also NOTE : match may actually be in another line. e.g.:
				3775	* when r.e. is \nc, cursor is at 'a' and the text buffer looks like
				3776	*
				3777	* +-------------------------+
				3778	* \|a \|
				3779	* \|b \|
				3780	* \|c \|
				3781	* \| \|
				3782	* +-------------------------+
				3783	*
				3784	* then nfa_regexec_multi() returns 3. while the original
				3785	* vim_regexec_multi() returns 0 and a second call at line 2 will return 2.
				3786	*
				3787	* FIXME if this behavior is not compatible.
				3788	*/
				3789	static long
				3790	nfa_regexec_multi(rmp, win, buf, lnum, col, tm)
				3791	regmmatch_T *rmp;
				3792	win_T win; / window in which to search or NULL */
				3793	buf_T buf; / buffer in which to search */
				3794	linenr_T lnum; /* nr of line to start looking for match */
				3795	colnr_T col; /* column to start looking for match */
				3796	proftime_T tm UNUSED; / timeout limit or NULL */
				3797	{
				3798	long r;
				3799	buf_T *save_curbuf = curbuf;
				3800
				3801	reg_match = NULL;
				3802	reg_mmatch = rmp;
				3803	reg_buf = buf;
				3804	reg_win = win;
				3805	reg_firstlnum = lnum;
				3806	reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
				3807	reg_line_lbr = FALSE;
				3808	ireg_ic = rmp->rmm_ic;
				3809	#ifdef FEAT_MBYTE
				3810	ireg_icombine = FALSE;
				3811	#endif
				3812	ireg_maxcol = rmp->rmm_maxcol;
				3813
				3814	/* Need to switch to buffer "buf" to make vim_iswordc() work. */
				3815	curbuf = buf;
				3816	r = nfa_regexec_both(NULL, col);
				3817	curbuf = save_curbuf;
				3818
				3819	return r;
				3820	}
				3821
				3822	#ifdef DEBUG
				3823	# undef ENABLE_LOG
				3824	#endif