patch 9.0.0951: trying every character position for a match is inefficient
Problem: Trying every character position for a match is inefficient.
Solution: Use the column where the whole pattern started matching, ignoring "\zs".
diff --git a/src/regexp.c b/src/regexp.c
index 74ed13f..af8b375 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1123,10 +1123,12 @@
typedef struct {
regmatch_T *reg_match;
regmmatch_T *reg_mmatch;
+
char_u **reg_startp;
char_u **reg_endp;
lpos_T *reg_startpos;
lpos_T *reg_endpos;
+
win_T *reg_win;
buf_T *reg_buf;
linenr_T reg_firstlnum;
diff --git a/src/regexp.h b/src/regexp.h
index 89c41c9..d6c8f48 100644
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -133,6 +133,8 @@
regprog_T *regprog;
char_u *startp[NSUBEXP];
char_u *endp[NSUBEXP];
+
+ colnr_T rm_matchcol; // match start without "\zs"
int rm_ic;
} regmatch_T;
@@ -149,6 +151,8 @@
regprog_T *regprog;
lpos_T startpos[NSUBEXP];
lpos_T endpos[NSUBEXP];
+
+ colnr_T rmm_matchcol; // match start without "\zs"
int rmm_ic;
colnr_T rmm_maxcol; // when not zero: maximum column
} regmmatch_T;
diff --git a/src/regexp_bt.c b/src/regexp_bt.c
index 5206567..42b6ac2 100644
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -4842,11 +4842,12 @@
static long
bt_regexec_both(
char_u *line,
- colnr_T col, // column to start looking for match
+ colnr_T startcol, // column to start looking for match
int *timed_out) // flag set on timeout or NULL
{
bt_regprog_T *prog;
char_u *s;
+ colnr_T col = startcol;
long retval = 0L;
// Create "regstack" and "backpos" if they are not allocated yet.
@@ -5042,11 +5043,19 @@
if (end->lnum < start->lnum
|| (end->lnum == start->lnum && end->col < start->col))
rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+ // startpos[0] may be set by "\zs", also return the column where
+ // the whole pattern matched.
+ rex.reg_mmatch->rmm_matchcol = col;
}
else
{
if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+ // startp[0] may be set by "\zs", also return the column where
+ // the whole pattern matched.
+ rex.reg_match->rm_matchcol = col;
}
}
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 0228cfc..1702510 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -7378,7 +7378,14 @@
// If match_text is set it contains the full text that must match.
// Nothing else to try. Doesn't handle combining chars well.
if (prog->match_text != NULL && !rex.reg_icombine)
- return find_match_text(col, prog->regstart, prog->match_text);
+ {
+ retval = find_match_text(col, prog->regstart, prog->match_text);
+ if (REG_MULTI)
+ rex.reg_mmatch->rmm_matchcol = col;
+ else
+ rex.reg_match->rm_matchcol = col;
+ return retval;
+ }
}
// If the start column is past the maximum column: no need to try.
@@ -7414,11 +7421,19 @@
if (end->lnum < start->lnum
|| (end->lnum == start->lnum && end->col < start->col))
rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
+
+ // startpos[0] may be set by "\zs", also return the column where
+ // the whole pattern matched.
+ rex.reg_mmatch->rmm_matchcol = col;
}
else
{
if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
rex.reg_match->endp[0] = rex.reg_match->startp[0];
+
+ // startp[0] may be set by "\zs", also return the column where
+ // the whole pattern matched.
+ rex.reg_match->rm_matchcol = col;
}
}
diff --git a/src/version.c b/src/version.c
index 9566edd..829e71d 100644
--- a/src/version.c
+++ b/src/version.c
@@ -696,6 +696,8 @@
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 951,
+/**/
950,
/**/
949,