patch 8.2.2278: falling back to old regexp engine can break some patterns
Problem: Falling back to the old regexp engine can break some patterns.
Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used.
(Christian Brabandt, closes #7572)
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index ad47142..064d90a 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -253,6 +253,12 @@
static int *post_start; // holds the postfix form of r.e.
static int *post_end;
static int *post_ptr;
+
+// Set when the pattern should use the NFA engine.
+// E.g. [[:upper:]] only allows 8bit characters for BT engine,
+// while NFA engine handles multibyte characters correctly.
+static int wants_nfa;
+
static int nstate; // Number of states in the NFA.
static int istate; // Index in the state vector, used in alloc_state()
@@ -306,6 +312,7 @@
return FAIL;
post_ptr = post_start;
post_end = post_start + nstate_max;
+ wants_nfa = FALSE;
rex.nfa_has_zend = FALSE;
rex.nfa_has_backref = FALSE;
@@ -1707,6 +1714,7 @@
EMIT(NFA_CLASS_GRAPH);
break;
case CLASS_LOWER:
+ wants_nfa = TRUE;
EMIT(NFA_CLASS_LOWER);
break;
case CLASS_PRINT:
@@ -1719,6 +1727,7 @@
EMIT(NFA_CLASS_SPACE);
break;
case CLASS_UPPER:
+ wants_nfa = TRUE;
EMIT(NFA_CLASS_UPPER);
break;
case CLASS_XDIGIT:
@@ -2137,9 +2146,15 @@
// The engine is very inefficient (uses too many states) when the
// maximum is much larger than the minimum and when the maximum is
- // large. Bail out if we can use the other engine.
+ // large. However, when maxval is MAX_LIMIT, it is okay, as this
+ // will emit NFA_STAR.
+ // Bail out if we can use the other engine, but only when the
+ // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
+ // does not work with characters > 8 bit with the BT engine).
if ((nfa_re_flags & RE_AUTO)
- && (maxval > 500 || maxval > minval + 200))
+ && (maxval > 500 || maxval > minval + 200)
+ && (maxval != MAX_LIMIT && minval < 200)
+ && !wants_nfa)
return FAIL;
// Ignore previous call to nfa_regatom()