patch 9.0.1771: regex: combining chars in collections not handled
Problem: regex: combining chars in collections not handled
Solution: Check for following combining characters in both the NFA and BT engines
closes: #10459
closes: #10286
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/src/regexp_bt.c b/src/regexp_bt.c
index 522cf37..198946e 100644
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -3743,13 +3743,38 @@
case ANYOF:
case ANYBUT:
- if (c == NUL)
- status = RA_NOMATCH;
- else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
- status = RA_NOMATCH;
- else
- ADVANCE_REGINPUT();
- break;
+ {
+ char_u *q = OPERAND(scan);
+
+ if (c == NUL)
+ status = RA_NOMATCH;
+ else if ((cstrchr(q, c) == NULL) == (op == ANYOF))
+ status = RA_NOMATCH;
+ else
+ {
+ // Check following combining characters
+ int len = 0;
+ int i;
+
+ if (enc_utf8)
+ len = utfc_ptr2len(q) - utf_ptr2len(q);
+
+ MB_CPTR_ADV(rex.input);
+ MB_CPTR_ADV(q);
+
+ if (!enc_utf8 || len == 0)
+ break;
+
+ for (i = 0; i < len; ++i)
+ if (q[i] != rex.input[i])
+ {
+ status = RA_NOMATCH;
+ break;
+ }
+ rex.input += len;
+ }
+ break;
+ }
case MULTIBYTECODE:
if (has_mbyte)