patch 9.1.0297: Patch 9.1.0296 causes too many issues. Problem: Patch 9.1.0296 causes too many issues (Tony Mechelynck, @chdiza, CI). Solution: Back out the change for now. Revert "patch 9.1.0296: regexp: engines do not handle case-folding well". This reverts commit 7a27c108e0509f3255ebdcb6558e896c223e4d23; it causes issues with syntax highlighting and breaks the FreeBSD and macOS CI. It needs more work. fixes: #14487 Signed-off-by: Christian Brabandt <cb@256bit.org>

commit: c97f4d61cde24030f2f7d2318e1b409a0ccc3e43 [log] [tgz]
author: Christian Brabandt <cb@256bit.org> Wed Apr 10 16:18:15 2024 +0200
committer: Christian Brabandt <cb@256bit.org> Wed Apr 10 16:22:17 2024 +0200
tree: f656142296f4b44236674cdb6258d96acab0a6ee
parent: 49f1e1979f9c3a4d7b28f0961bca0e41227a0557 [diff]
diff --git a/src/mbyte.c b/src/mbyte.c
index 3be7509..d6fb7ec 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c

@@ -3801,15 +3801,6 @@
  * two characters otherwise.
  */
     int
-mb_strnicmp2(char_u *s1, char_u *s2, int n1, int n2)
-{
-    if (n1 == n2 || !enc_utf8)
-	return mb_strnicmp(s1, s2, n1);
-    else
-	return utf_strnicmp(s1, s2, n1, n2);
-}
-
-    int
 mb_strnicmp(char_u *s1, char_u *s2, size_t nn)
 {
     int		i, l;

diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro
index c49f7e7..7883b3b 100644
--- a/src/proto/mbyte.pro
+++ b/src/proto/mbyte.pro

@@ -48,7 +48,6 @@
 int utf_tolower(int a);
 int utf_isupper(int a);
 int mb_strnicmp(char_u *s1, char_u *s2, size_t nn);
-int mb_strnicmp2(char_u *s1, char_u *s2, int n1, int n2);
 void show_utf8(void);
 int latin_head_off(char_u *base, char_u *p);
 int dbcs_screen_head_off(char_u *base, char_u *p);

diff --git a/src/regexp.c b/src/regexp.c
index 4e85ebc..4373ae0 100644
--- a/src/regexp.c
+++ b/src/regexp.c

@@ -1606,9 +1606,7 @@
 /*
  * Compare two strings, ignore case if rex.reg_ic set.
  * Return 0 if strings match, non-zero otherwise.
- * Correct the length "*n" when composing characters are ignored
- * or for utf8 when both utf codepoints are considered equal because of
- * case-folding but have different length (e.g. 's' and 'ſ')
+ * Correct the length "*n" when composing characters are ignored.
  */
     static int
 cstrncmp(char_u *s1, char_u *s2, int *n)
@@ -1617,13 +1615,6 @@
 
     if (!rex.reg_ic)
 	result = STRNCMP(s1, s2, *n);
-    else if (enc_utf8)
-    {
-	int l2 = mb_ptr2len(s2);
-	result = MB_STRNICMP2(s1, s2, *n, l2);
-	if (result == 0 && l2 < *n)
-	    *n = l2;
-    }
     else
 	result = MB_STRNICMP(s1, s2, *n);
 

diff --git a/src/regexp_bt.c b/src/regexp_bt.c
index 2a03fec..5d9450d 100644
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c

@@ -3816,14 +3816,6 @@
 			}
 		    }
 		}
-		else if (enc_utf8)
-		{
-		    if (cstrncmp(opnd, rex.input, &len) != 0)
-		    {
-			status = RA_NOMATCH;
-			break;
-		    }
-		}
 		else
 		    for (i = 0; i < len; ++i)
 			if (opnd[i] != rex.input[i])

diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 451720a..5e4fadd 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c

@@ -5666,12 +5666,7 @@
     for (;;)
     {
 	match = TRUE;
-	// skip regstart
-	len2 = MB_CHAR2LEN(regstart);
-	if (enc_utf8 && len2 > 1 && MB_CHAR2LEN(PTR2CHAR(rex.line + col)) != len2)
-	    // because of case-folding of the previously matched text, we may need
-	    // to skip fewer bytes than mb_char2len(regstart)
-	    len2 = mb_char2len(utf_fold(regstart));
+	len2 = MB_CHAR2LEN(regstart); // skip regstart
 	for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1))
 	{
 	    c1 = PTR2CHAR(match_text + len1);
@@ -7508,7 +7503,7 @@
 
 	// If match_text is set it contains the full text that must match.
 	// Nothing else to try. Doesn't handle combining chars well.
-	if (prog->match_text != NULL && *prog->match_text != NUL && !rex.reg_icombine)
+	if (prog->match_text != NULL && !rex.reg_icombine)
 	{
 	    retval = find_match_text(&col, prog->regstart, prog->match_text);
 	    if (REG_MULTI)

diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index 9980e5b..6669dee 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim

@@ -587,32 +587,4 @@
   bw!
 endfunc
 
-func Test_search_multibyte_match_ascii()
-  new
-  " Match single 'ſ' and 's'
-  call setline(1,  'das abc heraus abc ſich abc ſind')
-  for i in range(0, 2)
-    exe "set re="..i
-    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
-  endfor
-  " Match several 'ſſ' and 'ss'
-  call setline(1,  'das abc herauss abc ſſich abc ſind')
-  for i in range(0, 2)
-    exe "set re="..i
-    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
-
-    call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
-  endfor
-  bw!
-endfunc
-
 " vim: shiftwidth=2 sts=2 expandtab

diff --git a/src/version.c b/src/version.c
index c63b141..5bf9581 100644
--- a/src/version.c
+++ b/src/version.c

@@ -705,6 +705,8 @@
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    297,
+/**/
     296,
 /**/
     295,

diff --git a/src/vim.h b/src/vim.h
index 33d5920..4507674 100644
--- a/src/vim.h
+++ b/src/vim.h

@@ -1751,7 +1751,6 @@
 
 # define MB_STRICMP(d, s)	mb_strnicmp((char_u *)(d), (char_u *)(s), (int)MAXCOL)
 # define MB_STRNICMP(d, s, n)	mb_strnicmp((char_u *)(d), (char_u *)(s), (int)(n))
-# define MB_STRNICMP2(d, s, n1, n2)	mb_strnicmp2((char_u *)(d), (char_u *)(s), (int)(n1), (int)(n2))
 
 #define STRCAT(d, s)	    strcat((char *)(d), (char *)(s))
 #define STRNCAT(d, s, n)    strncat((char *)(d), (char *)(s), (size_t)(n))
commit	c97f4d61cde24030f2f7d2318e1b409a0ccc3e43	[log] [tgz]
author	Christian Brabandt <cb@256bit.org>	Wed Apr 10 16:18:15 2024 +0200
committer	Christian Brabandt <cb@256bit.org>	Wed Apr 10 16:22:17 2024 +0200
tree	f656142296f4b44236674cdb6258d96acab0a6ee
parent	49f1e1979f9c3a4d7b28f0961bca0e41227a0557 [diff]