patch 9.1.0352: Finding cmd modifiers and cmdline-specials is inefficient

Problem:  Finding cmd modifiers and cmdline-specials is inefficient
Solution: Use binary search to find ex command modifiers and
          cmdline-special characters and reduce the number of strlen()
          (John Marriott)

closes: #14534

Signed-off-by: John Marriott <basilisk@internode.on.net>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/src/ex_docmd.c b/src/ex_docmd.c
index cc42e52..a588f26 100644
--- a/src/ex_docmd.c
+++ b/src/ex_docmd.c
@@ -19,6 +19,10 @@
 # define ex_hardcopy	ex_ni
 #endif
 
+#if defined(FEAT_EVAL) || defined(PROTO)
+static int cmp_cmdmod_info(const void *a, const void *b);
+#endif
+
 #ifdef FEAT_EVAL
 static char_u	*do_one_cmd(char_u **, int, cstack_T *, char_u *(*fgetline)(int, void *, int, getline_opt_T), void *cookie);
 #else
@@ -82,7 +86,7 @@
 #ifdef FEAT_QUICKFIX
 static char_u	*replace_makeprg(exarg_T *eap, char_u *p, char_u **cmdlinep);
 #endif
-static char_u	*repl_cmdline(exarg_T *eap, char_u *src, int srclen, char_u *repl, char_u **cmdlinep);
+static char_u	*repl_cmdline(exarg_T *eap, char_u *src, size_t srclen, char_u *repl, char_u **cmdlinep);
 static void	ex_highlight(exarg_T *eap);
 static void	ex_colorscheme(exarg_T *eap);
 static void	ex_cquit(exarg_T *eap);
@@ -2945,7 +2949,7 @@
 
 	switch (*p)
 	{
-	    // When adding an entry, also modify cmdmods[].
+	    // When adding an entry, also modify cmdmod_info_tab[].
 	    case 'a':	if (!checkforcmd_noparen(&eap->cmd, "aboveleft", 3))
 			    break;
 			cmod->cmod_split |= WSP_ABOVE;
@@ -3955,7 +3959,7 @@
 	    if (eap->cmdidx == CMD_mode || eap->cmdidx == CMD_Print)
 		eap->cmdidx = CMD_SIZE;
 	    else if ((cmdnames[eap->cmdidx].cmd_argt & EX_WHOLE)
-			  && len < (int)STRLEN(cmdnames[eap->cmdidx].cmd_name))
+			  && len < (int)cmdnames[eap->cmdidx].cmd_namelen)
 	    {
 		semsg(_(e_command_cannot_be_shortened_str), eap->cmd);
 		eap->cmdidx = CMD_SIZE;
@@ -4004,12 +4008,14 @@
 }
 
 #if defined(FEAT_EVAL) || defined(PROTO)
-static struct cmdmod
+typedef struct
 {
     char	*name;
     int		minlen;
     int		has_count;  // :123verbose  :3tab
-} cmdmods[] = {
+} cmdmod_info_T;
+
+static cmdmod_info_T cmdmod_info_tab[] = {
     {"aboveleft", 3, FALSE},
     {"belowright", 3, FALSE},
     {"botright", 2, FALSE},
@@ -4035,8 +4041,18 @@
     {"unsilent", 3, FALSE},
     {"verbose", 4, TRUE},
     {"vertical", 4, FALSE},
-    {"vim9cmd", 4, FALSE},
-};
+    {"vim9cmd", 4, FALSE}
+};	// cmdmod_info_tab
+
+// compare two cmdmod_info_T structs by case sensitive name with length
+    static int
+cmp_cmdmod_info(const void *a, const void *b)
+{
+    cmdmod_info_T *cm1 = (cmdmod_info_T *)a;
+    cmdmod_info_T *cm2 = (cmdmod_info_T *)b;
+
+    return STRNCMP(cm1->name, cm2->name, cm2->minlen);
+}
 
 /*
  * Return length of a command modifier (including optional count).
@@ -4045,20 +4061,36 @@
     int
 modifier_len(char_u *cmd)
 {
-    int		i, j;
     char_u	*p = cmd;
+    cmdmod_info_T	target;
+    cmdmod_info_T	*entry;
 
     if (VIM_ISDIGIT(*cmd))
 	p = skipwhite(skipdigits(cmd + 1));
-    for (i = 0; i < (int)ARRAY_LENGTH(cmdmods); ++i)
+
+    // only lowercase characters can match
+    if (!ASCII_ISLOWER(*p))
+	return 0;
+
+    target.name = (char *)p;
+    target.minlen = 0;		// not used, see cmp_cmdmod_info()
+    target.has_count = FALSE;
+
+    entry = (cmdmod_info_T *)bsearch(&target, &cmdmod_info_tab, ARRAY_LENGTH(cmdmod_info_tab), sizeof(cmdmod_info_tab[0]), cmp_cmdmod_info);
+    if (entry != NULL)
     {
-	for (j = 0; p[j] != NUL; ++j)
-	    if (p[j] != cmdmods[i].name[j])
+	int i;
+
+	for (i = entry->minlen; p[i] != NUL; ++i)
+	{
+	    if (p[i] != entry->name[i])
 		break;
-	if (!ASCII_ISALPHA(p[j]) && j >= cmdmods[i].minlen
-					&& (p == cmd || cmdmods[i].has_count))
-	    return j + (int)(p - cmd);
+	}
+
+	if (!ASCII_ISALPHA(p[i]) && i >= entry->minlen && (p == cmd || entry->has_count))
+	    return i + (int)(p - cmd);
     }
+
     return 0;
 }
 
@@ -4072,18 +4104,33 @@
 {
     exarg_T	ea;
     int		full = FALSE;
-    int		i;
-    int		j;
     char_u	*p;
 
-    // Check command modifiers.
-    for (i = 0; i < (int)ARRAY_LENGTH(cmdmods); ++i)
+    // only lowercase characters can match
+    if (ASCII_ISLOWER(*name))
     {
-	for (j = 0; name[j] != NUL; ++j)
-	    if (name[j] != cmdmods[i].name[j])
-		break;
-	if (name[j] == NUL && j >= cmdmods[i].minlen)
-	    return (cmdmods[i].name[j] == NUL ? 2 : 1);
+	cmdmod_info_T	target;
+	cmdmod_info_T	*entry;
+
+	target.name = (char *)name;
+	target.minlen = 0;		// not used, see cmp_cmdmod_info()
+	target.has_count = FALSE;
+
+	// Check command modifiers.
+	entry = (cmdmod_info_T *)bsearch(&target, &cmdmod_info_tab, ARRAY_LENGTH(cmdmod_info_tab), sizeof(cmdmod_info_tab[0]), cmp_cmdmod_info);
+	if (entry != NULL)
+	{
+	    int i;
+
+	    for (i = entry->minlen; name[i] != NUL; ++i)
+	    {
+		if (name[i] != entry->name[i])
+		    break;
+	    }
+
+	    if (name[i] == NUL && i >= entry->minlen)
+		return (entry->name[i] == NUL ? 2 : 1);
+	}
     }
 
     // Check built-in commands and user defined commands.
@@ -4997,12 +5044,14 @@
 	}
 	else
 	{
-	    new_cmdline = alloc(STRLEN(program) + STRLEN(p) + 2);
+	    size_t program_len = STRLEN(program);
+
+	    new_cmdline = alloc(program_len + STRLEN(p) + 2);
 	    if (new_cmdline == NULL)
 		return NULL;			// out of memory
 	    STRCPY(new_cmdline, program);
-	    STRCAT(new_cmdline, " ");
-	    STRCAT(new_cmdline, p);
+	    STRCPY(new_cmdline + program_len, " ");
+	    STRCPY(new_cmdline + program_len + 1, p);
 	}
 	msg_make(p);
 
@@ -5152,7 +5201,7 @@
 	    }
 	}
 
-	p = repl_cmdline(eap, p, srclen, repl, cmdlinep);
+	p = repl_cmdline(eap, p, (size_t)srclen, repl, cmdlinep);
 	vim_free(repl);
 	if (p == NULL)
 	    return FAIL;
@@ -5223,7 +5272,7 @@
 		}
 		if (p != NULL)
 		{
-		    (void)repl_cmdline(eap, eap->arg, (int)STRLEN(eap->arg),
+		    (void)repl_cmdline(eap, eap->arg, STRLEN(eap->arg),
 								 p, cmdlinep);
 		    if (n == 2)	// p came from ExpandOne()
 			vim_free(p);
@@ -5246,23 +5295,26 @@
 repl_cmdline(
     exarg_T	*eap,
     char_u	*src,
-    int		srclen,
+    size_t	srclen,
     char_u	*repl,
     char_u	**cmdlinep)
 {
-    int		len;
-    int		i;
+    size_t	repllen;
+    size_t	taillen;
+    size_t	i;
     char_u	*new_cmdline;
+    size_t	new_cmdlinelen;
 
     /*
      * The new command line is build in new_cmdline[].
      * First allocate it.
      * Careful: a "+cmd" argument may have been NUL terminated.
      */
-    len = (int)STRLEN(repl);
-    i = (int)(src - *cmdlinep) + (int)STRLEN(src + srclen) + len + 3;
+    repllen = STRLEN(repl);
+    taillen = STRLEN(src + srclen);
+    i = (src - *cmdlinep) + repllen + taillen + 3;
     if (eap->nextcmd != NULL)
-	i += (int)STRLEN(eap->nextcmd);// add space for next command
+	i += STRLEN(eap->nextcmd);	// add space for next command
     if ((new_cmdline = alloc(i)) == NULL)
 	return NULL;			// out of memory!
 
@@ -5272,19 +5324,19 @@
      * Copy what came after the expanded part.
      * Copy the next commands, if there are any.
      */
-    i = (int)(src - *cmdlinep);	// length of part before match
-    mch_memmove(new_cmdline, *cmdlinep, (size_t)i);
+    new_cmdlinelen = src - *cmdlinep;	// length of part before replacement
+    mch_memmove(new_cmdline, *cmdlinep, new_cmdlinelen);
 
-    mch_memmove(new_cmdline + i, repl, (size_t)len);
-    i += len;				// remember the end of the string
-    STRCPY(new_cmdline + i, src + srclen);
-    src = new_cmdline + i;		// remember where to continue
+    mch_memmove(new_cmdline + new_cmdlinelen, repl, repllen);
+    new_cmdlinelen += repllen;		// remember the end of the string
+    STRCPY(new_cmdline + new_cmdlinelen, src + srclen);
+    src = new_cmdline + new_cmdlinelen;	// remember where to continue
 
     if (eap->nextcmd != NULL)		// append next command
     {
-	i = (int)STRLEN(new_cmdline) + 1;
-	STRCPY(new_cmdline + i, eap->nextcmd);
-	eap->nextcmd = new_cmdline + i;
+	new_cmdlinelen += taillen + 1;
+	STRCPY(new_cmdline + new_cmdlinelen, eap->nextcmd);
+	eap->nextcmd = new_cmdline + new_cmdlinelen;
     }
     eap->cmd = new_cmdline + (eap->cmd - *cmdlinep);
     eap->arg = new_cmdline + (eap->arg - *cmdlinep);
@@ -5455,9 +5507,10 @@
 	"drop",
     };
 
-    if (idx < (int)ARRAY_LENGTH(p_bad_values))
-	return (char_u*)p_bad_values[idx];
-    return NULL;
+    if (idx < 0 || idx >= (int)ARRAY_LENGTH(p_bad_values))
+	return NULL;
+
+    return (char_u*)p_bad_values[idx];
 }
 
 /*
@@ -5571,9 +5624,10 @@
 	"edit",
     };
 
-    if (idx < (int)ARRAY_LENGTH(p_opt_values))
-	return (char_u*)p_opt_values[idx];
-    return NULL;
+    if (idx < 0 || idx >= (int)ARRAY_LENGTH(p_opt_values))
+	return NULL;
+
+    return (char_u*)p_opt_values[idx];
 }
 
 /*
@@ -9282,6 +9336,27 @@
 							  eap->forceit, TRUE);
 }
 
+enum {
+    SPEC_PERC = 0,
+    SPEC_HASH,
+    SPEC_CWORD,	    // cursor word
+    SPEC_CCWORD,    // cursor WORD
+    SPEC_CEXPR,	    // expr under cursor
+    SPEC_CFILE,	    // cursor path name
+    SPEC_SFILE,	    // ":so" file name
+    SPEC_SLNUM,	    // ":so" file line number
+    SPEC_STACK,	    // call stack
+    SPEC_SCRIPT,    // script file name
+    SPEC_AFILE,	    // autocommand file name
+    SPEC_ABUF,	    // autocommand buffer number
+    SPEC_AMATCH,    // autocommand match name
+    SPEC_SFLNUM,    // script file line number
+    SPEC_SID	    // script ID: <SNR>123_
+#ifdef FEAT_CLIENTSERVER
+    , SPEC_CLIENT
+#endif
+};
+
 /*
  * Check "str" for starting with a special cmdline variable.
  * If found return one of the SPEC_ values and set "*usedlen" to the length of
@@ -9290,54 +9365,54 @@
     int
 find_cmdline_var(char_u *src, int *usedlen)
 {
-    int		len;
-    int		i;
-    static char *(spec_str[]) = {
-		    "%",
-#define SPEC_PERC   0
-		    "#",
-#define SPEC_HASH   (SPEC_PERC + 1)
-		    "<cword>",		// cursor word
-#define SPEC_CWORD  (SPEC_HASH + 1)
-		    "<cWORD>",		// cursor WORD
-#define SPEC_CCWORD (SPEC_CWORD + 1)
-		    "<cexpr>",		// expr under cursor
-#define SPEC_CEXPR  (SPEC_CCWORD + 1)
-		    "<cfile>",		// cursor path name
-#define SPEC_CFILE  (SPEC_CEXPR + 1)
-		    "<sfile>",		// ":so" file name
-#define SPEC_SFILE  (SPEC_CFILE + 1)
-		    "<slnum>",		// ":so" file line number
-#define SPEC_SLNUM  (SPEC_SFILE + 1)
-		    "<stack>",		// call stack
-#define SPEC_STACK  (SPEC_SLNUM + 1)
-		    "<script>",		// script file name
-#define SPEC_SCRIPT (SPEC_STACK + 1)
-		    "<afile>",		// autocommand file name
-#define SPEC_AFILE  (SPEC_SCRIPT + 1)
-		    "<abuf>",		// autocommand buffer number
-#define SPEC_ABUF   (SPEC_AFILE + 1)
-		    "<amatch>",		// autocommand match name
-#define SPEC_AMATCH (SPEC_ABUF + 1)
-		    "<sflnum>",		// script file line number
-#define SPEC_SFLNUM  (SPEC_AMATCH + 1)
-		    "<SID>",		// script ID: <SNR>123_
-#define SPEC_SID  (SPEC_SFLNUM + 1)
+    // must be sorted by the 'value' field because it is used by bsearch()!
+    static keyvalue_T spec_str_tab[] = {
+	KEYVALUE_ENTRY(SPEC_SID, "SID>"),	    // script ID: <SNR>123_
+	KEYVALUE_ENTRY(SPEC_ABUF, "abuf>"),	    // autocommand buffer number
+	KEYVALUE_ENTRY(SPEC_AFILE, "afile>"),	    // autocommand file name
+	KEYVALUE_ENTRY(SPEC_AMATCH, "amatch>"),	    // autocommand match name
+	KEYVALUE_ENTRY(SPEC_CCWORD, "cWORD>"),	    // cursor WORD
+	KEYVALUE_ENTRY(SPEC_CEXPR, "cexpr>"),	    // expr under cursor
+	KEYVALUE_ENTRY(SPEC_CFILE, "cfile>"),	    // cursor path name
 #ifdef FEAT_CLIENTSERVER
-		    "<client>"
-# define SPEC_CLIENT (SPEC_SID + 1)
+	KEYVALUE_ENTRY(SPEC_CLIENT, "client>"),
 #endif
+	KEYVALUE_ENTRY(SPEC_CWORD, "cword>"),	    // cursor word
+	KEYVALUE_ENTRY(SPEC_SCRIPT, "script>"),	    // script file name
+	KEYVALUE_ENTRY(SPEC_SFILE, "sfile>"),	    // ":so" file name
+	KEYVALUE_ENTRY(SPEC_SFLNUM, "sflnum>"),	    // script file line number
+	KEYVALUE_ENTRY(SPEC_SLNUM, "slnum>"),	    // ":so" file line number
+	KEYVALUE_ENTRY(SPEC_STACK, "stack>")	    // call stack
     };
+    keyvalue_T target;
+    keyvalue_T *entry;
 
-    for (i = 0; i < (int)ARRAY_LENGTH(spec_str); ++i)
+    switch (*src)
     {
-	len = (int)STRLEN(spec_str[i]);
-	if (STRNCMP(src, spec_str[i], len) == 0)
-	{
-	    *usedlen = len;
-	    return i;
-	}
+    case '%':
+	*usedlen = 1;
+	return SPEC_PERC;
+
+    case '#':
+	*usedlen = 1;
+	return SPEC_HASH;
+
+    case '<':
+	target.key = 0;
+	target.value = (char *)src + 1;	    // skip over '<'
+	target.length = 0;		    // not used, see cmp_keyvalue_value_n()
+
+	entry = (keyvalue_T *)bsearch(&target, &spec_str_tab, ARRAY_LENGTH(spec_str_tab), sizeof(spec_str_tab[0]), cmp_keyvalue_value_n);
+	if (entry == NULL)
+	    return -1;
+
+	*usedlen = entry->length + 1;
+	return entry->key;
+
+    default:
+	break;
     }
+
     return -1;
 }
 
@@ -9700,14 +9775,17 @@
 expand_sfile(char_u *arg)
 {
     char	*errormsg;
-    int		len;
     char_u	*result;
+    size_t	resultlen;
     char_u	*newres;
+    size_t	len;
     char_u	*repl;
+    size_t	repllen;
     int		srclen;
     char_u	*p;
 
-    result = vim_strsave(arg);
+    resultlen = STRLEN(arg);
+    result = vim_strnsave(arg, resultlen);
     if (result == NULL)
 	return NULL;
 
@@ -9731,18 +9809,20 @@
 		p += srclen;
 		continue;
 	    }
-	    len = (int)STRLEN(result) - srclen + (int)STRLEN(repl) + 1;
-	    newres = alloc(len);
+	    repllen = STRLEN(repl);
+	    resultlen += repllen - srclen;
+	    newres = alloc(resultlen + 1);
 	    if (newres == NULL)
 	    {
 		vim_free(repl);
 		vim_free(result);
 		return NULL;
 	    }
-	    mch_memmove(newres, result, (size_t)(p - result));
-	    STRCPY(newres + (p - result), repl);
-	    len = (int)STRLEN(newres);
-	    STRCAT(newres, p + srclen);
+	    len = p - result;
+	    mch_memmove(newres, result, len);
+	    STRCPY(newres + len, repl);
+	    len += repllen;
+	    STRCPY(newres + len, p + srclen);
 	    vim_free(repl);
 	    vim_free(result);
 	    result = newres;