blob: 33de175f298bd4e30822c4f23c1b2f911489a080 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
154 int l;
155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
272 if (do_special && find_cmdline_var(p, &l) >= 0)
273 {
274 *d++ = '\\'; // insert backslash
275 while (--l >= 0) // copy the var
276 *d++ = *p++;
277 continue;
278 }
Jason Cox6e823512021-08-29 12:36:49 +0200279 if (*p == '\\' && fish_like)
280 {
281 *d++ = '\\';
282 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200283 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200284 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200285
286 MB_COPY_CHAR(p, d);
287 }
288
289 // add terminating quote and finish with a NUL
290# ifdef MSWIN
291 if (double_quotes)
292 *d++ = '"';
293 else
294# endif
295 *d++ = '\'';
296 *d = NUL;
297 }
298
299 return escaped_string;
300}
301
302/*
303 * Like vim_strsave(), but make all characters uppercase.
304 * This uses ASCII lower-to-upper case translation, language independent.
305 */
306 char_u *
307vim_strsave_up(char_u *string)
308{
309 char_u *p1;
310
311 p1 = vim_strsave(string);
312 vim_strup(p1);
313 return p1;
314}
315
316/*
317 * Like vim_strnsave(), but make all characters uppercase.
318 * This uses ASCII lower-to-upper case translation, language independent.
319 */
320 char_u *
321vim_strnsave_up(char_u *string, size_t len)
322{
323 char_u *p1;
324
325 p1 = vim_strnsave(string, len);
326 vim_strup(p1);
327 return p1;
328}
329
330/*
331 * ASCII lower-to-upper case translation, language independent.
332 */
333 void
334vim_strup(
335 char_u *p)
336{
337 char_u *p2;
338 int c;
339
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000340 if (p == NULL)
341 return;
342
343 p2 = p;
344 while ((c = *p2) != NUL)
345 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200346}
347
348#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
349/*
350 * Make string "s" all upper-case and return it in allocated memory.
351 * Handles multi-byte characters as well as possible.
352 * Returns NULL when out of memory.
353 */
354 static char_u *
355strup_save(char_u *orig)
356{
357 char_u *p;
358 char_u *res;
359
360 res = p = vim_strsave(orig);
361
362 if (res != NULL)
363 while (*p != NUL)
364 {
365 int l;
366
367 if (enc_utf8)
368 {
369 int c, uc;
370 int newl;
371 char_u *s;
372
373 c = utf_ptr2char(p);
374 l = utf_ptr2len(p);
375 if (c == 0)
376 {
377 // overlong sequence, use only the first byte
378 c = *p;
379 l = 1;
380 }
381 uc = utf_toupper(c);
382
383 // Reallocate string when byte count changes. This is rare,
384 // thus it's OK to do another malloc()/free().
385 newl = utf_char2len(uc);
386 if (newl != l)
387 {
388 s = alloc(STRLEN(res) + 1 + newl - l);
389 if (s == NULL)
390 {
391 vim_free(res);
392 return NULL;
393 }
394 mch_memmove(s, res, p - res);
395 STRCPY(s + (p - res) + newl, p + l);
396 p = s + (p - res);
397 vim_free(res);
398 res = s;
399 }
400
401 utf_char2bytes(uc, p);
402 p += newl;
403 }
404 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
405 p += l; // skip multi-byte character
406 else
407 {
408 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
409 p++;
410 }
411 }
412
413 return res;
414}
415
416/*
417 * Make string "s" all lower-case and return it in allocated memory.
418 * Handles multi-byte characters as well as possible.
419 * Returns NULL when out of memory.
420 */
421 char_u *
422strlow_save(char_u *orig)
423{
424 char_u *p;
425 char_u *res;
426
427 res = p = vim_strsave(orig);
428
429 if (res != NULL)
430 while (*p != NUL)
431 {
432 int l;
433
434 if (enc_utf8)
435 {
436 int c, lc;
437 int newl;
438 char_u *s;
439
440 c = utf_ptr2char(p);
441 l = utf_ptr2len(p);
442 if (c == 0)
443 {
444 // overlong sequence, use only the first byte
445 c = *p;
446 l = 1;
447 }
448 lc = utf_tolower(c);
449
450 // Reallocate string when byte count changes. This is rare,
451 // thus it's OK to do another malloc()/free().
452 newl = utf_char2len(lc);
453 if (newl != l)
454 {
455 s = alloc(STRLEN(res) + 1 + newl - l);
456 if (s == NULL)
457 {
458 vim_free(res);
459 return NULL;
460 }
461 mch_memmove(s, res, p - res);
462 STRCPY(s + (p - res) + newl, p + l);
463 p = s + (p - res);
464 vim_free(res);
465 res = s;
466 }
467
468 utf_char2bytes(lc, p);
469 p += newl;
470 }
471 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
472 p += l; // skip multi-byte character
473 else
474 {
475 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
476 p++;
477 }
478 }
479
480 return res;
481}
482#endif
483
484/*
485 * delete spaces at the end of a string
486 */
487 void
488del_trailing_spaces(char_u *ptr)
489{
490 char_u *q;
491
492 q = ptr + STRLEN(ptr);
493 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
494 *q = NUL;
495}
496
497/*
498 * Like strncpy(), but always terminate the result with one NUL.
499 * "to" must be "len + 1" long!
500 */
501 void
502vim_strncpy(char_u *to, char_u *from, size_t len)
503{
504 STRNCPY(to, from, len);
505 to[len] = NUL;
506}
507
508/*
509 * Like strcat(), but make sure the result fits in "tosize" bytes and is
510 * always NUL terminated. "from" and "to" may overlap.
511 */
512 void
513vim_strcat(char_u *to, char_u *from, size_t tosize)
514{
515 size_t tolen = STRLEN(to);
516 size_t fromlen = STRLEN(from);
517
518 if (tolen + fromlen + 1 > tosize)
519 {
520 mch_memmove(to + tolen, from, tosize - tolen - 1);
521 to[tosize - 1] = NUL;
522 }
523 else
524 mch_memmove(to + tolen, from, fromlen + 1);
525}
526
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000527/*
528 * A version of strlen() that has a maximum length.
529 */
530 size_t
531vim_strlen_maxlen(char *s, size_t maxlen)
532{
533 size_t i;
534 for (i = 0; i < maxlen; ++i)
535 if (s[i] == NUL)
536 break;
537 return i;
538}
539
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200540#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
541/*
542 * Compare two strings, ignoring case, using current locale.
543 * Doesn't work for multi-byte characters.
544 * return 0 for match, < 0 for smaller, > 0 for bigger
545 */
546 int
547vim_stricmp(char *s1, char *s2)
548{
549 int i;
550
551 for (;;)
552 {
553 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
554 if (i != 0)
555 return i; // this character different
556 if (*s1 == NUL)
557 break; // strings match until NUL
558 ++s1;
559 ++s2;
560 }
561 return 0; // strings match
562}
563#endif
564
565#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
566/*
567 * Compare two strings, for length "len", ignoring case, using current locale.
568 * Doesn't work for multi-byte characters.
569 * return 0 for match, < 0 for smaller, > 0 for bigger
570 */
571 int
572vim_strnicmp(char *s1, char *s2, size_t len)
573{
574 int i;
575
576 while (len > 0)
577 {
578 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
579 if (i != 0)
580 return i; // this character different
581 if (*s1 == NUL)
582 break; // strings match until NUL
583 ++s1;
584 ++s2;
585 --len;
586 }
587 return 0; // strings match
588}
589#endif
590
591/*
592 * Search for first occurrence of "c" in "string".
593 * Version of strchr() that handles unsigned char strings with characters from
594 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
595 * end of the string.
596 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000597 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200598vim_strchr(char_u *string, int c)
599{
600 char_u *p;
601 int b;
602
603 p = string;
604 if (enc_utf8 && c >= 0x80)
605 {
606 while (*p != NUL)
607 {
608 int l = utfc_ptr2len(p);
609
610 // Avoid matching an illegal byte here.
611 if (utf_ptr2char(p) == c && l > 1)
612 return p;
613 p += l;
614 }
615 return NULL;
616 }
617 if (enc_dbcs != 0 && c > 255)
618 {
619 int n2 = c & 0xff;
620
621 c = ((unsigned)c >> 8) & 0xff;
622 while ((b = *p) != NUL)
623 {
624 if (b == c && p[1] == n2)
625 return p;
626 p += (*mb_ptr2len)(p);
627 }
628 return NULL;
629 }
630 if (has_mbyte)
631 {
632 while ((b = *p) != NUL)
633 {
634 if (b == c)
635 return p;
636 p += (*mb_ptr2len)(p);
637 }
638 return NULL;
639 }
640 while ((b = *p) != NUL)
641 {
642 if (b == c)
643 return p;
644 ++p;
645 }
646 return NULL;
647}
648
649/*
650 * Version of strchr() that only works for bytes and handles unsigned char
651 * strings with characters above 128 correctly. It also doesn't return a
652 * pointer to the NUL at the end of the string.
653 */
654 char_u *
655vim_strbyte(char_u *string, int c)
656{
657 char_u *p = string;
658
659 while (*p != NUL)
660 {
661 if (*p == c)
662 return p;
663 ++p;
664 }
665 return NULL;
666}
667
668/*
669 * Search for last occurrence of "c" in "string".
670 * Version of strrchr() that handles unsigned char strings with characters from
671 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
672 * end of the string.
673 * Return NULL if not found.
674 * Does not handle multi-byte char for "c"!
675 */
676 char_u *
677vim_strrchr(char_u *string, int c)
678{
679 char_u *retval = NULL;
680 char_u *p = string;
681
682 while (*p)
683 {
684 if (*p == c)
685 retval = p;
686 MB_PTR_ADV(p);
687 }
688 return retval;
689}
690
691/*
692 * Vim's version of strpbrk(), in case it's missing.
693 * Don't generate a prototype for this, causes problems when it's not used.
694 */
695#ifndef PROTO
696# ifndef HAVE_STRPBRK
697# ifdef vim_strpbrk
698# undef vim_strpbrk
699# endif
700 char_u *
701vim_strpbrk(char_u *s, char_u *charset)
702{
703 while (*s)
704 {
705 if (vim_strchr(charset, *s) != NULL)
706 return s;
707 MB_PTR_ADV(s);
708 }
709 return NULL;
710}
711# endif
712#endif
713
714/*
715 * Sort an array of strings.
716 */
static int sort_compare(const void *s1, const void *s2);

/*
 * qsort() callback: "s1" and "s2" point to string pointers, which are
 * compared with STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char    *left = *(char **)s1;
    char    *right = *(char **)s2;

    return STRCMP(left, right);
}
724
725 void
726sort_strings(
727 char_u **files,
728 int count)
729{
730 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
731}
732
733#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
734/*
735 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
736 * When "s" is NULL FALSE is returned.
737 */
738 int
739has_non_ascii(char_u *s)
740{
741 char_u *p;
742
743 if (s != NULL)
744 for (p = s; *p != NUL; ++p)
745 if (*p >= 128)
746 return TRUE;
747 return FALSE;
748}
749#endif
750
751/*
752 * Concatenate two strings and return the result in allocated memory.
753 * Returns NULL when out of memory.
754 */
755 char_u *
756concat_str(char_u *str1, char_u *str2)
757{
758 char_u *dest;
759 size_t l = str1 == NULL ? 0 : STRLEN(str1);
760
761 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000762 if (dest == NULL)
763 return NULL;
764 if (str1 == NULL)
765 *dest = NUL;
766 else
767 STRCPY(dest, str1);
768 if (str2 != NULL)
769 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200770 return dest;
771}
772
zeertzjq4dd266c2023-08-19 11:35:03 +0200773#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
774/*
775 * Reverse text into allocated memory.
776 * Returns the allocated string, NULL when out of memory.
777 */
778 char_u *
779reverse_text(char_u *s)
780{
781 size_t len = STRLEN(s);
782 char_u *rev = alloc(len + 1);
783 if (rev == NULL)
784 return NULL;
785
786 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
787 {
788 if (has_mbyte)
789 {
790 int mb_len = (*mb_ptr2len)(s + s_i);
791 rev_i -= mb_len;
792 mch_memmove(rev + rev_i, s + s_i, mb_len);
793 s_i += mb_len - 1;
794 }
795 else
796 rev[--rev_i] = s[s_i];
797 }
798 rev[len] = NUL;
799 return rev;
800}
801#endif
802
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200803#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200804/*
805 * Return string "str" in ' quotes, doubling ' characters.
806 * If "str" is NULL an empty string is assumed.
807 * If "function" is TRUE make it function('string').
808 */
809 char_u *
810string_quote(char_u *str, int function)
811{
812 unsigned len;
813 char_u *p, *r, *s;
814
815 len = (function ? 13 : 3);
816 if (str != NULL)
817 {
818 len += (unsigned)STRLEN(str);
819 for (p = str; *p != NUL; MB_PTR_ADV(p))
820 if (*p == '\'')
821 ++len;
822 }
823 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000824 if (r == NULL)
825 return NULL;
826
827 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200828 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000829 STRCPY(r, "function('");
830 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200831 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000832 else
833 *r++ = '\'';
834 if (str != NULL)
835 for (p = str; *p != NUL; )
836 {
837 if (*p == '\'')
838 *r++ = '\'';
839 MB_COPY_CHAR(p, r);
840 }
841 *r++ = '\'';
842 if (function)
843 *r++ = ')';
844 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200845 return s;
846}
847
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000848/*
849 * Count the number of times "needle" occurs in string "haystack". Case is
850 * ignored if "ic" is TRUE.
851 */
852 long
853string_count(char_u *haystack, char_u *needle, int ic)
854{
855 long n = 0;
856 char_u *p = haystack;
857 char_u *next;
858
859 if (p == NULL || needle == NULL || *needle == NUL)
860 return 0;
861
862 if (ic)
863 {
864 size_t len = STRLEN(needle);
865
866 while (*p != NUL)
867 {
868 if (MB_STRNICMP(p, needle, len) == 0)
869 {
870 ++n;
871 p += len;
872 }
873 else
874 MB_PTR_ADV(p);
875 }
876 }
877 else
878 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
879 {
880 ++n;
881 p = next + STRLEN(needle);
882 }
883
884 return n;
885}
886
887/*
888 * Make a typval_T of the first character of "input" and store it in "output".
889 * Return OK or FAIL.
890 */
891 static int
892copy_first_char_to_tv(char_u *input, typval_T *output)
893{
894 char_u buf[MB_MAXBYTES + 1];
895 int len;
896
897 if (input == NULL || output == NULL)
898 return FAIL;
899
900 len = has_mbyte ? mb_ptr2len(input) : 1;
901 STRNCPY(buf, input, len);
902 buf[len] = NUL;
903 output->v_type = VAR_STRING;
904 output->vval.v_string = vim_strsave(buf);
905
906 return output->vval.v_string == NULL ? FAIL : OK;
907}
908
909/*
910 * Implementation of map() and filter() for a String. Apply "expr" to every
911 * character in string "str" and return the result in "rettv".
912 */
913 void
914string_filter_map(
915 char_u *str,
916 filtermap_T filtermap,
917 typval_T *expr,
918 typval_T *rettv)
919{
920 char_u *p;
921 typval_T tv;
922 garray_T ga;
923 int len = 0;
924 int idx = 0;
925 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100926 typval_T newtv;
927 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000928
929 rettv->v_type = VAR_STRING;
930 rettv->vval.v_string = NULL;
931
932 // set_vim_var_nr() doesn't set the type
933 set_vim_var_type(VV_KEY, VAR_NUMBER);
934
zeertzjqe7d49462023-04-16 20:53:55 +0100935 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100936 fc = eval_expr_get_funccal(expr, &newtv);
937
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000938 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000939 for (p = str; *p != NUL; p += len)
940 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000941 if (copy_first_char_to_tv(p, &tv) == FAIL)
942 break;
943 len = (int)STRLEN(tv.vval.v_string);
944
945 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100946 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000947 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000948 {
949 clear_tv(&newtv);
950 clear_tv(&tv);
951 break;
952 }
Ernie Raele79e2072024-01-13 11:47:33 +0100953 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000954 {
955 if (newtv.v_type != VAR_STRING)
956 {
957 clear_tv(&newtv);
958 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000959 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000960 break;
961 }
962 else
963 ga_concat(&ga, newtv.vval.v_string);
964 }
Ernie Raele79e2072024-01-13 11:47:33 +0100965 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000966 ga_concat(&ga, tv.vval.v_string);
967
968 clear_tv(&newtv);
969 clear_tv(&tv);
970
971 ++idx;
972 }
973 ga_append(&ga, NUL);
974 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +0100975 if (fc != NULL)
976 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000977}
978
979/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100980 * Implementation of reduce() for String "argvars[0]" using the function "expr"
981 * starting with the optional initial value "argvars[2]" and return the result
982 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000983 */
984 void
985string_reduce(
986 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100987 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000988 typval_T *rettv)
989{
990 char_u *p = tv_get_string(&argvars[0]);
991 int len;
992 typval_T argv[3];
993 int r;
994 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +0100995 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000996
997 if (argvars[2].v_type == VAR_UNKNOWN)
998 {
999 if (*p == NUL)
1000 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001001 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001002 return;
1003 }
1004 if (copy_first_char_to_tv(p, rettv) == FAIL)
1005 return;
1006 p += STRLEN(rettv->vval.v_string);
1007 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001008 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001009 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 else
1011 copy_tv(&argvars[2], rettv);
1012
zeertzjqe7d49462023-04-16 20:53:55 +01001013 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001014 fc = eval_expr_get_funccal(expr, rettv);
1015
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001016 for ( ; *p != NUL; p += len)
1017 {
1018 argv[0] = *rettv;
1019 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1020 break;
1021 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001022
zeertzjqad0c4422023-08-17 22:15:47 +02001023 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001024
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001025 clear_tv(&argv[0]);
1026 clear_tv(&argv[1]);
1027 if (r == FAIL || called_emsg != called_emsg_start)
1028 return;
1029 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001030
1031 if (fc != NULL)
1032 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001033}
1034
Bram Moolenaare4098452023-05-07 18:53:49 +01001035/*
1036 * Implementation of "byteidx()" and "byteidxcomp()" functions
1037 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001038 static void
Bram Moolenaare4098452023-05-07 18:53:49 +01001039byteidx_common(typval_T *argvars, typval_T *rettv, int comp UNUSED)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001040{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001041 rettv->vval.v_number = -1;
1042
1043 if (in_vim9script()
1044 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001045 || check_for_number_arg(argvars, 1) == FAIL
1046 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001047 return;
1048
Christian Brabandt67672ef2023-04-24 21:09:54 +01001049 char_u *str = tv_get_string_chk(&argvars[0]);
1050 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001051 if (str == NULL || idx < 0)
1052 return;
1053
Christian Brabandt67672ef2023-04-24 21:09:54 +01001054 varnumber_T utf16idx = FALSE;
1055 if (argvars[2].v_type != VAR_UNKNOWN)
1056 {
zeertzjq8cf51372023-05-08 15:31:38 +01001057 int error = FALSE;
1058 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1059 if (error)
1060 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001061 if (utf16idx < 0 || utf16idx > 1)
1062 {
zeertzjq8cf51372023-05-08 15:31:38 +01001063 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001064 return;
1065 }
1066 }
1067
1068 int (*ptr2len)(char_u *);
1069 if (enc_utf8 && comp)
1070 ptr2len = utf_ptr2len;
1071 else
1072 ptr2len = mb_ptr2len;
1073
1074 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001075 for ( ; idx > 0; idx--)
1076 {
1077 if (*t == NUL) // EOL reached
1078 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001079 if (utf16idx)
1080 {
1081 int clen = ptr2len(t);
1082 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1083 if (c > 0xFFFF)
1084 idx--;
1085 }
1086 if (idx > 0)
1087 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001088 }
1089 rettv->vval.v_number = (varnumber_T)(t - str);
1090}
1091
1092/*
1093 * "byteidx()" function
1094 */
1095 void
1096f_byteidx(typval_T *argvars, typval_T *rettv)
1097{
Bram Moolenaare4098452023-05-07 18:53:49 +01001098 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001099}
1100
1101/*
1102 * "byteidxcomp()" function
1103 */
1104 void
1105f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1106{
Bram Moolenaare4098452023-05-07 18:53:49 +01001107 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001108}
1109
1110/*
1111 * "charidx()" function
1112 */
1113 void
1114f_charidx(typval_T *argvars, typval_T *rettv)
1115{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001116 rettv->vval.v_number = -1;
1117
Christian Brabandt67672ef2023-04-24 21:09:54 +01001118 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001119 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001120 || check_for_opt_bool_arg(argvars, 2) == FAIL
1121 || (argvars[2].v_type != VAR_UNKNOWN
1122 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001123 return;
1124
Christian Brabandt67672ef2023-04-24 21:09:54 +01001125 char_u *str = tv_get_string_chk(&argvars[0]);
1126 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001127 if (str == NULL || idx < 0)
1128 return;
1129
Christian Brabandt67672ef2023-04-24 21:09:54 +01001130 varnumber_T countcc = FALSE;
1131 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001132 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001133 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001134 countcc = tv_get_bool(&argvars[2]);
1135 if (argvars[3].v_type != VAR_UNKNOWN)
1136 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001137 }
1138
Christian Brabandt67672ef2023-04-24 21:09:54 +01001139 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001140 if (enc_utf8 && countcc)
1141 ptr2len = utf_ptr2len;
1142 else
1143 ptr2len = mb_ptr2len;
1144
Christian Brabandt67672ef2023-04-24 21:09:54 +01001145 char_u *p;
1146 int len;
1147 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001148 {
1149 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001150 {
1151 // If the index is exactly the number of bytes or utf-16 code units
1152 // in the string then return the length of the string in
1153 // characters.
1154 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1155 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001156 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001157 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001158 if (utf16idx)
1159 {
1160 idx--;
1161 int clen = ptr2len(p);
1162 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1163 if (c > 0xFFFF)
1164 idx--;
1165 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001166 p += ptr2len(p);
1167 }
1168
1169 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1170}
1171
1172/*
1173 * "str2list()" function
1174 */
1175 void
1176f_str2list(typval_T *argvars, typval_T *rettv)
1177{
1178 char_u *p;
1179 int utf8 = FALSE;
1180
1181 if (rettv_list_alloc(rettv) == FAIL)
1182 return;
1183
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001184 if (in_vim9script()
1185 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001186 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001187 return;
1188
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001189 if (argvars[1].v_type != VAR_UNKNOWN)
1190 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1191
1192 p = tv_get_string(&argvars[0]);
1193
1194 if (has_mbyte || utf8)
1195 {
1196 int (*ptr2len)(char_u *);
1197 int (*ptr2char)(char_u *);
1198
1199 if (utf8 || enc_utf8)
1200 {
1201 ptr2len = utf_ptr2len;
1202 ptr2char = utf_ptr2char;
1203 }
1204 else
1205 {
1206 ptr2len = mb_ptr2len;
1207 ptr2char = mb_ptr2char;
1208 }
1209
1210 for ( ; *p != NUL; p += (*ptr2len)(p))
1211 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1212 }
1213 else
1214 for ( ; *p != NUL; ++p)
1215 list_append_number(rettv->vval.v_list, *p);
1216}
1217
1218/*
1219 * "str2nr()" function
1220 */
1221 void
1222f_str2nr(typval_T *argvars, typval_T *rettv)
1223{
1224 int base = 10;
1225 char_u *p;
1226 varnumber_T n;
1227 int what = 0;
1228 int isneg;
1229
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001230 if (in_vim9script()
1231 && (check_for_string_arg(argvars, 0) == FAIL
1232 || check_for_opt_number_arg(argvars, 1) == FAIL
1233 || (argvars[1].v_type != VAR_UNKNOWN
1234 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1235 return;
1236
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001237 if (argvars[1].v_type != VAR_UNKNOWN)
1238 {
1239 base = (int)tv_get_number(&argvars[1]);
1240 if (base != 2 && base != 8 && base != 10 && base != 16)
1241 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001242 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001243 return;
1244 }
1245 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1246 what |= STR2NR_QUOTE;
1247 }
1248
1249 p = skipwhite(tv_get_string_strict(&argvars[0]));
1250 isneg = (*p == '-');
1251 if (*p == '+' || *p == '-')
1252 p = skipwhite(p + 1);
1253 switch (base)
1254 {
1255 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1256 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1257 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1258 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001259 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001260 // Text after the number is silently ignored.
1261 if (isneg)
1262 rettv->vval.v_number = -n;
1263 else
1264 rettv->vval.v_number = n;
1265
1266}
1267
1268/*
1269 * "strgetchar()" function
1270 */
1271 void
1272f_strgetchar(typval_T *argvars, typval_T *rettv)
1273{
1274 char_u *str;
1275 int len;
1276 int error = FALSE;
1277 int charidx;
1278 int byteidx = 0;
1279
1280 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001281
1282 if (in_vim9script()
1283 && (check_for_string_arg(argvars, 0) == FAIL
1284 || check_for_number_arg(argvars, 1) == FAIL))
1285 return;
1286
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001287 str = tv_get_string_chk(&argvars[0]);
1288 if (str == NULL)
1289 return;
1290 len = (int)STRLEN(str);
1291 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1292 if (error)
1293 return;
1294
1295 while (charidx >= 0 && byteidx < len)
1296 {
1297 if (charidx == 0)
1298 {
1299 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1300 break;
1301 }
1302 --charidx;
1303 byteidx += MB_CPTR2LEN(str + byteidx);
1304 }
1305}
1306
1307/*
1308 * "stridx()" function
1309 */
1310 void
1311f_stridx(typval_T *argvars, typval_T *rettv)
1312{
1313 char_u buf[NUMBUFLEN];
1314 char_u *needle;
1315 char_u *haystack;
1316 char_u *save_haystack;
1317 char_u *pos;
1318 int start_idx;
1319
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001320 if (in_vim9script()
1321 && (check_for_string_arg(argvars, 0) == FAIL
1322 || check_for_string_arg(argvars, 1) == FAIL
1323 || check_for_opt_number_arg(argvars, 2) == FAIL))
1324 return;
1325
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001326 needle = tv_get_string_chk(&argvars[1]);
1327 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1328 rettv->vval.v_number = -1;
1329 if (needle == NULL || haystack == NULL)
1330 return; // type error; errmsg already given
1331
1332 if (argvars[2].v_type != VAR_UNKNOWN)
1333 {
1334 int error = FALSE;
1335
1336 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1337 if (error || start_idx >= (int)STRLEN(haystack))
1338 return;
1339 if (start_idx >= 0)
1340 haystack += start_idx;
1341 }
1342
1343 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1344 if (pos != NULL)
1345 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1346}
1347
1348/*
1349 * "string()" function
1350 */
1351 void
1352f_string(typval_T *argvars, typval_T *rettv)
1353{
1354 char_u *tofree;
1355 char_u numbuf[NUMBUFLEN];
1356
1357 rettv->v_type = VAR_STRING;
1358 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1359 get_copyID());
1360 // Make a copy if we have a value but it's not in allocated memory.
1361 if (rettv->vval.v_string != NULL && tofree == NULL)
1362 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1363}
1364
1365/*
1366 * "strlen()" function
1367 */
1368 void
1369f_strlen(typval_T *argvars, typval_T *rettv)
1370{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001371 if (in_vim9script()
1372 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1373 return;
1374
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001375 rettv->vval.v_number = (varnumber_T)(STRLEN(
1376 tv_get_string(&argvars[0])));
1377}
1378
1379 static void
1380strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1381{
1382 char_u *s = tv_get_string(&argvars[0]);
1383 varnumber_T len = 0;
1384 int (*func_mb_ptr2char_adv)(char_u **pp);
1385
1386 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1387 while (*s != NUL)
1388 {
1389 func_mb_ptr2char_adv(&s);
1390 ++len;
1391 }
1392 rettv->vval.v_number = len;
1393}
1394
1395/*
1396 * "strcharlen()" function
1397 */
1398 void
1399f_strcharlen(typval_T *argvars, typval_T *rettv)
1400{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001401 if (in_vim9script()
1402 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1403 return;
1404
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001405 strchar_common(argvars, rettv, TRUE);
1406}
1407
1408/*
1409 * "strchars()" function
1410 */
1411 void
1412f_strchars(typval_T *argvars, typval_T *rettv)
1413{
1414 varnumber_T skipcc = FALSE;
1415
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001416 if (in_vim9script()
1417 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001418 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001419 return;
1420
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001421 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001422 {
zeertzjq8cf51372023-05-08 15:31:38 +01001423 int error = FALSE;
1424 skipcc = tv_get_bool_chk(&argvars[1], &error);
1425 if (error)
1426 return;
1427 if (skipcc < 0 || skipcc > 1)
1428 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001429 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001430 return;
1431 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001432 }
zeertzjq8cf51372023-05-08 15:31:38 +01001433
1434 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001435}
1436
1437/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001438 * "strutf16len()" function
1439 */
1440 void
1441f_strutf16len(typval_T *argvars, typval_T *rettv)
1442{
1443 rettv->vval.v_number = -1;
1444
1445 if (check_for_string_arg(argvars, 0) == FAIL
1446 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1447 return;
1448
1449 varnumber_T countcc = FALSE;
1450 if (argvars[1].v_type != VAR_UNKNOWN)
1451 countcc = tv_get_bool(&argvars[1]);
1452
1453 char_u *s = tv_get_string(&argvars[0]);
1454 varnumber_T len = 0;
1455 int (*func_mb_ptr2char_adv)(char_u **pp);
1456 int ch;
1457
1458 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1459 while (*s != NUL)
1460 {
1461 ch = func_mb_ptr2char_adv(&s);
1462 if (ch > 0xFFFF)
1463 ++len;
1464 ++len;
1465 }
1466 rettv->vval.v_number = len;
1467}
1468
1469/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001470 * "strdisplaywidth()" function
1471 */
1472 void
1473f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1474{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001475 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001476 int col = 0;
1477
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001478 rettv->vval.v_number = -1;
1479
1480 if (in_vim9script()
1481 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001482 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001483 return;
1484
1485 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001486 if (argvars[1].v_type != VAR_UNKNOWN)
1487 col = (int)tv_get_number(&argvars[1]);
1488
1489 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1490}
1491
1492/*
1493 * "strwidth()" function
1494 */
1495 void
1496f_strwidth(typval_T *argvars, typval_T *rettv)
1497{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001498 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001499
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001500 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1501 return;
1502
1503 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001504 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1505}
1506
1507/*
1508 * "strcharpart()" function
1509 */
1510 void
1511f_strcharpart(typval_T *argvars, typval_T *rettv)
1512{
1513 char_u *p;
1514 int nchar;
1515 int nbyte = 0;
1516 int charlen;
1517 int skipcc = FALSE;
1518 int len = 0;
1519 int slen;
1520 int error = FALSE;
1521
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001522 if (in_vim9script()
1523 && (check_for_string_arg(argvars, 0) == FAIL
1524 || check_for_number_arg(argvars, 1) == FAIL
1525 || check_for_opt_number_arg(argvars, 2) == FAIL
1526 || (argvars[2].v_type != VAR_UNKNOWN
1527 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1528 return;
1529
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001530 p = tv_get_string(&argvars[0]);
1531 slen = (int)STRLEN(p);
1532
1533 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1534 if (!error)
1535 {
1536 if (argvars[2].v_type != VAR_UNKNOWN
1537 && argvars[3].v_type != VAR_UNKNOWN)
1538 {
zeertzjq8cf51372023-05-08 15:31:38 +01001539 skipcc = tv_get_bool_chk(&argvars[3], &error);
1540 if (error)
1541 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001542 if (skipcc < 0 || skipcc > 1)
1543 {
zeertzjq8cf51372023-05-08 15:31:38 +01001544 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001545 return;
1546 }
1547 }
1548
1549 if (nchar > 0)
1550 while (nchar > 0 && nbyte < slen)
1551 {
1552 if (skipcc)
1553 nbyte += mb_ptr2len(p + nbyte);
1554 else
1555 nbyte += MB_CPTR2LEN(p + nbyte);
1556 --nchar;
1557 }
1558 else
1559 nbyte = nchar;
1560 if (argvars[2].v_type != VAR_UNKNOWN)
1561 {
1562 charlen = (int)tv_get_number(&argvars[2]);
1563 while (charlen > 0 && nbyte + len < slen)
1564 {
1565 int off = nbyte + len;
1566
1567 if (off < 0)
1568 len += 1;
1569 else
1570 {
1571 if (skipcc)
1572 len += mb_ptr2len(p + off);
1573 else
1574 len += MB_CPTR2LEN(p + off);
1575 }
1576 --charlen;
1577 }
1578 }
1579 else
1580 len = slen - nbyte; // default: all bytes that are available.
1581 }
1582
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001583 // Only return the overlap between the specified part and the actual
1584 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001585 if (nbyte < 0)
1586 {
1587 len += nbyte;
1588 nbyte = 0;
1589 }
1590 else if (nbyte > slen)
1591 nbyte = slen;
1592 if (len < 0)
1593 len = 0;
1594 else if (nbyte + len > slen)
1595 len = slen - nbyte;
1596
1597 rettv->v_type = VAR_STRING;
1598 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1599}
1600
1601/*
1602 * "strpart()" function
1603 */
1604 void
1605f_strpart(typval_T *argvars, typval_T *rettv)
1606{
1607 char_u *p;
1608 int n;
1609 int len;
1610 int slen;
1611 int error = FALSE;
1612
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001613 if (in_vim9script()
1614 && (check_for_string_arg(argvars, 0) == FAIL
1615 || check_for_number_arg(argvars, 1) == FAIL
1616 || check_for_opt_number_arg(argvars, 2) == FAIL
1617 || (argvars[2].v_type != VAR_UNKNOWN
1618 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1619 return;
1620
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001621 p = tv_get_string(&argvars[0]);
1622 slen = (int)STRLEN(p);
1623
1624 n = (int)tv_get_number_chk(&argvars[1], &error);
1625 if (error)
1626 len = 0;
1627 else if (argvars[2].v_type != VAR_UNKNOWN)
1628 len = (int)tv_get_number(&argvars[2]);
1629 else
1630 len = slen - n; // default len: all bytes that are available.
1631
1632 // Only return the overlap between the specified part and the actual
1633 // string.
1634 if (n < 0)
1635 {
1636 len += n;
1637 n = 0;
1638 }
1639 else if (n > slen)
1640 n = slen;
1641 if (len < 0)
1642 len = 0;
1643 else if (n + len > slen)
1644 len = slen - n;
1645
1646 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1647 {
1648 int off;
1649
1650 // length in characters
1651 for (off = n; off < slen && len > 0; --len)
1652 off += mb_ptr2len(p + off);
1653 len = off - n;
1654 }
1655
1656 rettv->v_type = VAR_STRING;
1657 rettv->vval.v_string = vim_strnsave(p + n, len);
1658}
1659
1660/*
1661 * "strridx()" function
1662 */
1663 void
1664f_strridx(typval_T *argvars, typval_T *rettv)
1665{
1666 char_u buf[NUMBUFLEN];
1667 char_u *needle;
1668 char_u *haystack;
1669 char_u *rest;
1670 char_u *lastmatch = NULL;
1671 int haystack_len, end_idx;
1672
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001673 if (in_vim9script()
1674 && (check_for_string_arg(argvars, 0) == FAIL
1675 || check_for_string_arg(argvars, 1) == FAIL
1676 || check_for_opt_number_arg(argvars, 2) == FAIL))
1677 return;
1678
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001679 needle = tv_get_string_chk(&argvars[1]);
1680 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1681
1682 rettv->vval.v_number = -1;
1683 if (needle == NULL || haystack == NULL)
1684 return; // type error; errmsg already given
1685
1686 haystack_len = (int)STRLEN(haystack);
1687 if (argvars[2].v_type != VAR_UNKNOWN)
1688 {
1689 // Third argument: upper limit for index
1690 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1691 if (end_idx < 0)
1692 return; // can never find a match
1693 }
1694 else
1695 end_idx = haystack_len;
1696
1697 if (*needle == NUL)
1698 {
1699 // Empty string matches past the end.
1700 lastmatch = haystack + end_idx;
1701 }
1702 else
1703 {
1704 for (rest = haystack; *rest != '\0'; ++rest)
1705 {
1706 rest = (char_u *)strstr((char *)rest, (char *)needle);
1707 if (rest == NULL || rest > haystack + end_idx)
1708 break;
1709 lastmatch = rest;
1710 }
1711 }
1712
1713 if (lastmatch == NULL)
1714 rettv->vval.v_number = -1;
1715 else
1716 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1717}
1718
1719/*
1720 * "strtrans()" function
1721 */
1722 void
1723f_strtrans(typval_T *argvars, typval_T *rettv)
1724{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001725 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1726 return;
1727
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001728 rettv->v_type = VAR_STRING;
1729 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1730}
1731
Christian Brabandt67672ef2023-04-24 21:09:54 +01001732
1733/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001734 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001735 *
1736 * Converts a byte or character offset in a string to the corresponding UTF-16
1737 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001738 */
1739 void
1740f_utf16idx(typval_T *argvars, typval_T *rettv)
1741{
1742 rettv->vval.v_number = -1;
1743
1744 if (check_for_string_arg(argvars, 0) == FAIL
1745 || check_for_opt_number_arg(argvars, 1) == FAIL
1746 || check_for_opt_bool_arg(argvars, 2) == FAIL
1747 || (argvars[2].v_type != VAR_UNKNOWN
1748 && check_for_opt_bool_arg(argvars, 3) == FAIL))
1749 return;
1750
1751 char_u *str = tv_get_string_chk(&argvars[0]);
1752 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
1753 if (str == NULL || idx < 0)
1754 return;
1755
1756 varnumber_T countcc = FALSE;
1757 varnumber_T charidx = FALSE;
1758 if (argvars[2].v_type != VAR_UNKNOWN)
1759 {
1760 countcc = tv_get_bool(&argvars[2]);
1761 if (argvars[3].v_type != VAR_UNKNOWN)
1762 charidx = tv_get_bool(&argvars[3]);
1763 }
1764
1765 int (*ptr2len)(char_u *);
1766 if (enc_utf8 && countcc)
1767 ptr2len = utf_ptr2len;
1768 else
1769 ptr2len = mb_ptr2len;
1770
1771 char_u *p;
1772 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001773 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001774 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
1775 {
1776 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001777 {
1778 // If the index is exactly the number of bytes or characters in the
1779 // string then return the length of the string in utf-16 code
1780 // units.
1781 if (charidx ? (idx == 0) : (p == (str + idx)))
1782 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001783 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001784 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001785 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001786 int clen = ptr2len(p);
1787 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1788 if (c > 0xFFFF)
1789 len++;
1790 p += ptr2len(p);
1791 if (charidx)
1792 idx--;
1793 }
1794
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001795 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001796}
1797
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001798/*
1799 * "tolower(string)" function
1800 */
1801 void
1802f_tolower(typval_T *argvars, typval_T *rettv)
1803{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001804 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1805 return;
1806
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001807 rettv->v_type = VAR_STRING;
1808 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1809}
1810
1811/*
1812 * "toupper(string)" function
1813 */
1814 void
1815f_toupper(typval_T *argvars, typval_T *rettv)
1816{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001817 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1818 return;
1819
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001820 rettv->v_type = VAR_STRING;
1821 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1822}
1823
1824/*
1825 * "tr(string, fromstr, tostr)" function
1826 */
1827 void
1828f_tr(typval_T *argvars, typval_T *rettv)
1829{
1830 char_u *in_str;
1831 char_u *fromstr;
1832 char_u *tostr;
1833 char_u *p;
1834 int inlen;
1835 int fromlen;
1836 int tolen;
1837 int idx;
1838 char_u *cpstr;
1839 int cplen;
1840 int first = TRUE;
1841 char_u buf[NUMBUFLEN];
1842 char_u buf2[NUMBUFLEN];
1843 garray_T ga;
1844
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001845 if (in_vim9script()
1846 && (check_for_string_arg(argvars, 0) == FAIL
1847 || check_for_string_arg(argvars, 1) == FAIL
1848 || check_for_string_arg(argvars, 2) == FAIL))
1849 return;
1850
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001851 in_str = tv_get_string(&argvars[0]);
1852 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1853 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1854
1855 // Default return value: empty string.
1856 rettv->v_type = VAR_STRING;
1857 rettv->vval.v_string = NULL;
1858 if (fromstr == NULL || tostr == NULL)
1859 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001860 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001861
1862 if (!has_mbyte)
1863 // not multi-byte: fromstr and tostr must be the same length
1864 if (STRLEN(fromstr) != STRLEN(tostr))
1865 {
1866error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001867 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001868 ga_clear(&ga);
1869 return;
1870 }
1871
1872 // fromstr and tostr have to contain the same number of chars
1873 while (*in_str != NUL)
1874 {
1875 if (has_mbyte)
1876 {
1877 inlen = (*mb_ptr2len)(in_str);
1878 cpstr = in_str;
1879 cplen = inlen;
1880 idx = 0;
1881 for (p = fromstr; *p != NUL; p += fromlen)
1882 {
1883 fromlen = (*mb_ptr2len)(p);
1884 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1885 {
1886 for (p = tostr; *p != NUL; p += tolen)
1887 {
1888 tolen = (*mb_ptr2len)(p);
1889 if (idx-- == 0)
1890 {
1891 cplen = tolen;
1892 cpstr = p;
1893 break;
1894 }
1895 }
1896 if (*p == NUL) // tostr is shorter than fromstr
1897 goto error;
1898 break;
1899 }
1900 ++idx;
1901 }
1902
1903 if (first && cpstr == in_str)
1904 {
1905 // Check that fromstr and tostr have the same number of
1906 // (multi-byte) characters. Done only once when a character
1907 // of in_str doesn't appear in fromstr.
1908 first = FALSE;
1909 for (p = tostr; *p != NUL; p += tolen)
1910 {
1911 tolen = (*mb_ptr2len)(p);
1912 --idx;
1913 }
1914 if (idx != 0)
1915 goto error;
1916 }
1917
1918 (void)ga_grow(&ga, cplen);
1919 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1920 ga.ga_len += cplen;
1921
1922 in_str += inlen;
1923 }
1924 else
1925 {
1926 // When not using multi-byte chars we can do it faster.
1927 p = vim_strchr(fromstr, *in_str);
1928 if (p != NULL)
1929 ga_append(&ga, tostr[p - fromstr]);
1930 else
1931 ga_append(&ga, *in_str);
1932 ++in_str;
1933 }
1934 }
1935
1936 // add a terminating NUL
1937 (void)ga_grow(&ga, 1);
1938 ga_append(&ga, NUL);
1939
1940 rettv->vval.v_string = ga.ga_data;
1941}
1942
1943/*
1944 * "trim({expr})" function
1945 */
1946 void
1947f_trim(typval_T *argvars, typval_T *rettv)
1948{
1949 char_u buf1[NUMBUFLEN];
1950 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001951 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001952 char_u *mask = NULL;
1953 char_u *tail;
1954 char_u *prev;
1955 char_u *p;
1956 int c1;
1957 int dir = 0;
1958
1959 rettv->v_type = VAR_STRING;
1960 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001961
1962 if (in_vim9script()
1963 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02001964 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001965 || (argvars[1].v_type != VAR_UNKNOWN
1966 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1967 return;
1968
1969 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001970 if (head == NULL)
1971 return;
1972
Illia Bobyr80799172023-10-17 18:00:50 +02001973 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001974 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001975
1976 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02001977 {
Illia Bobyr80799172023-10-17 18:00:50 +02001978 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1979 if (*mask == NUL)
1980 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001981
Illia Bobyr80799172023-10-17 18:00:50 +02001982 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02001983 {
Illia Bobyr80799172023-10-17 18:00:50 +02001984 int error = 0;
1985
1986 // leading or trailing characters to trim
1987 dir = (int)tv_get_number_chk(&argvars[2], &error);
1988 if (error)
1989 return;
1990 if (dir < 0 || dir > 2)
1991 {
1992 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
1993 return;
1994 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001995 }
1996 }
1997
1998 if (dir == 0 || dir == 1)
1999 {
2000 // Trim leading characters
2001 while (*head != NUL)
2002 {
2003 c1 = PTR2CHAR(head);
2004 if (mask == NULL)
2005 {
2006 if (c1 > ' ' && c1 != 0xa0)
2007 break;
2008 }
2009 else
2010 {
2011 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2012 if (c1 == PTR2CHAR(p))
2013 break;
2014 if (*p == NUL)
2015 break;
2016 }
2017 MB_PTR_ADV(head);
2018 }
2019 }
2020
2021 tail = head + STRLEN(head);
2022 if (dir == 0 || dir == 2)
2023 {
2024 // Trim trailing characters
2025 for (; tail > head; tail = prev)
2026 {
2027 prev = tail;
2028 MB_PTR_BACK(head, prev);
2029 c1 = PTR2CHAR(prev);
2030 if (mask == NULL)
2031 {
2032 if (c1 > ' ' && c1 != 0xa0)
2033 break;
2034 }
2035 else
2036 {
2037 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2038 if (c1 == PTR2CHAR(p))
2039 break;
2040 if (*p == NUL)
2041 break;
2042 }
2043 }
2044 }
2045 rettv->vval.v_string = vim_strnsave(head, tail - head);
2046}
2047
// Error message given by tv_nr(), tv_str() and tv_float() when the requested
// entry in the argument list has type VAR_UNKNOWN.
static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002049
2050/*
2051 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2052 */
2053 static varnumber_T
2054tv_nr(typval_T *tvs, int *idxp)
2055{
2056 int idx = *idxp - 1;
2057 varnumber_T n = 0;
2058 int err = FALSE;
2059
2060 if (tvs[idx].v_type == VAR_UNKNOWN)
2061 emsg(_(e_printf));
2062 else
2063 {
2064 ++*idxp;
2065 n = tv_get_number_chk(&tvs[idx], &err);
2066 if (err)
2067 n = 0;
2068 }
2069 return n;
2070}
2071
2072/*
2073 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2074 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2075 * are not converted to a string.
2076 * If "tofree" is not NULL echo_string() is used. All types are converted to
2077 * a string with the same format as ":echo". The caller must free "*tofree".
2078 * Returns NULL for an error.
2079 */
2080 static char *
2081tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2082{
2083 int idx = *idxp - 1;
2084 char *s = NULL;
2085 static char_u numbuf[NUMBUFLEN];
2086
2087 if (tvs[idx].v_type == VAR_UNKNOWN)
2088 emsg(_(e_printf));
2089 else
2090 {
2091 ++*idxp;
2092 if (tofree != NULL)
2093 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2094 else
2095 s = (char *)tv_get_string_chk(&tvs[idx]);
2096 }
2097 return s;
2098}
2099
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002100/*
2101 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2102 */
2103 static double
2104tv_float(typval_T *tvs, int *idxp)
2105{
2106 int idx = *idxp - 1;
2107 double f = 0;
2108
2109 if (tvs[idx].v_type == VAR_UNKNOWN)
2110 emsg(_(e_printf));
2111 else
2112 {
2113 ++*idxp;
2114 if (tvs[idx].v_type == VAR_FLOAT)
2115 f = tvs[idx].vval.v_float;
2116 else if (tvs[idx].v_type == VAR_NUMBER)
2117 f = (double)tvs[idx].vval.v_number;
2118 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002119 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002120 }
2121 return f;
2122}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002123
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002124#endif
2125
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * "positive" selects between the negative and a positive form, "force_sign"
 * and "space_for_positive" select which positive form is used.  The upper
 * case forms are used when "fmt_spec" is an upper case ASCII letter.
 */
    static const char *
infinity_str(int positive,
	     char fmt_spec,
	     int force_sign,
	     int space_for_positive)
{
    static const char *table[] =
    {
	"-inf", "inf", "+inf", " inf",
	"-INF", "INF", "+INF", " INF"
    };
    // Entry within one row: 1 = "inf", 2 = "+inf", 3 = " inf".
    int sign_slot = 1 + force_sign * (1 + space_for_positive);
    int row_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[positive * sign_slot + row_offset];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002147
/*
 * This code was included to provide a portable vsnprintf() and snprintf().
 * Some systems may provide their own, but we always use this one for
 * consistency.
 *
 * This code is based on snprintf.c - a portable implementation of snprintf
 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
 * Included with permission.  It was heavily modified to fit in Vim.
 * The original code, including useful comments, can be found here:
 *	http://www.ijs.si/software/snprintf/
 *
 * This snprintf() only supports the following conversion specifiers:
 * s, c, d, u, o, x, X, p  (and synonyms: i, D, U, O - see below)
 * with flags: '-', '+', ' ', '0' and '#'.
 * An asterisk is supported for field width as well as precision.
 *
 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
 *
 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
 * are supported.  NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
 *
 * The locale is not used, the string is used as a byte string.  This is only
 * relevant for double-byte encodings where the second byte may be '%'.
 *
 * It is permitted for "str_m" to be zero, and it is permitted to specify a
 * NULL pointer for the resulting string argument if "str_m" is zero (as per
 * ISO C99).
 *
 * The return value is the number of characters which would be generated
 * for the given input, excluding the trailing NUL.  If this value is greater
 * than or equal to "str_m", not all characters from the result have been
 * stored in "str": output bytes beyond the ("str_m" - 1)-th character are
 * discarded.  If "str_m" is greater than zero it is guaranteed that the
 * resulting string will be NUL-terminated.
 */
2182
2183/*
2184 * When va_list is not supported we only define vim_snprintf().
2185 *
2186 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2187 * "typval_T". When the latter is not used it must be NULL.
2188 */
2189
2190// When generating prototypes all of this is skipped, cproto doesn't
2191// understand this.
2192#ifndef PROTO
2193
// Like vim_snprintf() but append to the string that is already in "str".
// "str_m" is the size of the whole buffer; only the space after the existing
// text is passed on to vim_vsnprintf().
// Returns the value returned by vim_vsnprintf() for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t	used = STRLEN(str);
    size_t	avail = (str_m > used) ? str_m - used : 0;
    va_list	ap;
    int		result;

    va_start(ap, fmt);
    result = vim_vsnprintf(str + used, avail, fmt, ap);
    va_end(ap);

    return result;
}
2212
/*
 * Format "fmt" with the following arguments into "str", a buffer of "str_m"
 * bytes.  Hands the variable arguments over to vim_vsnprintf() and returns
 * its result.
 */
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2224
2225 int
2226vim_vsnprintf(
2227 char *str,
2228 size_t str_m,
2229 const char *fmt,
2230 va_list ap)
2231{
2232 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2233}
2234
// Kinds of argument for a conversion in a format string.  format_typeof()
// returns values from this list.
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,	// returned by format_typeof() for '%'
    TYPE_CHAR,		// returned by format_typeof() for 'c'
    TYPE_STRING,	// returned by format_typeof() for 's' and 'S'
    TYPE_FLOAT
};
2250
2251/* Types that can be used in a format string
2252 */
zeertzjq7772c932023-08-15 22:48:40 +02002253 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002254format_typeof(
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002255 const char *type)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002256{
2257 // allowed values: \0, h, l, L
2258 char length_modifier = '\0';
2259
2260 // current conversion specifier character
2261 char fmt_spec = '\0';
2262
2263 // parse 'h', 'l' and 'll' length modifiers
2264 if (*type == 'h' || *type == 'l')
2265 {
2266 length_modifier = *type;
2267 type++;
2268 if (length_modifier == 'l' && *type == 'l')
2269 {
2270 // double l = __int64 / varnumber_T
2271 length_modifier = 'L';
2272 type++;
2273 }
2274 }
2275 fmt_spec = *type;
2276
2277 // common synonyms:
2278 switch (fmt_spec)
2279 {
2280 case 'i': fmt_spec = 'd'; break;
2281 case '*': fmt_spec = 'd'; length_modifier = 'h'; break;
2282 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2283 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2284 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2285 default: break;
2286 }
2287
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002288 // get parameter value, do initial processing
2289 switch (fmt_spec)
2290 {
2291 // '%' and 'c' behave similar to 's' regarding flags and field
2292 // widths
2293 case '%':
2294 return TYPE_PERCENT;
2295
2296 case 'c':
2297 return TYPE_CHAR;
2298
2299 case 's':
2300 case 'S':
2301 return TYPE_STRING;
2302
2303 case 'd': case 'u':
2304 case 'b': case 'B':
2305 case 'o':
2306 case 'x': case 'X':
2307 case 'p':
2308 {
2309 // NOTE: the u, b, o, x, X and p conversion specifiers
2310 // imply the value is unsigned; d implies a signed
2311 // value
2312
2313 // 0 if numeric argument is zero (or if pointer is
2314 // NULL for 'p'), +1 if greater than zero (or nonzero
2315 // for unsigned arguments), -1 if negative (unsigned
2316 // argument is never negative)
2317
2318 if (fmt_spec == 'p')
2319 return TYPE_POINTER;
2320 else if (fmt_spec == 'b' || fmt_spec == 'B')
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002321 return TYPE_UNSIGNEDLONGLONGINT;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002322 else if (fmt_spec == 'd')
2323 {
2324 // signed
2325 switch (length_modifier)
2326 {
2327 case '\0':
2328 case 'h':
2329 // char and short arguments are passed as int.
2330 return TYPE_INT;
2331 case 'l':
2332 return TYPE_LONGINT;
2333 case 'L':
2334 return TYPE_LONGLONGINT;
2335 }
2336 }
2337 else
2338 {
2339 // unsigned
2340 switch (length_modifier)
2341 {
2342 case '\0':
2343 case 'h':
2344 return TYPE_UNSIGNEDINT;
2345 case 'l':
2346 return TYPE_UNSIGNEDLONGINT;
2347 case 'L':
2348 return TYPE_UNSIGNEDLONGLONGINT;
2349 }
2350 }
2351 }
2352 break;
2353
2354 case 'f':
2355 case 'F':
2356 case 'e':
2357 case 'E':
2358 case 'g':
2359 case 'G':
2360 return TYPE_FLOAT;
2361 }
2362
2363 return TYPE_UNKNOWN;
2364}
2365
zeertzjq7772c932023-08-15 22:48:40 +02002366 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002367format_typename(
2368 const char *type)
2369{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002370 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002371 {
2372 case TYPE_INT:
2373 return _(typename_int);
2374
2375 case TYPE_LONGINT:
2376 return _(typename_longint);
2377
2378 case TYPE_LONGLONGINT:
2379 return _(typename_longlongint);
2380
2381 case TYPE_UNSIGNEDINT:
2382 return _(typename_unsignedint);
2383
2384 case TYPE_UNSIGNEDLONGINT:
2385 return _(typename_unsignedlongint);
2386
2387 case TYPE_UNSIGNEDLONGLONGINT:
2388 return _(typename_unsignedlonglongint);
2389
2390 case TYPE_POINTER:
2391 return _(typename_pointer);
2392
2393 case TYPE_PERCENT:
2394 return _(typename_percent);
2395
2396 case TYPE_CHAR:
2397 return _(typename_char);
2398
2399 case TYPE_STRING:
2400 return _(typename_string);
2401
2402 case TYPE_FLOAT:
2403 return _(typename_float);
2404 }
2405
2406 return _(typename_unknown);
2407}
2408
zeertzjq7772c932023-08-15 22:48:40 +02002409 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002410adjust_types(
2411 const char ***ap_types,
2412 int arg,
2413 int *num_posarg,
2414 const char *type)
2415{
2416 if (*ap_types == NULL || *num_posarg < arg)
2417 {
2418 int idx;
2419 const char **new_types;
2420
2421 if (*ap_types == NULL)
2422 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2423 else
K.Takata4c215ec2023-08-26 18:05:08 +02002424 new_types = vim_realloc((char **)*ap_types,
2425 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002426
2427 if (new_types == NULL)
2428 return FAIL;
2429
2430 for (idx = *num_posarg; idx < arg; ++idx)
2431 new_types[idx] = NULL;
2432
2433 *ap_types = new_types;
2434 *num_posarg = arg;
2435 }
2436
2437 if ((*ap_types)[arg - 1] != NULL)
2438 {
2439 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2440 {
2441 const char *pt = type;
2442 if (pt[0] == '*')
2443 pt = (*ap_types)[arg - 1];
2444
2445 if (pt[0] != '*')
2446 {
2447 switch (pt[0])
2448 {
2449 case 'd': case 'i': break;
2450 default:
2451 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2452 return FAIL;
2453 }
2454 }
2455 }
2456 else
2457 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002458 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002459 {
2460 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2461 return FAIL;
2462 }
2463 }
2464 }
2465
2466 (*ap_types)[arg - 1] = type;
2467
2468 return OK;
2469}
2470
Christ van Willegenc35fc032024-03-14 18:30:41 +01002471 static void
2472format_overflow_error(const char *pstart)
2473{
2474 size_t arglen = 0;
2475 char *argcopy = NULL;
2476 const char *p = pstart;
2477
2478 while (VIM_ISDIGIT((int)(*p)))
2479 ++p;
2480
2481 arglen = p - pstart;
2482 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2483 if (argcopy != NULL)
2484 {
2485 strncpy(argcopy, pstart, arglen);
2486 semsg(_( e_val_too_large), argcopy);
2487 free(argcopy);
2488 }
2489 else
2490 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2491}
2492
2493#define MAX_ALLOWED_STRING_WIDTH 6400
2494
2495 static int
2496get_unsigned_int(
2497 const char *pstart,
2498 const char **p,
2499 unsigned int *uj)
2500{
2501 *uj = **p - '0';
2502 ++*p;
2503
2504 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2505 {
2506 *uj = 10 * *uj + (unsigned int)(**p - '0');
2507 ++*p;
2508 }
2509
2510 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2511 {
2512 format_overflow_error(pstart);
2513 return FAIL;
2514 }
2515
2516 return OK;
2517}
2518
2519
zeertzjq7772c932023-08-15 22:48:40 +02002520 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002521parse_fmt_types(
2522 const char ***ap_types,
2523 int *num_posarg,
2524 const char *fmt,
2525 typval_T *tvs UNUSED
2526 )
2527{
2528 const char *p = fmt;
2529 const char *arg = NULL;
2530
2531 int any_pos = 0;
2532 int any_arg = 0;
2533 int arg_idx;
2534
2535#define CHECK_POS_ARG do { \
2536 if (any_pos && any_arg) \
2537 { \
2538 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2539 goto error; \
2540 } \
2541} while (0);
2542
2543 if (p == NULL)
2544 return OK;
2545
2546 while (*p != NUL)
2547 {
2548 if (*p != '%')
2549 {
2550 char *q = strchr(p + 1, '%');
2551 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2552
2553 p += n;
2554 }
2555 else
2556 {
2557 // allowed values: \0, h, l, L
2558 char length_modifier = '\0';
2559
2560 // variable for positional arg
2561 int pos_arg = -1;
2562 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002563 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002564
2565 p++; // skip '%'
2566
2567 // First check to see if we find a positional
2568 // argument specifier
2569 ptype = p;
2570
2571 while (VIM_ISDIGIT(*ptype))
2572 ++ptype;
2573
2574 if (*ptype == '$')
2575 {
2576 if (*p == '0')
2577 {
2578 // 0 flag at the wrong place
2579 semsg(_( e_invalid_format_specifier_str), fmt);
2580 goto error;
2581 }
2582
2583 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002584 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002585
Christ van Willegenc35fc032024-03-14 18:30:41 +01002586 if (get_unsigned_int(pstart, &p, &uj) == FAIL)
2587 goto error;
2588
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002589 pos_arg = uj;
2590
2591 any_pos = 1;
2592 CHECK_POS_ARG;
2593
2594 ++p;
2595 }
2596
2597 // parse flags
2598 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2599 || *p == '#' || *p == '\'')
2600 {
2601 switch (*p)
2602 {
2603 case '0': break;
2604 case '-': break;
2605 case '+': break;
2606 case ' ': // If both the ' ' and '+' flags appear, the ' '
2607 // flag should be ignored
2608 break;
2609 case '#': break;
2610 case '\'': break;
2611 }
2612 p++;
2613 }
2614 // If the '0' and '-' flags both appear, the '0' flag should be
2615 // ignored.
2616
2617 // parse field width
2618 if (*(arg = p) == '*')
2619 {
2620 p++;
2621
2622 if (VIM_ISDIGIT((int)(*p)))
2623 {
2624 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002625 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002626
Christ van Willegenc35fc032024-03-14 18:30:41 +01002627 if (get_unsigned_int(arg + 1, &p, &uj) == FAIL)
2628 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002629
2630 if (*p != '$')
2631 {
2632 semsg(_( e_invalid_format_specifier_str), fmt);
2633 goto error;
2634 }
2635 else
2636 {
2637 ++p;
2638 any_pos = 1;
2639 CHECK_POS_ARG;
2640
2641 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2642 goto error;
2643 }
2644 }
2645 else
2646 {
2647 any_arg = 1;
2648 CHECK_POS_ARG;
2649 }
2650 }
dundargoc580c1fc2023-10-06 19:41:14 +02002651 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002652 {
2653 // size_t could be wider than unsigned int; make sure we treat
2654 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002655 const char *digstart = p;
2656 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002657
Christ van Willegenc35fc032024-03-14 18:30:41 +01002658 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
2659 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002660
2661 if (*p == '$')
2662 {
2663 semsg(_( e_invalid_format_specifier_str), fmt);
2664 goto error;
2665 }
2666 }
2667
2668 // parse precision
2669 if (*p == '.')
2670 {
2671 p++;
2672
2673 if (*(arg = p) == '*')
2674 {
2675 p++;
2676
2677 if (VIM_ISDIGIT((int)(*p)))
2678 {
2679 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002680 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002681
Christ van Willegenc35fc032024-03-14 18:30:41 +01002682 if (get_unsigned_int(arg + 1, &p, &uj) == FAIL)
2683 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002684
2685 if (*p == '$')
2686 {
2687 any_pos = 1;
2688 CHECK_POS_ARG;
2689
2690 ++p;
2691
2692 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2693 goto error;
2694 }
2695 else
2696 {
2697 semsg(_( e_invalid_format_specifier_str), fmt);
2698 goto error;
2699 }
2700 }
2701 else
2702 {
2703 any_arg = 1;
2704 CHECK_POS_ARG;
2705 }
2706 }
dundargoc580c1fc2023-10-06 19:41:14 +02002707 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002708 {
2709 // size_t could be wider than unsigned int; make sure we
2710 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002711 const char *digstart = p;
2712 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002713
Christ van Willegenc35fc032024-03-14 18:30:41 +01002714 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
2715 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002716
2717 if (*p == '$')
2718 {
2719 semsg(_( e_invalid_format_specifier_str), fmt);
2720 goto error;
2721 }
2722 }
2723 }
2724
2725 if (pos_arg != -1)
2726 {
2727 any_pos = 1;
2728 CHECK_POS_ARG;
2729
2730 ptype = p;
2731 }
2732
2733 // parse 'h', 'l' and 'll' length modifiers
2734 if (*p == 'h' || *p == 'l')
2735 {
2736 length_modifier = *p;
2737 p++;
2738 if (length_modifier == 'l' && *p == 'l')
2739 {
2740 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02002741 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002742 p++;
2743 }
2744 }
2745
2746 switch (*p)
2747 {
2748 // Check for known format specifiers. % is special!
2749 case 'i':
2750 case '*':
2751 case 'd':
2752 case 'u':
2753 case 'o':
2754 case 'D':
2755 case 'U':
2756 case 'O':
2757 case 'x':
2758 case 'X':
2759 case 'b':
2760 case 'B':
2761 case 'c':
2762 case 's':
2763 case 'S':
2764 case 'p':
2765 case 'f':
2766 case 'F':
2767 case 'e':
2768 case 'E':
2769 case 'g':
2770 case 'G':
2771 if (pos_arg != -1)
2772 {
2773 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
2774 goto error;
2775 }
2776 else
2777 {
2778 any_arg = 1;
2779 CHECK_POS_ARG;
2780 }
2781 break;
2782
2783 default:
2784 if (pos_arg != -1)
2785 {
2786 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
2787 goto error;
2788 }
2789 }
2790
2791 if (*p != NUL)
2792 p++; // step over the just processed conversion specifier
2793 }
2794 }
2795
2796 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
2797 {
2798 if ((*ap_types)[arg_idx] == NULL)
2799 {
2800 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
2801 goto error;
2802 }
2803
2804# if defined(FEAT_EVAL)
2805 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
2806 {
2807 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
2808 goto error;
2809 }
2810# endif
2811 }
2812
2813 return OK;
2814
2815error:
K.Takata4c215ec2023-08-26 18:05:08 +02002816 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002817 *ap_types = NULL;
2818 *num_posarg = 0;
2819 return FAIL;
2820}
2821
zeertzjq7772c932023-08-15 22:48:40 +02002822 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002823skip_to_arg(
2824 const char **ap_types,
2825 va_list ap_start,
2826 va_list *ap,
2827 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002828 int *arg_cur,
2829 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002830{
2831 int arg_min = 0;
2832
2833 if (*arg_cur + 1 == *arg_idx)
2834 {
2835 ++*arg_cur;
2836 ++*arg_idx;
2837 return;
2838 }
2839
2840 if (*arg_cur >= *arg_idx)
2841 {
2842 // Reset ap to ap_start and skip arg_idx - 1 types
2843 va_end(*ap);
2844 va_copy(*ap, ap_start);
2845 }
2846 else
2847 {
2848 // Skip over any we should skip
2849 arg_min = *arg_cur;
2850 }
2851
2852 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
2853 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002854 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002855
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002856 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
2857 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02002858 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002859 return;
2860 }
2861
2862 p = ap_types[*arg_cur];
2863
2864 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002865
2866 // get parameter value, do initial processing
2867 switch (fmt_type)
2868 {
2869 case TYPE_PERCENT:
2870 case TYPE_UNKNOWN:
2871 break;
2872
2873 case TYPE_CHAR:
2874 va_arg(*ap, int);
2875 break;
2876
2877 case TYPE_STRING:
2878 va_arg(*ap, char *);
2879 break;
2880
2881 case TYPE_POINTER:
2882 va_arg(*ap, void *);
2883 break;
2884
2885 case TYPE_INT:
2886 va_arg(*ap, int);
2887 break;
2888
2889 case TYPE_LONGINT:
2890 va_arg(*ap, long int);
2891 break;
2892
2893 case TYPE_LONGLONGINT:
2894 va_arg(*ap, varnumber_T);
2895 break;
2896
2897 case TYPE_UNSIGNEDINT:
2898 va_arg(*ap, unsigned int);
2899 break;
2900
2901 case TYPE_UNSIGNEDLONGINT:
2902 va_arg(*ap, unsigned long int);
2903 break;
2904
2905 case TYPE_UNSIGNEDLONGLONGINT:
2906 va_arg(*ap, uvarnumber_T);
2907 break;
2908
2909 case TYPE_FLOAT:
2910 va_arg(*ap, double);
2911 break;
2912 }
2913 }
2914
2915 // Because we know that after we return from this call,
2916 // a va_arg() call is made, we can pre-emptively
2917 // increment the current argument index.
2918 ++*arg_cur;
2919 ++*arg_idx;
2920
2921 return;
2922}
2923
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002924 int
2925vim_vsnprintf_typval(
2926 char *str,
2927 size_t str_m,
2928 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002929 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002930 typval_T *tvs)
2931{
2932 size_t str_l = 0;
2933 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002934 int arg_cur = 0;
2935 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002936 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002937 va_list ap;
2938 const char **ap_types = NULL;
2939
2940 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
2941 return 0;
2942
2943 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002944
2945 if (p == NULL)
2946 p = "";
2947 while (*p != NUL)
2948 {
2949 if (*p != '%')
2950 {
2951 char *q = strchr(p + 1, '%');
2952 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2953
2954 // Copy up to the next '%' or NUL without any changes.
2955 if (str_l < str_m)
2956 {
2957 size_t avail = str_m - str_l;
2958
2959 mch_memmove(str + str_l, p, n > avail ? avail : n);
2960 }
2961 p += n;
2962 str_l += n;
2963 }
2964 else
2965 {
2966 size_t min_field_width = 0, precision = 0;
2967 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2968 int alternate_form = 0, force_sign = 0;
2969
2970 // If both the ' ' and '+' flags appear, the ' ' flag should be
2971 // ignored.
2972 int space_for_positive = 1;
2973
2974 // allowed values: \0, h, l, L
2975 char length_modifier = '\0';
2976
2977 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002978# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002979 // That sounds reasonable to use as the maximum
2980 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002981 char tmp[TMP_LEN];
2982
2983 // string address in case of string argument
2984 const char *str_arg = NULL;
2985
2986 // natural field width of arg without padding and sign
2987 size_t str_arg_l;
2988
2989 // unsigned char argument value - only defined for c conversion.
2990 // N.B. standard explicitly states the char argument for the c
2991 // conversion is unsigned
2992 unsigned char uchar_arg;
2993
2994 // number of zeros to be inserted for numeric conversions as
2995 // required by the precision or minimal field width
2996 size_t number_of_zeros_to_pad = 0;
2997
2998 // index into tmp where zero padding is to be inserted
2999 size_t zero_padding_insertion_ind = 0;
3000
3001 // current conversion specifier character
3002 char fmt_spec = '\0';
3003
3004 // buffer for 's' and 'S' specs
3005 char_u *tofree = NULL;
3006
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003007 // variables for positional arg
3008 int pos_arg = -1;
3009 const char *ptype;
3010
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003011
3012 p++; // skip '%'
3013
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003014 // First check to see if we find a positional
3015 // argument specifier
3016 ptype = p;
3017
3018 while (VIM_ISDIGIT(*ptype))
3019 ++ptype;
3020
3021 if (*ptype == '$')
3022 {
3023 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003024 const char *digstart = p;
3025 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003026
Christ van Willegenc35fc032024-03-14 18:30:41 +01003027 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3028 goto error;
3029
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003030 pos_arg = uj;
3031
3032 ++p;
3033 }
3034
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003035 // parse flags
3036 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3037 || *p == '#' || *p == '\'')
3038 {
3039 switch (*p)
3040 {
3041 case '0': zero_padding = 1; break;
3042 case '-': justify_left = 1; break;
3043 case '+': force_sign = 1; space_for_positive = 0; break;
3044 case ' ': force_sign = 1;
3045 // If both the ' ' and '+' flags appear, the ' '
3046 // flag should be ignored
3047 break;
3048 case '#': alternate_form = 1; break;
3049 case '\'': break;
3050 }
3051 p++;
3052 }
3053 // If the '0' and '-' flags both appear, the '0' flag should be
3054 // ignored.
3055
3056 // parse field width
3057 if (*p == '*')
3058 {
3059 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003060 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003061
3062 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003063
3064 if (VIM_ISDIGIT((int)(*p)))
3065 {
3066 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003067 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003068
Christ van Willegenc35fc032024-03-14 18:30:41 +01003069 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3070 goto error;
3071
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003072 arg_idx = uj;
3073
3074 ++p;
3075 }
3076
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003077 j =
3078# if defined(FEAT_EVAL)
3079 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3080# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003081 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3082 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003083 va_arg(ap, int));
3084
Christ van Willegenc35fc032024-03-14 18:30:41 +01003085 if (j > MAX_ALLOWED_STRING_WIDTH)
3086 {
3087 format_overflow_error(digstart);
3088 goto error;
3089 }
3090
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003091 if (j >= 0)
3092 min_field_width = j;
3093 else
3094 {
3095 min_field_width = -j;
3096 justify_left = 1;
3097 }
3098 }
3099 else if (VIM_ISDIGIT((int)(*p)))
3100 {
3101 // size_t could be wider than unsigned int; make sure we treat
3102 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003103 const char *digstart = p;
3104 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003105
Christ van Willegenc35fc032024-03-14 18:30:41 +01003106 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3107 goto error;
3108
3109 if (uj > MAX_ALLOWED_STRING_WIDTH)
3110 {
3111 format_overflow_error(digstart);
3112 goto error;
3113 }
3114
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003115 min_field_width = uj;
3116 }
3117
3118 // parse precision
3119 if (*p == '.')
3120 {
3121 p++;
3122 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003123
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003124 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003125 {
3126 // size_t could be wider than unsigned int; make sure we
3127 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003128 const char *digstart = p;
3129 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003130
Christ van Willegenc35fc032024-03-14 18:30:41 +01003131 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3132 goto error;
3133
3134 if (uj > MAX_ALLOWED_STRING_WIDTH)
3135 {
3136 format_overflow_error(digstart);
3137 goto error;
3138 }
3139
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003140 precision = uj;
3141 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003142 else if (*p == '*')
3143 {
3144 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003145 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003146
3147 p++;
3148
3149 if (VIM_ISDIGIT((int)(*p)))
3150 {
3151 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003152 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003153
Christ van Willegenc35fc032024-03-14 18:30:41 +01003154 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3155 goto error;
3156
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003157 arg_idx = uj;
3158
3159 ++p;
3160 }
3161
3162 j =
3163# if defined(FEAT_EVAL)
3164 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3165# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003166 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3167 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003168 va_arg(ap, int));
3169
Christ van Willegenc35fc032024-03-14 18:30:41 +01003170 if (j > MAX_ALLOWED_STRING_WIDTH)
3171 {
3172 format_overflow_error(digstart);
3173 goto error;
3174 }
3175
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003176 if (j >= 0)
3177 precision = j;
3178 else
3179 {
3180 precision_specified = 0;
3181 precision = 0;
3182 }
3183 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003184 }
3185
3186 // parse 'h', 'l' and 'll' length modifiers
3187 if (*p == 'h' || *p == 'l')
3188 {
3189 length_modifier = *p;
3190 p++;
3191 if (length_modifier == 'l' && *p == 'l')
3192 {
3193 // double l = __int64 / varnumber_T
3194 length_modifier = 'L';
3195 p++;
3196 }
3197 }
3198 fmt_spec = *p;
3199
3200 // common synonyms:
3201 switch (fmt_spec)
3202 {
3203 case 'i': fmt_spec = 'd'; break;
3204 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3205 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3206 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3207 default: break;
3208 }
3209
3210# if defined(FEAT_EVAL)
3211 switch (fmt_spec)
3212 {
3213 case 'd': case 'u': case 'o': case 'x': case 'X':
3214 if (tvs != NULL && length_modifier == '\0')
3215 length_modifier = 'L';
3216 }
3217# endif
3218
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003219 if (pos_arg != -1)
3220 arg_idx = pos_arg;
3221
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003222 // get parameter value, do initial processing
3223 switch (fmt_spec)
3224 {
3225 // '%' and 'c' behave similar to 's' regarding flags and field
3226 // widths
3227 case '%':
3228 case 'c':
3229 case 's':
3230 case 'S':
3231 str_arg_l = 1;
3232 switch (fmt_spec)
3233 {
3234 case '%':
3235 str_arg = p;
3236 break;
3237
3238 case 'c':
3239 {
3240 int j;
3241
3242 j =
3243# if defined(FEAT_EVAL)
3244 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3245# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003246 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3247 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003248 va_arg(ap, int));
3249
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003250 // standard demands unsigned char
3251 uchar_arg = (unsigned char)j;
3252 str_arg = (char *)&uchar_arg;
3253 break;
3254 }
3255
3256 case 's':
3257 case 'S':
3258 str_arg =
3259# if defined(FEAT_EVAL)
3260 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3261# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003262 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3263 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003264 va_arg(ap, char *));
3265
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003266 if (str_arg == NULL)
3267 {
3268 str_arg = "[NULL]";
3269 str_arg_l = 6;
3270 }
3271 // make sure not to address string beyond the specified
3272 // precision !!!
3273 else if (!precision_specified)
3274 str_arg_l = strlen(str_arg);
3275 // truncate string if necessary as requested by precision
3276 else if (precision == 0)
3277 str_arg_l = 0;
3278 else
3279 {
3280 // Don't put the #if inside memchr(), it can be a
3281 // macro.
3282 // memchr on HP does not like n > 2^31 !!!
3283 char *q = memchr(str_arg, '\0',
3284 precision <= (size_t)0x7fffffffL ? precision
3285 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003286
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003287 str_arg_l = (q == NULL) ? precision
3288 : (size_t)(q - str_arg);
3289 }
3290 if (fmt_spec == 'S')
3291 {
presuku1f2453f2021-11-24 15:32:57 +00003292 char_u *p1;
3293 size_t i;
3294 int cell;
presukud85fccd2021-11-20 19:38:31 +00003295
presuku1f2453f2021-11-24 15:32:57 +00003296 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003297 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003298 {
3299 cell = mb_ptr2cells(p1);
3300 if (precision_specified && i + cell > precision)
3301 break;
3302 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003303 }
presuku1f2453f2021-11-24 15:32:57 +00003304
3305 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003306 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003307 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003308 }
3309 break;
3310
3311 default:
3312 break;
3313 }
3314 break;
3315
3316 case 'd': case 'u':
3317 case 'b': case 'B':
3318 case 'o':
3319 case 'x': case 'X':
3320 case 'p':
3321 {
3322 // NOTE: the u, b, o, x, X and p conversion specifiers
3323 // imply the value is unsigned; d implies a signed
3324 // value
3325
3326 // 0 if numeric argument is zero (or if pointer is
3327 // NULL for 'p'), +1 if greater than zero (or nonzero
3328 // for unsigned arguments), -1 if negative (unsigned
3329 // argument is never negative)
3330 int arg_sign = 0;
3331
3332 // only set for length modifier h, or for no length
3333 // modifiers
3334 int int_arg = 0;
3335 unsigned int uint_arg = 0;
3336
3337 // only set for length modifier l
3338 long int long_arg = 0;
3339 unsigned long int ulong_arg = 0;
3340
3341 // only set for length modifier ll
3342 varnumber_T llong_arg = 0;
3343 uvarnumber_T ullong_arg = 0;
3344
3345 // only set for b conversion
3346 uvarnumber_T bin_arg = 0;
3347
3348 // pointer argument value -only defined for p
3349 // conversion
3350 void *ptr_arg = NULL;
3351
3352 if (fmt_spec == 'p')
3353 {
3354 length_modifier = '\0';
3355 ptr_arg =
3356# if defined(FEAT_EVAL)
3357 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3358 NULL) :
3359# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003360 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3361 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003362 va_arg(ap, void *));
3363
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003364 if (ptr_arg != NULL)
3365 arg_sign = 1;
3366 }
3367 else if (fmt_spec == 'b' || fmt_spec == 'B')
3368 {
3369 bin_arg =
3370# if defined(FEAT_EVAL)
3371 tvs != NULL ?
3372 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3373# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003374 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3375 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003376 va_arg(ap, uvarnumber_T));
3377
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003378 if (bin_arg != 0)
3379 arg_sign = 1;
3380 }
3381 else if (fmt_spec == 'd')
3382 {
3383 // signed
3384 switch (length_modifier)
3385 {
3386 case '\0':
3387 case 'h':
3388 // char and short arguments are passed as int.
3389 int_arg =
3390# if defined(FEAT_EVAL)
3391 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3392# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003393 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3394 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003395 va_arg(ap, int));
3396
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003397 if (int_arg > 0)
3398 arg_sign = 1;
3399 else if (int_arg < 0)
3400 arg_sign = -1;
3401 break;
3402 case 'l':
3403 long_arg =
3404# if defined(FEAT_EVAL)
3405 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3406# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003407 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3408 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003409 va_arg(ap, long int));
3410
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003411 if (long_arg > 0)
3412 arg_sign = 1;
3413 else if (long_arg < 0)
3414 arg_sign = -1;
3415 break;
3416 case 'L':
3417 llong_arg =
3418# if defined(FEAT_EVAL)
3419 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3420# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003421 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3422 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003423 va_arg(ap, varnumber_T));
3424
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003425 if (llong_arg > 0)
3426 arg_sign = 1;
3427 else if (llong_arg < 0)
3428 arg_sign = -1;
3429 break;
3430 }
3431 }
3432 else
3433 {
3434 // unsigned
3435 switch (length_modifier)
3436 {
3437 case '\0':
3438 case 'h':
3439 uint_arg =
3440# if defined(FEAT_EVAL)
3441 tvs != NULL ? (unsigned)
3442 tv_nr(tvs, &arg_idx) :
3443# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003444 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3445 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003446 va_arg(ap, unsigned int));
3447
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003448 if (uint_arg != 0)
3449 arg_sign = 1;
3450 break;
3451 case 'l':
3452 ulong_arg =
3453# if defined(FEAT_EVAL)
3454 tvs != NULL ? (unsigned long)
3455 tv_nr(tvs, &arg_idx) :
3456# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003457 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3458 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003459 va_arg(ap, unsigned long int));
3460
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003461 if (ulong_arg != 0)
3462 arg_sign = 1;
3463 break;
3464 case 'L':
3465 ullong_arg =
3466# if defined(FEAT_EVAL)
3467 tvs != NULL ? (uvarnumber_T)
3468 tv_nr(tvs, &arg_idx) :
3469# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003470 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3471 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003472 va_arg(ap, uvarnumber_T));
3473
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003474 if (ullong_arg != 0)
3475 arg_sign = 1;
3476 break;
3477 }
3478 }
3479
3480 str_arg = tmp;
3481 str_arg_l = 0;
3482
3483 // NOTE:
3484 // For d, i, u, o, x, and X conversions, if precision is
3485 // specified, the '0' flag should be ignored. This is so
3486 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3487 // FreeBSD, NetBSD; but not with Perl.
3488 if (precision_specified)
3489 zero_padding = 0;
3490 if (fmt_spec == 'd')
3491 {
3492 if (force_sign && arg_sign >= 0)
3493 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3494 // leave negative numbers for sprintf to handle, to
3495 // avoid handling tricky cases like (short int)-32768
3496 }
3497 else if (alternate_form)
3498 {
3499 if (arg_sign != 0
3500 && (fmt_spec == 'b' || fmt_spec == 'B'
3501 || fmt_spec == 'x' || fmt_spec == 'X') )
3502 {
3503 tmp[str_arg_l++] = '0';
3504 tmp[str_arg_l++] = fmt_spec;
3505 }
3506 // alternate form should have no effect for p
3507 // conversion, but ...
3508 }
3509
3510 zero_padding_insertion_ind = str_arg_l;
3511 if (!precision_specified)
3512 precision = 1; // default precision is 1
3513 if (precision == 0 && arg_sign == 0)
3514 {
3515 // When zero value is formatted with an explicit
3516 // precision 0, the resulting formatted string is
3517 // empty (d, i, u, b, B, o, x, X, p).
3518 }
3519 else
3520 {
3521 char f[6];
3522 int f_l = 0;
3523
3524 // construct a simple format string for sprintf
3525 f[f_l++] = '%';
3526 if (!length_modifier)
3527 ;
3528 else if (length_modifier == 'L')
3529 {
3530# ifdef MSWIN
3531 f[f_l++] = 'I';
3532 f[f_l++] = '6';
3533 f[f_l++] = '4';
3534# else
3535 f[f_l++] = 'l';
3536 f[f_l++] = 'l';
3537# endif
3538 }
3539 else
3540 f[f_l++] = length_modifier;
3541 f[f_l++] = fmt_spec;
3542 f[f_l++] = '\0';
3543
3544 if (fmt_spec == 'p')
3545 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3546 else if (fmt_spec == 'b' || fmt_spec == 'B')
3547 {
3548 char b[8 * sizeof(uvarnumber_T)];
3549 size_t b_l = 0;
3550 uvarnumber_T bn = bin_arg;
3551
3552 do
3553 {
3554 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3555 bn >>= 1;
3556 }
3557 while (bn != 0);
3558
3559 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3560 str_arg_l += b_l;
3561 }
3562 else if (fmt_spec == 'd')
3563 {
3564 // signed
3565 switch (length_modifier)
3566 {
3567 case '\0': str_arg_l += sprintf(
3568 tmp + str_arg_l, f,
3569 int_arg);
3570 break;
3571 case 'h': str_arg_l += sprintf(
3572 tmp + str_arg_l, f,
3573 (short)int_arg);
3574 break;
3575 case 'l': str_arg_l += sprintf(
3576 tmp + str_arg_l, f, long_arg);
3577 break;
3578 case 'L': str_arg_l += sprintf(
3579 tmp + str_arg_l, f, llong_arg);
3580 break;
3581 }
3582 }
3583 else
3584 {
3585 // unsigned
3586 switch (length_modifier)
3587 {
3588 case '\0': str_arg_l += sprintf(
3589 tmp + str_arg_l, f,
3590 uint_arg);
3591 break;
3592 case 'h': str_arg_l += sprintf(
3593 tmp + str_arg_l, f,
3594 (unsigned short)uint_arg);
3595 break;
3596 case 'l': str_arg_l += sprintf(
3597 tmp + str_arg_l, f, ulong_arg);
3598 break;
3599 case 'L': str_arg_l += sprintf(
3600 tmp + str_arg_l, f, ullong_arg);
3601 break;
3602 }
3603 }
3604
3605 // include the optional minus sign and possible
3606 // "0x" in the region before the zero padding
3607 // insertion point
3608 if (zero_padding_insertion_ind < str_arg_l
3609 && tmp[zero_padding_insertion_ind] == '-')
3610 zero_padding_insertion_ind++;
3611 if (zero_padding_insertion_ind + 1 < str_arg_l
3612 && tmp[zero_padding_insertion_ind] == '0'
3613 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3614 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3615 zero_padding_insertion_ind += 2;
3616 }
3617
3618 {
3619 size_t num_of_digits = str_arg_l
3620 - zero_padding_insertion_ind;
3621
3622 if (alternate_form && fmt_spec == 'o'
3623 // unless zero is already the first
3624 // character
3625 && !(zero_padding_insertion_ind < str_arg_l
3626 && tmp[zero_padding_insertion_ind] == '0'))
3627 {
3628 // assure leading zero for alternate-form
3629 // octal numbers
3630 if (!precision_specified
3631 || precision < num_of_digits + 1)
3632 {
3633 // precision is increased to force the
3634 // first character to be zero, except if a
3635 // zero value is formatted with an
3636 // explicit precision of zero
3637 precision = num_of_digits + 1;
3638 }
3639 }
3640 // zero padding to specified precision?
3641 if (num_of_digits < precision)
3642 number_of_zeros_to_pad = precision - num_of_digits;
3643 }
3644 // zero padding to specified minimal field width?
3645 if (!justify_left && zero_padding)
3646 {
3647 int n = (int)(min_field_width - (str_arg_l
3648 + number_of_zeros_to_pad));
3649 if (n > 0)
3650 number_of_zeros_to_pad += n;
3651 }
3652 break;
3653 }
3654
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003655 case 'f':
3656 case 'F':
3657 case 'e':
3658 case 'E':
3659 case 'g':
3660 case 'G':
3661 {
3662 // Floating point.
3663 double f;
3664 double abs_f;
3665 char format[40];
3666 int l;
3667 int remove_trailing_zeroes = FALSE;
3668
3669 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003670# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003671 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003672# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003673 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3674 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003675 va_arg(ap, double));
3676
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003677 abs_f = f < 0 ? -f : f;
3678
3679 if (fmt_spec == 'g' || fmt_spec == 'G')
3680 {
3681 // Would be nice to use %g directly, but it prints
3682 // "1.0" as "1", we don't want that.
3683 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3684 || abs_f == 0.0)
3685 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3686 else
3687 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3688 remove_trailing_zeroes = TRUE;
3689 }
3690
3691 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003692# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003693 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003694# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003695 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003696# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003697 )
3698 {
3699 // Avoid a buffer overflow
3700 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3701 force_sign, space_for_positive));
3702 str_arg_l = STRLEN(tmp);
3703 zero_padding = 0;
3704 }
3705 else
3706 {
3707 if (isnan(f))
3708 {
3709 // Not a number: nan or NAN
3710 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3711 : "nan");
3712 str_arg_l = 3;
3713 zero_padding = 0;
3714 }
3715 else if (isinf(f))
3716 {
3717 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3718 force_sign, space_for_positive));
3719 str_arg_l = STRLEN(tmp);
3720 zero_padding = 0;
3721 }
3722 else
3723 {
3724 // Regular float number
3725 format[0] = '%';
3726 l = 1;
3727 if (force_sign)
3728 format[l++] = space_for_positive ? ' ' : '+';
3729 if (precision_specified)
3730 {
3731 size_t max_prec = TMP_LEN - 10;
3732
3733 // Make sure we don't get more digits than we
3734 // have room for.
3735 if ((fmt_spec == 'f' || fmt_spec == 'F')
3736 && abs_f > 1.0)
3737 max_prec -= (size_t)log10(abs_f);
3738 if (precision > max_prec)
3739 precision = max_prec;
3740 l += sprintf(format + l, ".%d", (int)precision);
3741 }
3742 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
3743 format[l + 1] = NUL;
3744
3745 str_arg_l = sprintf(tmp, format, f);
3746 }
3747
3748 if (remove_trailing_zeroes)
3749 {
3750 int i;
3751 char *tp;
3752
3753 // Using %g or %G: remove superfluous zeroes.
3754 if (fmt_spec == 'f' || fmt_spec == 'F')
3755 tp = tmp + str_arg_l - 1;
3756 else
3757 {
3758 tp = (char *)vim_strchr((char_u *)tmp,
3759 fmt_spec == 'e' ? 'e' : 'E');
3760 if (tp != NULL)
3761 {
3762 // Remove superfluous '+' and leading
3763 // zeroes from the exponent.
3764 if (tp[1] == '+')
3765 {
3766 // Change "1.0e+07" to "1.0e07"
3767 STRMOVE(tp + 1, tp + 2);
3768 --str_arg_l;
3769 }
3770 i = (tp[1] == '-') ? 2 : 1;
3771 while (tp[i] == '0')
3772 {
3773 // Change "1.0e07" to "1.0e7"
3774 STRMOVE(tp + i, tp + i + 1);
3775 --str_arg_l;
3776 }
3777 --tp;
3778 }
3779 }
3780
3781 if (tp != NULL && !precision_specified)
3782 // Remove trailing zeroes, but keep the one
3783 // just after a dot.
3784 while (tp > tmp + 2 && *tp == '0'
3785 && tp[-1] != '.')
3786 {
3787 STRMOVE(tp, tp + 1);
3788 --tp;
3789 --str_arg_l;
3790 }
3791 }
3792 else
3793 {
3794 char *tp;
3795
3796 // Be consistent: some printf("%e") use 1.0e+12
3797 // and some 1.0e+012. Remove one zero in the last
3798 // case.
3799 tp = (char *)vim_strchr((char_u *)tmp,
3800 fmt_spec == 'e' ? 'e' : 'E');
3801 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
3802 && tp[2] == '0'
3803 && vim_isdigit(tp[3])
3804 && vim_isdigit(tp[4]))
3805 {
3806 STRMOVE(tp + 2, tp + 3);
3807 --str_arg_l;
3808 }
3809 }
3810 }
3811 if (zero_padding && min_field_width > str_arg_l
3812 && (tmp[0] == '-' || force_sign))
3813 {
3814 // padding 0's should be inserted after the sign
3815 number_of_zeros_to_pad = min_field_width - str_arg_l;
3816 zero_padding_insertion_ind = 1;
3817 }
3818 str_arg = tmp;
3819 break;
3820 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003821
3822 default:
3823 // unrecognized conversion specifier, keep format string
3824 // as-is
3825 zero_padding = 0; // turn zero padding off for non-numeric
3826 // conversion
3827 justify_left = 1;
3828 min_field_width = 0; // reset flags
3829
3830 // discard the unrecognized conversion, just keep
3831 // the unrecognized conversion character
3832 str_arg = p;
3833 str_arg_l = 0;
3834 if (*p != NUL)
3835 str_arg_l++; // include invalid conversion specifier
3836 // unchanged if not at end-of-string
3837 break;
3838 }
3839
3840 if (*p != NUL)
3841 p++; // step over the just processed conversion specifier
3842
3843 // insert padding to the left as requested by min_field_width;
3844 // this does not include the zero padding in case of numerical
3845 // conversions
3846 if (!justify_left)
3847 {
3848 // left padding with blank or zero
3849 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
3850
3851 if (pn > 0)
3852 {
3853 if (str_l < str_m)
3854 {
3855 size_t avail = str_m - str_l;
3856
3857 vim_memset(str + str_l, zero_padding ? '0' : ' ',
3858 (size_t)pn > avail ? avail
3859 : (size_t)pn);
3860 }
3861 str_l += pn;
3862 }
3863 }
3864
3865 // zero padding as requested by the precision or by the minimal
3866 // field width for numeric conversions required?
3867 if (number_of_zeros_to_pad == 0)
3868 {
3869 // will not copy first part of numeric right now,
3870 // force it to be copied later in its entirety
3871 zero_padding_insertion_ind = 0;
3872 }
3873 else
3874 {
3875 // insert first part of numerics (sign or '0x') before zero
3876 // padding
3877 int zn = (int)zero_padding_insertion_ind;
3878
3879 if (zn > 0)
3880 {
3881 if (str_l < str_m)
3882 {
3883 size_t avail = str_m - str_l;
3884
3885 mch_memmove(str + str_l, str_arg,
3886 (size_t)zn > avail ? avail
3887 : (size_t)zn);
3888 }
3889 str_l += zn;
3890 }
3891
3892 // insert zero padding as requested by the precision or min
3893 // field width
3894 zn = (int)number_of_zeros_to_pad;
3895 if (zn > 0)
3896 {
3897 if (str_l < str_m)
3898 {
3899 size_t avail = str_m - str_l;
3900
3901 vim_memset(str + str_l, '0',
3902 (size_t)zn > avail ? avail
3903 : (size_t)zn);
3904 }
3905 str_l += zn;
3906 }
3907 }
3908
3909 // insert formatted string
3910 // (or as-is conversion specifier for unknown conversions)
3911 {
3912 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
3913
3914 if (sn > 0)
3915 {
3916 if (str_l < str_m)
3917 {
3918 size_t avail = str_m - str_l;
3919
3920 mch_memmove(str + str_l,
3921 str_arg + zero_padding_insertion_ind,
3922 (size_t)sn > avail ? avail : (size_t)sn);
3923 }
3924 str_l += sn;
3925 }
3926 }
3927
3928 // insert right padding
3929 if (justify_left)
3930 {
3931 // right blank padding to the field width
3932 int pn = (int)(min_field_width
3933 - (str_arg_l + number_of_zeros_to_pad));
3934
3935 if (pn > 0)
3936 {
3937 if (str_l < str_m)
3938 {
3939 size_t avail = str_m - str_l;
3940
3941 vim_memset(str + str_l, ' ',
3942 (size_t)pn > avail ? avail
3943 : (size_t)pn);
3944 }
3945 str_l += pn;
3946 }
3947 }
3948 vim_free(tofree);
3949 }
3950 }
3951
3952 if (str_m > 0)
3953 {
3954 // make sure the string is nul-terminated even at the expense of
3955 // overwriting the last character (shouldn't happen, but just in case)
3956 //
3957 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
3958 }
3959
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003960 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00003961 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003962
Christ van Willegenc35fc032024-03-14 18:30:41 +01003963error:
K.Takata4c215ec2023-08-26 18:05:08 +02003964 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003965 va_end(ap);
3966
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003967 // Return the number of characters formatted (excluding trailing nul
3968 // character), that is, the number of characters that would have been
3969 // written to the buffer if it were large enough.
3970 return (int)str_l;
3971}
3972
3973#endif // PROTO